diff options
author | Kirill Volinsky <mataes2007@gmail.com> | 2013-03-17 16:35:15 +0000 |
---|---|---|
committer | Kirill Volinsky <mataes2007@gmail.com> | 2013-03-17 16:35:15 +0000 |
commit | 6c43253da0dd38a409146f7acb90ae6cc1ef8069 (patch) | |
tree | 6da7874de2057cde0dd14167153950d03ea6d22f /plugins/UserInfoEx/src/ex_import/tinyxmlparser.cpp | |
parent | 82ae452fff08430d514f762f49e78fec90f88625 (diff) |
removed not used headers
added version info
git-svn-id: http://svn.miranda-ng.org/main/trunk@4079 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
Diffstat (limited to 'plugins/UserInfoEx/src/ex_import/tinyxmlparser.cpp')
-rw-r--r-- | plugins/UserInfoEx/src/ex_import/tinyxmlparser.cpp | 3194 |
1 files changed, 1587 insertions, 1607 deletions
diff --git a/plugins/UserInfoEx/src/ex_import/tinyxmlparser.cpp b/plugins/UserInfoEx/src/ex_import/tinyxmlparser.cpp index 7b9f4e5626..04203f2f7a 100644 --- a/plugins/UserInfoEx/src/ex_import/tinyxmlparser.cpp +++ b/plugins/UserInfoEx/src/ex_import/tinyxmlparser.cpp @@ -1,1607 +1,1587 @@ -/* -www.sourceforge.net/projects/tinyxml -Original code (2.0 and earlier)copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com) - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any -damages arising from the use of this software. - -Permission is granted to anyone to use this software for any -purpose, including commercial applications, and to alter it and -redistribute it freely, subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must -not claim that you wrote the original software. If you use this -software in a product, an acknowledgment in the product documentation -would be appreciated but is not required. - -2. Altered source versions must be plainly marked as such, and -must not be misrepresented as being the original software. - -3. This notice may not be removed or altered from any source -distribution. - -=============================================================================== - -UserinfoEx plugin for Miranda IM - -Copyright: -© 2006-2010 DeathAxe, Yasnovidyashii, Merlin, K. Romanov, Kreol -=============================================================================== -*/ - -#include <ctype.h> -#include <stddef.h> - -#ifdef USE_MMGR -#include <string.h> -#include <assert.h> -#include <stdio.h> -#include "mmgr.h" -#endif - -#include "tinyxml.h" - -//#define DEBUG_PARSER -#if defined(DEBUG_PARSER) -# if defined(DEBUG) && defined(_MSC_VER) -# include <windows.h> -# define TIXML_LOG OutputDebugString -# else -# define TIXML_LOG printf -# endif -#endif - -// Note tha "PutString" hardcodes the same list. 
This -// is less flexible than it appears. Changing the entries -// or order will break putstring. -TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = -{ - { "&", 5, '&' }, - { "<", 4, '<' }, - { ">", 4, '>' }, - { """, 6, '\"' }, - { "'", 6, '\'' } -}; - -// Bunch of unicode info at: -// http://www.unicode.org/faq/utf_bom.html -// Including the basic of this table, which determines the #bytes in the -// sequence from the lead byte. 1 placed for invalid sequences -- -// although the result will be junk, pass it through as much as possible. -// Beware of the non-characters in UTF-8: -// ef bb bf (Microsoft "lead bytes") -// ef bf be -// ef bf bf - -const unsigned char TIXML_UTF_LEAD_0 = 0xefU; -const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; -const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; - -const int TiXmlBase::utf8ByteTable[256] = -{ - // 0 1 2 3 4 5 6 7 8 9 a b c d e f - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0 - 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0 - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte - 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid -}; - - -void TiXmlBase::ConvertUTF32ToUTF8(unsigned long input, 
char* output, int* length) -{ - const unsigned long BYTE_MASK = 0xBF; - const unsigned long BYTE_MARK = 0x80; - const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - - if (input < 0x80) - *length = 1; - else if (input < 0x800) - *length = 2; - else if (input < 0x10000) - *length = 3; - else if (input < 0x200000) - *length = 4; - else - { *length = 0; return; } // This code won't covert this correctly anyway. - - output += *length; - - // Scary scary fall throughs. - switch (*length) - { - case 4: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 3: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 2: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 1: - --output; - *output = (char)(input | FIRST_BYTE_MARK[*length]); - } -} - - -/*static*/ int TiXmlBase::IsAlpha(unsigned char anyByte, TiXmlEncoding /*encoding*/) -{ - // This will only work for low-ascii, everything else is assumed to be a valid - // letter. I'm not sure this is the best approach, but it is quite tricky trying - // to figure out alhabetical vs. not across encoding. So take a very - // conservative approach. - -// if (encoding == TIXML_ENCODING_UTF8) -// { - if (anyByte < 127) - return isalpha(anyByte); - else - return 1; // What else to do? The unicode set is huge...get the english ones right. -// } -// else -// { -// return isalpha(anyByte); -// } -} - - -/*static*/ int TiXmlBase::IsAlphaNum(unsigned char anyByte, TiXmlEncoding /*encoding*/) -{ - // This will only work for low-ascii, everything else is assumed to be a valid - // letter. I'm not sure this is the best approach, but it is quite tricky trying - // to figure out alhabetical vs. not across encoding. So take a very - // conservative approach. - -// if (encoding == TIXML_ENCODING_UTF8) -// { - if (anyByte < 127) - return isalnum(anyByte); - else - return 1; // What else to do? 
The unicode set is huge...get the english ones right. -// } -// else -// { -// return isalnum(anyByte); -// } -} - - -class TiXmlParsingData -{ - friend class TiXmlDocument; - public: - void Stamp(const char* now, TiXmlEncoding encoding); - - const TiXmlCursor& Cursor() { return cursor; } - - private: - // Only used by the document! - TiXmlParsingData(const char* start, int _tabsize, int row, int col) - { - assert(start); - stamp = start; - tabsize = _tabsize; - cursor.row = row; - cursor.col = col; - } - - TiXmlCursor cursor; - const char* stamp; - int tabsize; -}; - - -void TiXmlParsingData::Stamp(const char* now, TiXmlEncoding encoding) -{ - assert(now); - - // Do nothing if the tabsize is 0. - if (tabsize < 1) - { - return; - } - - // Get the current row, column. - int row = cursor.row; - int col = cursor.col; - const char* p = stamp; - assert(p); - - while (p < now) - { - // Treat p as unsigned, so we have a happy compiler. - const unsigned char* pU = (const unsigned char*)p; - - // Code contributed by Fletcher Dunn: (modified by lee) - switch (*pU) { - case 0: - // We *should* never get here, but in case we do, don't - // advance past the terminating null character, ever - return; - - case '\r': - // bump down to the next line - ++row; - col = 0; - // Eat the character - ++p; - - // Check for \r\n sequence, and treat this as a single character - if (*p == '\n') { - ++p; - } - break; - - case '\n': - // bump down to the next line - ++row; - col = 0; - - // Eat the character - ++p; - - // Check for \n\r sequence, and treat this as a single - // character. (Yes, this bizarre thing does occur still - // on some arcane platforms...) - if (*p == '\r') { - ++p; - } - break; - - case '\t': - // Eat the character - ++p; - - // Skip to next tab stop - col = (col / tabsize + 1) * tabsize; - break; - - case TIXML_UTF_LEAD_0: - if (encoding == TIXML_ENCODING_UTF8) - { - if (*(p+1) && *(p+2)) - { - // In these cases, don't advance the column. These are - // 0-width spaces. 
- if (*(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2) - p += 3; - else if (*(pU+1)==0xbfU && *(pU+2)==0xbeU) - p += 3; - else if (*(pU+1)==0xbfU && *(pU+2)==0xbfU) - p += 3; - else - { p +=3; ++col; } // A normal character. - } - } - else - { - ++p; - ++col; - } - break; - - default: - if (encoding == TIXML_ENCODING_UTF8) - { - // Eat the 1 to 4 byte utf8 character. - int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)]; - if (step == 0) - step = 1; // Error case from bad encoding, but handle gracefully. - p += step; - - // Just advance one column, of course. - ++col; - } - else - { - ++p; - ++col; - } - break; - } - } - cursor.row = row; - cursor.col = col; - assert(cursor.row >= -1); - assert(cursor.col >= -1); - stamp = p; - assert(stamp); -} - - -const char* TiXmlBase::SkipWhiteSpace(const char* p, TiXmlEncoding encoding) -{ - if (!p || !*p) - { - return 0; - } - if (encoding == TIXML_ENCODING_UTF8) - { - while (*p) - { - const unsigned char* pU = (const unsigned char*)p; - - // Skip the stupid Microsoft UTF-8 Byte order marks - if ( *(pU+0)==TIXML_UTF_LEAD_0 - && *(pU+1)==TIXML_UTF_LEAD_1 - && *(pU+2)==TIXML_UTF_LEAD_2) - { - p += 3; - continue; - } - else if (*(pU+0)==TIXML_UTF_LEAD_0 - && *(pU+1)==0xbfU - && *(pU+2)==0xbeU) - { - p += 3; - continue; - } - else if (*(pU+0)==TIXML_UTF_LEAD_0 - && *(pU+1)==0xbfU - && *(pU+2)==0xbfU) - { - p += 3; - continue; - } - - if (IsWhiteSpace(*p) || *p == '\n' || *p =='\r') // Still using old rules for white space. - ++p; - else - break; - } - } - else - { - while (*p && IsWhiteSpace(*p) || *p == '\n' || *p =='\r') - ++p; - } - - return p; -} - -#ifdef TIXML_USE_STL -/*static*/ bool TiXmlBase::StreamWhiteSpace(TIXML_ISTREAM * in, TIXML_STRING * tag) -{ - for (;;) - { - if (!in->good()) return false; - - int c = in->peek(); - // At this scope, we can't get to a document. So fail silently. 
- if (!IsWhiteSpace(c) || c <= 0) - return true; - - *tag += (char) in->get(); - } -} - -/*static*/ bool TiXmlBase::StreamTo(TIXML_ISTREAM * in, int character, TIXML_STRING * tag) -{ - //assert(character > 0 && character < 128); // else it won't work in utf-8 - while (in->good()) - { - int c = in->peek(); - if (c == character) - return true; - if (c <= 0) // Silent failure: can't get document at this scope - return false; - - in->get(); - *tag += (char) c; - } - return false; -} -#endif - -const char* TiXmlBase::ReadName(const char* p, TIXML_STRING * name, TiXmlEncoding encoding) -{ - *name = ""; - assert(p); - - // Names start with letters or underscores. - // Of course, in unicode, tinyxml has no idea what a letter *is*. The - // algorithm is generous. - // - // After that, they can be letters, underscores, numbers, - // hyphens, or colons. (Colons are valid ony for namespaces, - // but tinyxml can't tell namespaces from names.) - if ( p && *p - && (IsAlpha((unsigned char) *p, encoding) || *p == '_')) - { - while ( p && *p - && ( IsAlphaNum((unsigned char) *p, encoding) - || *p == '_' - || *p == '-' - || *p == '.' - || *p == ':')) - { - (*name) += *p; - ++p; - } - return p; - } - return 0; -} - -const char* TiXmlBase::GetEntity(const char* p, char* value, int* length, TiXmlEncoding encoding) -{ - // Presume an entity, and pull it out. - TIXML_STRING ent; - int i; - *length = 0; - - if (*(p+1) && *(p+1) == '#' && *(p+2)) - { - unsigned long ucs = 0; - ptrdiff_t delta = 0; - unsigned mult = 1; - - if (*(p+2) == 'x') - { - // Hexadecimal. - if (!*(p+3)) return 0; - - const char* q = p+3; - q = strchr(q, ';'); - - if (!q || !*q) return 0; - - delta = q-p; - --q; - - while (*q != 'x') - { - if (*q >= '0' && *q <= '9') - ucs += mult * (*q - '0'); - else if (*q >= 'a' && *q <= 'f') - ucs += mult * (*q - 'a' + 10); - else if (*q >= 'A' && *q <= 'F') - ucs += mult * (*q - 'A' + 10); - else - return 0; - mult *= 16; - --q; - } - } - else - { - // Decimal. 
- if (!*(p+2)) return 0; - - const char* q = p+2; - q = strchr(q, ';'); - - if (!q || !*q) return 0; - - delta = q-p; - --q; - - while (*q != '#') - { - if (*q >= '0' && *q <= '9') - ucs += mult * (*q - '0'); - else - return 0; - mult *= 10; - --q; - } - } - if (encoding == TIXML_ENCODING_UTF8) - { - // convert the UCS to UTF-8 - ConvertUTF32ToUTF8(ucs, value, length); - } - else - { - *value = (char)ucs; - *length = 1; - } - return p + delta + 1; - } - - // Now try to match it. - for (i=0; i<NUM_ENTITY; ++i) - { - if (strncmp(entity[i].str, p, entity[i].strLength) == 0) - { - assert(strlen(entity[i].str) == entity[i].strLength); - *value = entity[i].chr; - *length = 1; - return (p + entity[i].strLength); - } - } - - // So it wasn't an entity, its unrecognized, or something like that. - *value = *p; // Don't put back the last one, since we return it! - //*length = 1; // Leave unrecognized entities - this doesn't really work. - // Just writes strange XML. - return p+1; -} - - -bool TiXmlBase::StringEqual(const char* p, - const char* tag, - bool ignoreCase, - TiXmlEncoding encoding) -{ - assert(p); - assert(tag); - if (!p || !*p) - { - assert(0); - return false; - } - - const char* q = p; - - if (ignoreCase) - { - while (*q && *tag && ToLower(*q, encoding) == ToLower(*tag, encoding)) - { - ++q; - ++tag; - } - - if (*tag == 0) - return true; - } - else - { - while (*q && *tag && *q == *tag) - { - ++q; - ++tag; - } - - if (*tag == 0) // Have we found the end of the tag, and everything equal? - return true; - } - return false; -} - -const char* TiXmlBase::ReadText( const char* p, - TIXML_STRING * text, - bool trimWhiteSpace, - const char* endTag, - bool caseInsensitive, - TiXmlEncoding encoding) -{ - *text = ""; - if ( !trimWhiteSpace // certain tags always keep whitespace - || !condenseWhiteSpace) // if true, whitespace is always kept - { - // Keep all the white space. 
- while ( p && *p - && !StringEqual(p, endTag, caseInsensitive, encoding) - ) - { - int len; - char cArr[4] = { 0, 0, 0, 0 }; - p = GetChar(p, cArr, &len, encoding); - text->append(cArr, len); - } - } - else - { - bool whitespace = false; - - // Remove leading white space: - p = SkipWhiteSpace(p, encoding); - while ( p && *p - && !StringEqual(p, endTag, caseInsensitive, encoding)) - { - if (*p == '\r' || *p == '\n') - { - whitespace = true; - ++p; - } - else if (IsWhiteSpace(*p)) - { - whitespace = true; - ++p; - } - else - { - // If we've found whitespace, add it before the - // new character. Any whitespace just becomes a space. - if (whitespace) - { - (*text) += ' '; - whitespace = false; - } - int len; - char cArr[4] = { 0, 0, 0, 0 }; - p = GetChar(p, cArr, &len, encoding); - if (len == 1) - (*text) += cArr[0]; // more efficient - else - text->append(cArr, len); - } - } - } - return p + strlen(endTag); -} - -#ifdef TIXML_USE_STL - -void TiXmlDocument::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag) -{ - // The basic issue with a document is that we don't know what we're - // streaming. Read something presumed to be a tag (and hope), then - // identify it, and call the appropriate stream method on the tag. - // - // This "pre-streaming" will never read the closing ">" so the - // sub-tag can orient itself. - - if (!StreamTo(in, '<', tag)) - { - SetError(TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - - while (in->good()) - { - int tagIndex = (int) tag->length(); - while (in->good() && in->peek() != '>') - { - int c = in->get(); - if (c <= 0) - { - SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - break; - } - (*tag) += (char) c; - } - - if (in->good()) - { - // We now have something we presume to be a node of - // some sort. Identify it, and call the node to - // continue streaming. 
- TiXmlNode* node = Identify(tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING); - - if (node) - { - node->StreamIn(in, tag); - bool isElement = node->ToElement() != 0; - delete node; - node = 0; - - // If this is the root element, we're done. Parsing will be - // done by the >> operator. - if (isElement) - { - return; - } - } - else - { - SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - } - } - // We should have returned sooner. - SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN); -} - -#endif - -const char* TiXmlDocument::Parse(const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding) -{ - ClearError(); - - // Parse away, at the document level. Since a document - // contains nothing but other tags, most of what happens - // here is skipping white space. - if (!p || !*p) - { - SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN); - return 0; - } - - // Note that, for a document, this needs to come - // before the while space skip, so that parsing - // starts from the pointer we are given. - location.Clear(); - if (prevData) - { - location.row = prevData->cursor.row; - location.col = prevData->cursor.col; - } - else - { - location.row = 0; - location.col = 0; - } - TiXmlParsingData data(p, TabSize(), location.row, location.col); - location = data.Cursor(); - - if (encoding == TIXML_ENCODING_UNKNOWN) - { - // Check for the Microsoft UTF-8 lead bytes. 
- const unsigned char* pU = (const unsigned char*)p; - if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0 - && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1 - && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2) - { - encoding = TIXML_ENCODING_UTF8; - useMicrosoftBOM = true; - } - } - - p = SkipWhiteSpace(p, encoding); - if (!p) - { - SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN); - return 0; - } - - while (p && *p) - { - TiXmlNode* node = Identify(p, encoding); - if (node) - { - p = node->Parse(p, &data, encoding); - LinkEndChild(node); - } - else - { - break; - } - - // Did we get encoding info? - if ( encoding == TIXML_ENCODING_UNKNOWN - && node->ToDeclaration()) - { - TiXmlDeclaration* dec = node->ToDeclaration(); - const char* enc = dec->Encoding(); - assert(enc); - - if (*enc == 0) - encoding = TIXML_ENCODING_UTF8; - else if (StringEqual(enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN)) - encoding = TIXML_ENCODING_UTF8; - else if (StringEqual(enc, "UTF8", true, TIXML_ENCODING_UNKNOWN)) - encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice - else - encoding = TIXML_ENCODING_LEGACY; - } - - p = SkipWhiteSpace(p, encoding); - } - - // Was this empty? - if (!firstChild) { - SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding); - return 0; - } - - // All is well. - return p; -} - -void TiXmlDocument::SetError(int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding) -{ - // The first error in a chain is more accurate - don't set again! 
- if (error) - return; - - assert(err > 0 && err < TIXML_ERROR_STRING_COUNT); - error = true; - errorId = err; - errorDesc = errorString[ errorId ]; - - errorLocation.Clear(); - if (pError && data) - { - data->Stamp(pError, encoding); - errorLocation = data->Cursor(); - } -} - - -TiXmlNode* TiXmlNode::Identify(const char* p, TiXmlEncoding encoding) -{ - TiXmlNode* returnNode = 0; - - p = SkipWhiteSpace(p, encoding); - if (!p || !*p || *p != '<') - { - return 0; - } - - TiXmlDocument* doc = GetDocument(); - p = SkipWhiteSpace(p, encoding); - - if (!p || !*p) - { - return 0; - } - - // What is this thing? - // - Elements start with a letter or underscore, but xml is reserved. - // - Comments: <!-- - // - Decleration: <?xml - // - Everthing else is unknown to tinyxml. - // - - const char* xmlHeader = { "<?xml" }; - const char* commentHeader = { "<!--" }; - const char* dtdHeader = { "<!" }; - const char* cdataHeader = { "<![CDATA[" }; - - if (StringEqual(p, xmlHeader, true, encoding)) - { - #ifdef DEBUG_PARSER - TIXML_LOG("XML parsing Declaration\n"); - #endif - returnNode = new TiXmlDeclaration(); - } - else if (StringEqual(p, commentHeader, false, encoding)) - { - #ifdef DEBUG_PARSER - TIXML_LOG("XML parsing Comment\n"); - #endif - returnNode = new TiXmlComment(); - } - else if (StringEqual(p, cdataHeader, false, encoding)) - { - #ifdef DEBUG_PARSER - TIXML_LOG("XML parsing CDATA\n"); - #endif - TiXmlText* text = new TiXmlText(""); - text->SetCDATA(true); - returnNode = text; - } - else if (StringEqual(p, dtdHeader, false, encoding)) - { - #ifdef DEBUG_PARSER - TIXML_LOG("XML parsing Unknown(1)\n"); - #endif - returnNode = new TiXmlUnknown(); - } - else if ( IsAlpha(*(p+1), encoding) - || *(p+1) == '_') - { - #ifdef DEBUG_PARSER - TIXML_LOG("XML parsing Element\n"); - #endif - returnNode = new TiXmlElement(""); - } - else - { - #ifdef DEBUG_PARSER - TIXML_LOG("XML parsing Unknown(2)\n"); - #endif - returnNode = new TiXmlUnknown(); - } - - if (returnNode) - { - // Set 
the parent, so it can report errors - returnNode->parent = this; - } - else - { - if (doc) - doc->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN); - } - return returnNode; -} - -#ifdef TIXML_USE_STL - -void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag) -{ - // We're called with some amount of pre-parsing. That is, some of "this" - // element is in "tag". Go ahead and stream to the closing ">" - while (in->good()) - { - int c = in->get(); - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - (*tag) += (char) c ; - - if (c == '>') - break; - } - - if (tag->length() < 3) return; - - // Okay...if we are a "/>" tag, then we're done. We've read a complete tag. - // If not, identify and stream. - - if ( tag->at(tag->length() - 1) == '>' - && tag->at(tag->length() - 2) == '/') - { - // All good! - return; - } - else if (tag->at(tag->length() - 1) == '>') - { - // There is more. Could be: - // text - // closing tag - // another node. - for (;;) - { - StreamWhiteSpace(in, tag); - - // Do we have text? - if (in->good() && in->peek() != '<') - { - // Yep, text. - TiXmlText text(""); - text.StreamIn(in, tag); - - // What follows text is a closing tag or another node. - // Go around again and figure it out. - continue; - } - - // We now have either a closing tag...or another node. - // We should be at a "<", regardless. 
- if (!in->good()) return; - assert(in->peek() == '<'); - int tagIndex = (int) tag->length(); - - bool closingTag = false; - bool firstCharFound = false; - - for (;;) - { - if (!in->good()) - return; - - int c = in->peek(); - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - - if (c == '>') - break; - - *tag += (char) c; - in->get(); - - if (!firstCharFound && c != '<' && !IsWhiteSpace(c)) - { - firstCharFound = true; - if (c == '/') - closingTag = true; - } - } - // If it was a closing tag, then read in the closing '>' to clean up the input stream. - // If it was not, the streaming will be done by the tag. - if (closingTag) - { - if (!in->good()) - return; - - int c = in->get(); - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - assert(c == '>'); - *tag += (char) c; - - // We are done, once we've found our closing tag. - return; - } - else - { - // If not a closing tag, id it, and stream. - const char* tagloc = tag->c_str() + tagIndex; - TiXmlNode* node = Identify(tagloc, TIXML_DEFAULT_ENCODING); - if (!node) - return; - node->StreamIn(in, tag); - delete node; - node = 0; - - // No return: go around from the beginning: text, closing tag, or node. - } - } - } -} -#endif - -const char* TiXmlElement::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding) -{ - p = SkipWhiteSpace(p, encoding); - TiXmlDocument* document = GetDocument(); - - if (!p || !*p) - { - if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding); - return 0; - } - - if (data) - { - data->Stamp(p, encoding); - location = data->Cursor(); - } - - if (*p != '<') - { - if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, p, data, encoding); - return 0; - } - - p = SkipWhiteSpace(p+1, encoding); - - // Read the name. 
- const char* pErr = p; - - p = ReadName(p, &value, encoding); - if (!p || !*p) - { - if (document) document->SetError(TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding); - return 0; - } - - TIXML_STRING endTag ("</"); - endTag += value; - endTag += ">"; - - // Check for and read attributes. Also look for an empty - // tag or an end tag. - while (p && *p) - { - pErr = p; - p = SkipWhiteSpace(p, encoding); - if (!p || !*p) - { - if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding); - return 0; - } - if (*p == '/') - { - ++p; - // Empty tag. - if (*p != '>') - { - if (document) document->SetError(TIXML_ERROR_PARSING_EMPTY, p, data, encoding); - return 0; - } - return (p+1); - } - else if (*p == '>') - { - // Done with attributes (if there were any.) - // Read the value -- which can include other - // elements -- read the end tag, and return. - ++p; - p = ReadValue(p, data, encoding); // Note this is an Element method, and will set the error if one happens. 
- if (!p || !*p) - return 0; - - // We should find the end tag now - if (StringEqual(p, endTag.c_str(), false, encoding)) - { - p += endTag.length(); - return p; - } - else - { - if (document) document->SetError(TIXML_ERROR_READING_END_TAG, p, data, encoding); - return 0; - } - } - else - { - // Try to read an attribute: - TiXmlAttribute* attrib = new TiXmlAttribute(); - if (!attrib) - { - if (document) document->SetError(TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding); - return 0; - } - - attrib->SetDocument(document); - const char* pErr = p; - p = attrib->Parse(p, data, encoding); - - if (!p || !*p) - { - if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding); - delete attrib; - return 0; - } - - // Handle the strange case of double attributes: - TiXmlAttribute* node = attributeSet.Find(attrib->NameTStr()); - if (node) - { - node->SetValue(attrib->Value()); - delete attrib; - return 0; - } - - attributeSet.Add(attrib); - } - } - return p; -} - - -const char* TiXmlElement::ReadValue(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding) -{ - TiXmlDocument* document = GetDocument(); - - // Read in text and elements in any order. - const char* pWithWhiteSpace = p; - p = SkipWhiteSpace(p, encoding); - - while (p && *p) - { - if (*p != '<') - { - // Take what we have, make a text element. - TiXmlText* textNode = new TiXmlText(""); - - if (!textNode) - { - if (document) document->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding); - return 0; - } - - if (TiXmlBase::IsWhiteSpaceCondensed()) - { - p = textNode->Parse(p, data, encoding); - } - else - { - // Special case: we want to keep the white space - // so that leading spaces aren't removed. - p = textNode->Parse(pWithWhiteSpace, data, encoding); - } - - if (!textNode->Blank()) - LinkEndChild(textNode); - else - delete textNode; - } - else - { - // We hit a '<' - // Have we hit a new element or an end tag? This could also be - // a TiXmlText in the "CDATA" style. 
- if (StringEqual(p, "</", false, encoding)) - { - return p; - } - else - { - TiXmlNode* node = Identify(p, encoding); - if (node) - { - p = node->Parse(p, data, encoding); - LinkEndChild(node); - } - else - { - return 0; - } - } - } - pWithWhiteSpace = p; - p = SkipWhiteSpace(p, encoding); - } - - if (!p) - { - if (document) document->SetError(TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding); - } - return p; -} - - -#ifdef TIXML_USE_STL -void TiXmlUnknown::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag) -{ - while (in->good()) - { - int c = in->get(); - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - (*tag) += (char) c; - - if (c == '>') - { - // All is well. - return; - } - } -} -#endif - - -const char* TiXmlUnknown::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding) -{ - TiXmlDocument* document = GetDocument(); - p = SkipWhiteSpace(p, encoding); - - if (data) - { - data->Stamp(p, encoding); - location = data->Cursor(); - } - if (!p || !*p || *p != '<') - { - if (document) document->SetError(TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding); - return 0; - } - ++p; - value = ""; - - while (p && *p && *p != '>') - { - value += *p; - ++p; - } - - if (!p) - { - if (document) document->SetError(TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding); - } - if (*p == '>') - return p+1; - return p; -} - -#ifdef TIXML_USE_STL -void TiXmlComment::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag) -{ - while (in->good()) - { - int c = in->get(); - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - - (*tag) += (char) c; - - if (c == '>' - && tag->at(tag->length() - 2) == '-' - && tag->at(tag->length() - 3) == '-') - { - // All is well. 
- return; - } - } -} -#endif - - -const char* TiXmlComment::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding) -{ - TiXmlDocument* document = GetDocument(); - value = ""; - - p = SkipWhiteSpace(p, encoding); - - if (data) - { - data->Stamp(p, encoding); - location = data->Cursor(); - } - const char* startTag = "<!--"; - const char* endTag = "-->"; - - if (!StringEqual(p, startTag, false, encoding)) - { - document->SetError(TIXML_ERROR_PARSING_COMMENT, p, data, encoding); - return 0; - } - p += strlen(startTag); - p = ReadText(p, &value, false, endTag, false, encoding); - return p; -} - - -const char* TiXmlAttribute::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding) -{ - p = SkipWhiteSpace(p, encoding); - if (!p || !*p) return 0; - -// int tabsize = 4; -// if (document) -// tabsize = document->TabSize(); - - if (data) - { - data->Stamp(p, encoding); - location = data->Cursor(); - } - // Read the name, the '=' and the value. - const char* pErr = p; - p = ReadName(p, &name, encoding); - if (!p || !*p) - { - if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding); - return 0; - } - p = SkipWhiteSpace(p, encoding); - if (!p || !*p || *p != '=') - { - if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding); - return 0; - } - - ++p; // skip '=' - p = SkipWhiteSpace(p, encoding); - if (!p || !*p) - { - if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding); - return 0; - } - - const char* end; - const char SINGLE_QUOTE = '\''; - const char DOUBLE_QUOTE = '\"'; - - if (*p == SINGLE_QUOTE) - { - ++p; - end = "\'"; // single quote in string - p = ReadText(p, &value, false, end, false, encoding); - } - else if (*p == DOUBLE_QUOTE) - { - ++p; - end = "\""; // double quote in string - p = ReadText(p, &value, false, end, false, encoding); - } - else - { - // All attribute values should be in single or double quotes. 
- // But this is such a common error that the parser will try - // its best, even without them. - value = ""; - while ( p && *p // existence - && !IsWhiteSpace(*p) && *p != '\n' && *p != '\r' // whitespace - && *p != '/' && *p != '>') // tag end - { - if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE) { - // [ 1451649 ] Attribute values with trailing quotes not handled correctly - // We did not have an opening quote but seem to have a - // closing one. Give up and throw an error. - if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding); - return 0; - } - value += *p; - ++p; - } - } - return p; -} - -#ifdef TIXML_USE_STL -void TiXmlText::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag) -{ - if (cdata) - { - int c = in->get(); - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - - (*tag) += (char) c; - - if (c == '>' - && tag->at(tag->length() - 2) == ']' - && tag->at(tag->length() - 3) == ']') - { - // All is well. 
- return; - } - } - else - { - while (in->good()) - { - int c = in->peek(); - if (c == '<') - return; - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - - (*tag) += (char) c; - in->get(); - } - } -} -#endif - -const char* TiXmlText::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding) -{ - value = ""; - TiXmlDocument* document = GetDocument(); - - if (data) - { - data->Stamp(p, encoding); - location = data->Cursor(); - } - - const char* const startTag = "<![CDATA["; - const char* const endTag = "]]>"; - - if (cdata || StringEqual(p, startTag, false, encoding)) - { - cdata = true; - - if (!StringEqual(p, startTag, false, encoding)) - { - document->SetError(TIXML_ERROR_PARSING_CDATA, p, data, encoding); - return 0; - } - p += strlen(startTag); - - // Keep all the white space, ignore the encoding, etc. - while ( p && *p - && !StringEqual(p, endTag, false, encoding) - ) - { - value += *p; - ++p; - } - - TIXML_STRING dummy; - p = ReadText(p, &dummy, false, endTag, false, encoding); - return p; - } - else - { - bool ignoreWhite = true; - - const char* end = "<"; - p = ReadText(p, &value, ignoreWhite, end, false, encoding); - if (p) - return p-1; // don't truncate the '<' - return 0; - } -} - -#ifdef TIXML_USE_STL -void TiXmlDeclaration::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag) -{ - while (in->good()) - { - int c = in->get(); - if (c <= 0) - { - TiXmlDocument* document = GetDocument(); - if (document) - document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN); - return; - } - (*tag) += (char) c; - - if (c == '>') - { - // All is well. - return; - } - } -} -#endif - -const char* TiXmlDeclaration::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding) -{ - p = SkipWhiteSpace(p, _encoding); - // Find the beginning, find the end, and look for - // the stuff in-between. 
- TiXmlDocument* document = GetDocument(); - if (!p || !*p || !StringEqual(p, "<?xml", true, _encoding)) - { - if (document) document->SetError(TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding); - return 0; - } - if (data) - { - data->Stamp(p, _encoding); - location = data->Cursor(); - } - p += 5; - - version = ""; - encoding = ""; - standalone = ""; - - while (p && *p) - { - if (*p == '>') - { - ++p; - return p; - } - - p = SkipWhiteSpace(p, _encoding); - if (StringEqual(p, "version", true, _encoding)) - { - TiXmlAttribute attrib; - p = attrib.Parse(p, data, _encoding); - version = attrib.Value(); - } - else if (StringEqual(p, "encoding", true, _encoding)) - { - TiXmlAttribute attrib; - p = attrib.Parse(p, data, _encoding); - encoding = attrib.Value(); - } - else if (StringEqual(p, "standalone", true, _encoding)) - { - TiXmlAttribute attrib; - p = attrib.Parse(p, data, _encoding); - standalone = attrib.Value(); - } - else - { - // Read over whatever it is. - while (p && *p && *p != '>' && !IsWhiteSpace(*p)) - ++p; - } - } - return 0; -} - -bool TiXmlText::Blank() const -{ - for (unsigned i=0; i<value.length(); i++) - if (!IsWhiteSpace(value[i])) - return false; - return true; -} - +/*
+www.sourceforge.net/projects/tinyxml
+Original code (2.0 and earlier)copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any
+damages arising from the use of this software.
+
+Permission is granted to anyone to use this software for any
+purpose, including commercial applications, and to alter it and
+redistribute it freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must
+not claim that you wrote the original software. If you use this
+software in a product, an acknowledgment in the product documentation
+would be appreciated but is not required.
+
+2. Altered source versions must be plainly marked as such, and
+must not be misrepresented as being the original software.
+
+3. This notice may not be removed or altered from any source
+distribution.
+
+===============================================================================
+
+UserinfoEx plugin for Miranda IM
+
+Copyright:
+© 2006-2010 DeathAxe, Yasnovidyashii, Merlin, K. Romanov, Kreol
+===============================================================================
+*/
+
+#include "..\commonheaders.h"
+
+// Note that "PutString" hardcodes the same list. This
+// is less flexible than it appears. Changing the entries
+// or their order will break PutString.
+// The five predefined XML entities: escaped spelling, its length in bytes
+// (GetEntity asserts that it equals strlen of the spelling) and the character
+// the entity stands for.
+TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
+{
+	{ "&amp;",  5, '&' },
+	{ "&lt;",   4, '<' },
+	{ "&gt;",   4, '>' },
+	{ "&quot;", 6, '\"' },
+	{ "&apos;", 6, '\'' }
+};
+
+// Bunch of unicode info at:
+// http://www.unicode.org/faq/utf_bom.html
+// Including the basic of this table, which determines the #bytes in the
+// sequence from the lead byte. 1 placed for invalid sequences --
+// although the result will be junk, pass it through as much as possible.
+// Beware of the non-characters in UTF-8:
+// ef bb bf (Microsoft "lead bytes")
+// ef bf be
+// ef bf bf
+
+const unsigned char TIXML_UTF_LEAD_0 = 0xefU; // first byte of the "ef bb bf" lead sequence; also starts "ef bf be" / "ef bf bf"
+const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; // second byte of the "ef bb bf" lead sequence
+const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; // third byte of the "ef bb bf" lead sequence
+
+const int TiXmlBase::utf8ByteTable[256] = // length in bytes of a UTF-8 sequence, indexed by its lead byte
+{
+ // Columns: low nibble of the lead byte (0..f); the trailing comment of each row gives the high nibble.
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid as lead bytes: stepped over one byte at a time
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
+ 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
+ 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
+};
+
+
+// Encodes one code point as UTF-8.
+//   input  - the value to encode.
+//   output - receives the encoded bytes (up to four); not terminated.
+//   length - receives the number of bytes written, or 0 when the value is
+//            0x200000 or above, in which case output is left untouched.
+void TiXmlBase::ConvertUTF32ToUTF8(unsigned long input, char* output, int* length)
+{
+	const unsigned long TRAIL_KEEP = 0xBF;	// clears bit 6 so a trail byte reads 10xxxxxx
+	const unsigned long TRAIL_MARK = 0x80;	// sets bit 7 of a trail byte
+	const unsigned long LEAD_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
+
+	int count = 0;
+	if (input < 0x80)
+		count = 1;
+	else if (input < 0x800)
+		count = 2;
+	else if (input < 0x10000)
+		count = 3;
+	else if (input < 0x200000)
+		count = 4;
+
+	*length = count;
+	if (count == 0)
+		return;		// This code won't convert this correctly anyway.
+
+	// Trail bytes are produced back to front, six payload bits at a time.
+	for (int i = count - 1; i > 0; --i)
+	{
+		output[i] = (char)((input | TRAIL_MARK) & TRAIL_KEEP);
+		input >>= 6;
+	}
+
+	// The remaining bits share the lead byte with the length marker.
+	output[0] = (char)(input | LEAD_MARK[count]);
+}
+
+
+/*static*/ int TiXmlBase::IsAlpha(unsigned char anyByte, TiXmlEncoding /*encoding*/)
+{
+	// Only bytes below 127 are classified with isalpha(). Everything from 127
+	// upwards is reported as a letter regardless of the encoding: telling
+	// alphabetical from non-alphabetical bytes across encodings is tricky, so
+	// the parser is deliberately permissive there and only gets the English
+	// letters exactly right.
+	return (anyByte >= 127) ? 1 : isalpha(anyByte);
+}
+
+
+/*static*/ int TiXmlBase::IsAlphaNum(unsigned char anyByte, TiXmlEncoding /*encoding*/)
+{
+	// Only bytes below 127 are classified with isalnum(). Everything from 127
+	// upwards is accepted as a valid name character regardless of the
+	// encoding: the unicode set is huge, so the parser stays permissive and
+	// only gets the English letters and digits exactly right.
+	return (anyByte >= 127) ? 1 : isalnum(anyByte);
+}
+
+
+// Keeps the row/column cursor that belongs to a position in the buffer being
+// parsed. Instances can only be created by TiXmlDocument (friend); the nodes
+// call Stamp() and read the result back through Cursor().
+class TiXmlParsingData
+{
+	friend class TiXmlDocument;
+
+public:
+	// Moves the cursor forward from the last stamped position to 'now'.
+	void Stamp(const char* now, TiXmlEncoding encoding);
+
+	// Row/column as of the last Stamp() call (or of construction).
+	const TiXmlCursor& Cursor() { return cursor; }
+
+private:
+	// Only used by the document!
+	TiXmlParsingData(const char* start, int _tabsize, int row, int col)
+		: stamp(start), tabsize(_tabsize)
+	{
+		assert(start);
+		cursor.row = row;
+		cursor.col = col;
+	}
+
+	TiXmlCursor cursor;	// row/column matching 'stamp'
+	const char* stamp;	// buffer position the cursor was last computed for
+	int tabsize;		// tab width for column counting; Stamp() does nothing when below 1
+};
+
+
+// Advances the cursor (row/column) from the previously stamped position up to
+// 'now' and remembers the position reached as the new stamp.
+//   now      - position in the buffer being parsed; must not be null.
+//   encoding - with TIXML_ENCODING_UTF8 a multi-byte sequence counts as one
+//              column and the ef-bb-bf / ef-bf-be / ef-bf-bf sequences as none.
+// Nothing is tracked when the tab size is below 1. If a terminating null is
+// met before 'now', the function returns without updating cursor or stamp.
+void TiXmlParsingData::Stamp(const char* now, TiXmlEncoding encoding)
+{
+	assert(now);
+
+	// Do nothing if the tabsize is 0.
+	if (tabsize < 1)
+	{
+		return;
+	}
+
+	// Get the current row, column.
+	int row = cursor.row;
+	int col = cursor.col;
+	const char* p = stamp;
+	assert(p);
+
+	while (p < now)
+	{
+		// Treat p as unsigned, so we have a happy compiler.
+		const unsigned char* pU = (const unsigned char*)p;
+
+		// Code contributed by Fletcher Dunn: (modified by lee)
+		switch (*pU) {
+			case 0:
+				// We *should* never get here, but in case we do, don't
+				// advance past the terminating null character, ever
+				return;
+
+			case '\r':
+				// bump down to the next line
+				++row;
+				col = 0;
+				// Eat the character
+				++p;
+
+				// Check for \r\n sequence, and treat this as a single character
+				if (*p == '\n') {
+					++p;
+				}
+				break;
+
+			case '\n':
+				// bump down to the next line
+				++row;
+				col = 0;
+
+				// Eat the character
+				++p;
+
+				// Check for \n\r sequence, and treat this as a single
+				// character. (Yes, this bizarre thing does occur still
+				// on some arcane platforms...)
+				if (*p == '\r') {
+					++p;
+				}
+				break;
+
+			case '\t':
+				// Eat the character
+				++p;
+
+				// Skip to next tab stop
+				col = (col / tabsize + 1) * tabsize;
+				break;
+
+			case TIXML_UTF_LEAD_0:
+				if (encoding == TIXML_ENCODING_UTF8 && *(p+1) && *(p+2))
+				{
+					// The three special sequences are 0-width: the column
+					// only advances for a normal 3 byte character.
+					const bool zeroWidth =
+						   (*(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2)
+						|| (*(pU+1)==0xbfU && *(pU+2)==0xbeU)
+						|| (*(pU+1)==0xbfU && *(pU+2)==0xbfU);
+					p += 3;
+					if (!zeroWidth)
+						++col;
+				}
+				else
+				{
+					// Either not UTF-8, or a sequence cut short by the end of
+					// the buffer. Consume a single byte: previously the
+					// truncated case consumed nothing and the loop never ended.
+					++p;
+					++col;
+				}
+				break;
+
+			default:
+				if (encoding == TIXML_ENCODING_UTF8)
+				{
+					// Eat the 1 to 4 byte utf8 character.
+					int step = TiXmlBase::utf8ByteTable[*pU];
+					if (step == 0)
+						step = 1;		// Error case from bad encoding, but handle gracefully.
+
+					// Stop at the terminator if the sequence is truncated, so
+					// p never ends up beyond the end of the buffer.
+					while (step > 0 && *p)
+					{
+						++p;
+						--step;
+					}
+
+					// Just advance one column, of course.
+					++col;
+				}
+				else
+				{
+					++p;
+					++col;
+				}
+				break;
+		}
+	}
+	cursor.row = row;
+	cursor.col = col;
+	assert(cursor.row >= -1);
+	assert(cursor.col >= -1);
+	stamp = p;
+	assert(stamp);
+}
+
+
+// Returns the first position at or after p that is not white space. With
+// TIXML_ENCODING_UTF8 the three byte sequences ef-bb-bf, ef-bf-be and
+// ef-bf-bf are skipped as well. Returns 0 when p is null or points at an
+// empty string; otherwise the result may point at the terminating null.
+const char* TiXmlBase::SkipWhiteSpace(const char* p, TiXmlEncoding encoding)
+{
+	if (!p || !*p)
+		return 0;
+
+	const bool utf8 = (encoding == TIXML_ENCODING_UTF8);
+
+	for (;;)
+	{
+		if (utf8)
+		{
+			// Skip the Microsoft UTF-8 byte order mark and the two
+			// non-characters that share its first byte.
+			const unsigned char* pU = (const unsigned char*)p;
+			if (   pU[0] == TIXML_UTF_LEAD_0
+				&& (   (pU[1] == TIXML_UTF_LEAD_1 && pU[2] == TIXML_UTF_LEAD_2)
+					|| (pU[1] == 0xbfU && (pU[2] == 0xbeU || pU[2] == 0xbfU))))
+			{
+				p += 3;
+				continue;
+			}
+		}
+
+		// Still using old rules for white space.
+		if (!*p || !(IsWhiteSpace(*p) || *p == '\n' || *p == '\r'))
+			break;
+		++p;
+	}
+
+	return p;
+}
+
+#ifdef TIXML_USE_STL
+// Moves leading white space from the stream onto the end of 'tag'. Returns
+// true as soon as the next character is not white space (or is <= 0), false
+// when the stream stops being good() first.
+/*static*/ bool TiXmlBase::StreamWhiteSpace(TIXML_ISTREAM * in, TIXML_STRING * tag)
+{
+	while (in->good())
+	{
+		const int next = in->peek();
+
+		// At this scope, we can't get to a document. So fail silently.
+		if (next <= 0 || !IsWhiteSpace(next))
+			return true;
+
+		*tag += (char) in->get();
+	}
+	return false;
+}
+
+// Copies characters from the stream onto the end of 'tag' until 'character'
+// is the next one to be read (it is left in the stream). Returns true when
+// it was found, false when the stream ended or delivered a value <= 0 first.
+/*static*/ bool TiXmlBase::StreamTo(TIXML_ISTREAM * in, int character, TIXML_STRING * tag)
+{
+	//assert(character > 0 && character < 128);	// else it won't work in utf-8
+	for (;;)
+	{
+		if (!in->good())
+			return false;
+
+		const int next = in->peek();
+		if (next == character)
+			return true;
+		if (next <= 0)		// Silent failure: can't get document at this scope
+			return false;
+
+		in->get();
+		*tag += (char) next;
+	}
+}
+#endif
+
+// Reads an XML name starting at p into *name (which is cleared first).
+// Returns the position just past the name, or 0 when p does not start with a
+// letter or an underscore.
+const char* TiXmlBase::ReadName(const char* p, TIXML_STRING * name, TiXmlEncoding encoding)
+{
+	*name = "";
+	assert(p);
+
+	if (!p || !*p)
+		return 0;
+
+	// Names start with letters or underscores. Of course, in unicode,
+	// tinyxml has no idea what a letter *is*, so the test is generous.
+	if (!IsAlpha((unsigned char) *p, encoding) && *p != '_')
+		return 0;
+
+	// After that come letters, digits, underscores, hyphens, periods or
+	// colons. (Colons are valid only for namespaces, but tinyxml can't tell
+	// namespaces from names.)
+	for (; *p; ++p)
+	{
+		const char ch = *p;
+		const bool partOfName =    IsAlphaNum((unsigned char) ch, encoding)
+								|| ch == '_'
+								|| ch == '-'
+								|| ch == '.'
+								|| ch == ':';
+		if (!partOfName)
+			break;
+		(*name) += ch;
+	}
+	return p;
+}
+/*
+ * Decodes the entity reference presumed to start at p and stores the result
+ * in 'value'; *length receives the number of bytes stored.
+ *
+ *  - Numeric references ("#" followed by decimal digits, or "#x" followed by
+ *    hex digits, up to the next ';') are accumulated digit by digit, walking
+ *    backwards from the ';'. With TIXML_ENCODING_UTF8 the number is written
+ *    as UTF-8 through ConvertUTF32ToUTF8, otherwise it is cast to one char.
+ *    Returns the position just past the ';', or 0 when no ';' follows or a
+ *    character in between is not a valid digit.
+ *  - Otherwise the text at p is compared against the 'entity' table; on a
+ *    match the table's character is stored, *length is 1 and the position
+ *    just past the matched text is returned.
+ *  - Otherwise *p itself is copied to *value, *length stays 0 and p+1 is
+ *    returned.
+ *
+ * NOTE(review): presumably p points at the '&' that opens the reference;
+ * confirm against the caller.
+ */
+const char* TiXmlBase::GetEntity(const char* p, char* value, int* length, TiXmlEncoding encoding)
+{
+ // Presume an entity, and pull it out.
+ TIXML_STRING ent; // not referenced again in this function
+ int i;
+ *length = 0;
+
+ if (*(p+1) && *(p+1) == '#' && *(p+2))
+ {
+ unsigned long ucs = 0; // value of the numeric reference
+ ptrdiff_t delta = 0; // distance from p to the terminating ';'
+ unsigned mult = 1; // weight of the digit currently being added
+
+ if (*(p+2) == 'x')
+ {
+ // Hexadecimal.
+ if (!*(p+3)) return 0;
+
+ const char* q = p+3;
+ q = strchr(q, ';');
+
+ if (!q || !*q) return 0;
+
+ delta = q-p;
+ --q; // start at the last digit and walk back to the 'x'
+
+ while (*q != 'x')
+ {
+ if (*q >= '0' && *q <= '9')
+ ucs += mult * (*q - '0');
+ else if (*q >= 'a' && *q <= 'f')
+ ucs += mult * (*q - 'a' + 10);
+ else if (*q >= 'A' && *q <= 'F')
+ ucs += mult * (*q - 'A' + 10);
+ else
+ return 0;
+ mult *= 16;
+ --q;
+ }
+ }
+ else
+ {
+ // Decimal.
+ if (!*(p+2)) return 0;
+
+ const char* q = p+2;
+ q = strchr(q, ';');
+
+ if (!q || !*q) return 0;
+
+ delta = q-p;
+ --q; // start at the last digit and walk back to the '#'
+
+ while (*q != '#')
+ {
+ if (*q >= '0' && *q <= '9')
+ ucs += mult * (*q - '0');
+ else
+ return 0;
+ mult *= 10;
+ --q;
+ }
+ }
+ if (encoding == TIXML_ENCODING_UTF8)
+ {
+ // convert the UCS to UTF-8
+ ConvertUTF32ToUTF8(ucs, value, length);
+ }
+ else
+ {
+ *value = (char)ucs; // keeps only what fits into a char
+ *length = 1;
+ }
+ return p + delta + 1; // just past the ';'
+ }
+
+ // Now try to match it.
+ for (i=0; i<NUM_ENTITY; ++i)
+ {
+ if (strncmp(entity[i].str, p, entity[i].strLength) == 0)
+ {
+ assert(strlen(entity[i].str) == entity[i].strLength);
+ *value = entity[i].chr;
+ *length = 1;
+ return (p + entity[i].strLength);
+ }
+ }
+
+ // So it wasn't an entity, its unrecognized, or something like that.
+ *value = *p; // Don't put back the last one, since we return it!
+ //*length = 1; // Leave unrecognized entities - this doesn't really work.
+ // Just writes strange XML.
+ return p+1;
+}
+
+
+// Tells whether the text at p begins with 'tag'.
+//   ignoreCase - compare through ToLower() instead of byte for byte.
+// Returns false (after asserting) when p is null or empty.
+bool TiXmlBase::StringEqual(const char* p,
+							const char* tag,
+							bool ignoreCase,
+							TiXmlEncoding encoding)
+{
+	assert(p);
+	assert(tag);
+	if (!p || !*p)
+	{
+		assert(0);
+		return false;
+	}
+
+	// Walk both strings in step until one of them ends or they differ.
+	for (const char* cur = p; *cur && *tag; ++cur, ++tag)
+	{
+		const bool same = ignoreCase
+			? (ToLower(*cur, encoding) == ToLower(*tag, encoding))
+			: (*cur == *tag);
+		if (!same)
+			break;
+	}
+
+	// Equal only if the whole tag was consumed.
+	return *tag == 0;
+}
+
+// Reads text starting at p into *text (cleared first) until endTag is found.
+//   trimWhiteSpace  - when true and white space condensing is enabled,
+//                     leading white space is dropped and every inner run of
+//                     white space becomes a single space; otherwise the text
+//                     is copied as is.
+//   endTag          - the terminator to look for; it is not added to *text.
+//   caseInsensitive - how endTag is compared.
+// Returns the position just past endTag. Returns 0 when p is (or becomes)
+// null, or when the input ends before endTag is found; the text collected so
+// far is still left in *text.
+const char* TiXmlBase::ReadText(	const char* p,
+									TIXML_STRING * text,
+									bool trimWhiteSpace,
+									const char* endTag,
+									bool caseInsensitive,
+									TiXmlEncoding encoding)
+{
+	*text = "";
+	if (	!trimWhiteSpace			// certain tags always keep whitespace
+		 || !condenseWhiteSpace)	// if true, whitespace is always kept
+	{
+		// Keep all the white space.
+		while (	p && *p
+				&& !StringEqual(p, endTag, caseInsensitive, encoding)
+			 )
+		{
+			int len;
+			char cArr[4] = { 0, 0, 0, 0 };
+			p = GetChar(p, cArr, &len, encoding);
+			text->append(cArr, len);
+		}
+	}
+	else
+	{
+		bool whitespace = false;
+
+		// Remove leading white space:
+		p = SkipWhiteSpace(p, encoding);
+		while (	p && *p
+				&& !StringEqual(p, endTag, caseInsensitive, encoding))
+		{
+			if (*p == '\r' || *p == '\n')
+			{
+				whitespace = true;
+				++p;
+			}
+			else if (IsWhiteSpace(*p))
+			{
+				whitespace = true;
+				++p;
+			}
+			else
+			{
+				// If we've found whitespace, add it before the
+				// new character. Any whitespace just becomes a space.
+				if (whitespace)
+				{
+					(*text) += ' ';
+					whitespace = false;
+				}
+				int len;
+				char cArr[4] = { 0, 0, 0, 0 };
+				p = GetChar(p, cArr, &len, encoding);
+				if (len == 1)
+					(*text) += cArr[0];	// more efficient
+				else
+					text->append(cArr, len);
+			}
+		}
+	}
+
+	// The loops above also stop when p is null (SkipWhiteSpace returns 0 for
+	// empty input) or when the terminating null is reached without a match.
+	// Adding strlen(endTag) in those cases would hand the caller a pointer
+	// that is invalid or lies beyond the end of the buffer.
+	if (!p || !*p)
+		return 0;
+
+	return p + strlen(endTag);
+}
+
+#ifdef TIXML_USE_STL
+/*
+ * Streams a document from 'in', appending everything read to 'tag'.
+ *
+ * Reads up to the first '<' (TIXML_ERROR_PARSING_EMPTY if there is none),
+ * then repeatedly collects the characters in front of the next '>', lets
+ * Identify() decide what kind of node that text starts, and has a temporary
+ * node of that kind stream the rest of itself. Returns once an element has
+ * been streamed this way; every other way out sets an error on the document.
+ */
+void TiXmlDocument::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
+{
+ // The basic issue with a document is that we don't know what we're
+ // streaming. Read something presumed to be a tag (and hope), then
+ // identify it, and call the appropriate stream method on the tag.
+ //
+ // This "pre-streaming" will never read the closing ">" so the
+ // sub-tag can orient itself.
+
+ if (!StreamTo(in, '<', tag))
+ {
+ SetError(TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
+ return;
+ }
+
+ while (in->good())
+ {
+ int tagIndex = (int) tag->length(); // where the text of the upcoming node starts inside 'tag'
+ while (in->good() && in->peek() != '>')
+ {
+ int c = in->get();
+ if (c <= 0)
+ {
+ SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+ break;
+ }
+ (*tag) += (char) c;
+ }
+
+ if (in->good())
+ {
+ // We now have something we presume to be a node of
+ // some sort. Identify it, and call the node to
+ // continue streaming.
+ TiXmlNode* node = Identify(tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING);
+
+ if (node)
+ {
+ node->StreamIn(in, tag);
+ bool isElement = node->ToElement() != 0;
+ delete node; // the node was only needed to pick the right StreamIn
+ node = 0;
+
+ // If this is the root element, we're done. Parsing will be
+ // done by the >> operator.
+ if (isElement)
+ {
+ return;
+ }
+ }
+ else
+ {
+ SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN);
+ return;
+ }
+ }
+ }
+ // We should have returned sooner.
+ SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN);
+}
+
+#endif
+/*
+ * Parses the null terminated text at p into child nodes of this document.
+ *
+ *   prevData - when given, its cursor supplies the starting row/column;
+ *              otherwise counting starts at 0/0.
+ *   encoding - if TIXML_ENCODING_UNKNOWN, it is derived from a leading
+ *              ef-bb-bf sequence or from the encoding named by the first
+ *              declaration node that is parsed.
+ *
+ * Returns the position where parsing stopped, or 0 with
+ * TIXML_ERROR_DOCUMENT_EMPTY set when the input is null/empty, consists of
+ * white space only, or produced no child node at all. The returned pointer
+ * is whatever the last child's Parse returned, so it can be 0 as well.
+ */
+const char* TiXmlDocument::Parse(const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding)
+{
+ ClearError();
+
+ // Parse away, at the document level. Since a document
+ // contains nothing but other tags, most of what happens
+ // here is skipping white space.
+ if (!p || !*p)
+ {
+ SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
+ return 0;
+ }
+
+ // Note that, for a document, this needs to come
+ // before the while space skip, so that parsing
+ // starts from the pointer we are given.
+ location.Clear();
+ if (prevData)
+ {
+ location.row = prevData->cursor.row;
+ location.col = prevData->cursor.col;
+ }
+ else
+ {
+ location.row = 0;
+ location.col = 0;
+ }
+ TiXmlParsingData data(p, TabSize(), location.row, location.col);
+ location = data.Cursor();
+
+ if (encoding == TIXML_ENCODING_UNKNOWN)
+ {
+ // Check for the Microsoft UTF-8 lead bytes.
+ const unsigned char* pU = (const unsigned char*)p;
+ if ( *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
+ && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
+ && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2)
+ {
+ encoding = TIXML_ENCODING_UTF8;
+ useMicrosoftBOM = true;
+ }
+ }
+
+ p = SkipWhiteSpace(p, encoding);
+ if (!p)
+ {
+ SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
+ return 0;
+ }
+
+ while (p && *p)
+ {
+ TiXmlNode* node = Identify(p, encoding);
+ if (node)
+ {
+ p = node->Parse(p, &data, encoding);
+ LinkEndChild(node); // the node is linked even when its Parse returned 0
+ }
+ else
+ {
+ break;
+ }
+
+ // Did we get encoding info?
+ if ( encoding == TIXML_ENCODING_UNKNOWN
+ && node->ToDeclaration())
+ {
+ TiXmlDeclaration* dec = node->ToDeclaration();
+ const char* enc = dec->Encoding();
+ assert(enc);
+
+ if (*enc == 0)
+ encoding = TIXML_ENCODING_UTF8; // a declaration without an encoding is taken as UTF-8
+ else if (StringEqual(enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN))
+ encoding = TIXML_ENCODING_UTF8;
+ else if (StringEqual(enc, "UTF8", true, TIXML_ENCODING_UNKNOWN))
+ encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
+ else
+ encoding = TIXML_ENCODING_LEGACY;
+ }
+
+ p = SkipWhiteSpace(p, encoding);
+ }
+
+ // Was this empty?
+ if (!firstChild) {
+ SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding);
+ return 0;
+ }
+
+ // All is well.
+ return p;
+}
+
+void TiXmlDocument::SetError(int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding)
+{
+ // The first error in a chain is more accurate - don't set again!
+ if (error)
+ return;
+
+ assert(err > 0 && err < TIXML_ERROR_STRING_COUNT);
+ error = true;
+ errorId = err;
+ errorDesc = errorString[ errorId ];
+
+ errorLocation.Clear();
+ if (pError && data)
+ {
+ data->Stamp(pError, encoding);
+ errorLocation = data->Cursor();
+ }
+}
+
+
+// Looks at the text at p and creates an empty node of the matching kind,
+// with this node set as its parent so that it can report errors. Nothing is
+// parsed here. Returns 0 when, after leading white space, the text does not
+// start with '<'.
+//
+// What is created:
+//  - "<?xml"      declaration (case-insensitive match)
+//  - "<!--"       comment
+//  - "<![CDATA["  text node flagged as CDATA
+//  - "<!"         unknown
+//  - '<' followed by a letter or an underscore: element
+//  - everything else is unknown to tinyxml.
+TiXmlNode* TiXmlNode::Identify(const char* p, TiXmlEncoding encoding)
+{
+	p = SkipWhiteSpace(p, encoding);
+	if (!p || *p != '<')
+		return 0;
+
+	TiXmlDocument* owner = GetDocument();
+
+	const char* const declarationStart = "<?xml";
+	const char* const commentStart = "<!--";
+	const char* const cdataStart = "<![CDATA[";
+	const char* const dtdStart = "<!";
+
+	TiXmlNode* created = 0;
+
+	if (StringEqual(p, declarationStart, true, encoding))
+	{
+		#ifdef DEBUG_PARSER
+			TIXML_LOG("XML parsing Declaration\n");
+		#endif
+		created = new TiXmlDeclaration();
+	}
+	else if (StringEqual(p, commentStart, false, encoding))
+	{
+		#ifdef DEBUG_PARSER
+			TIXML_LOG("XML parsing Comment\n");
+		#endif
+		created = new TiXmlComment();
+	}
+	else if (StringEqual(p, cdataStart, false, encoding))
+	{
+		#ifdef DEBUG_PARSER
+			TIXML_LOG("XML parsing CDATA\n");
+		#endif
+		TiXmlText* cdataNode = new TiXmlText("");
+		cdataNode->SetCDATA(true);
+		created = cdataNode;
+	}
+	else if (StringEqual(p, dtdStart, false, encoding))
+	{
+		#ifdef DEBUG_PARSER
+			TIXML_LOG("XML parsing Unknown(1)\n");
+		#endif
+		created = new TiXmlUnknown();
+	}
+	else if (IsAlpha(*(p+1), encoding) || *(p+1) == '_')
+	{
+		#ifdef DEBUG_PARSER
+			TIXML_LOG("XML parsing Element\n");
+		#endif
+		created = new TiXmlElement("");
+	}
+	else
+	{
+		#ifdef DEBUG_PARSER
+			TIXML_LOG("XML parsing Unknown(2)\n");
+		#endif
+		created = new TiXmlUnknown();
+	}
+
+	if (!created)
+	{
+		if (owner)
+			owner->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN);
+		return 0;
+	}
+
+	// Set the parent, so it can report errors
+	created->parent = this;
+	return created;
+}
+
+#ifdef TIXML_USE_STL
+/*
+ * Streams the rest of this element from 'in', appending all of it to 'tag'.
+ *
+ * First the start tag is completed up to its '>'. A tag ending in "/>" is
+ * complete at that point. Otherwise the content is streamed in a loop:
+ * white space, then text (through a temporary TiXmlText), then whatever
+ * starts at the next '<'. A tag whose first character after '<' is '/' is
+ * taken as the closing tag and ends the function; anything else is handed
+ * to Identify() and streamed by a temporary node of the identified kind.
+ * A character value <= 0 sets TIXML_ERROR_EMBEDDED_NULL on the document.
+ */
+void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
+{
+ // We're called with some amount of pre-parsing. That is, some of "this"
+ // element is in "tag". Go ahead and stream to the closing ">"
+ while (in->good())
+ {
+ int c = in->get();
+ if (c <= 0)
+ {
+ TiXmlDocument* document = GetDocument();
+ if (document)
+ document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+ return;
+ }
+ (*tag) += (char) c ;
+
+ if (c == '>')
+ break;
+ }
+
+ if (tag->length() < 3) return; // keeps the at(length - 2) accesses below in range
+
+ // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
+ // If not, identify and stream.
+
+ if ( tag->at(tag->length() - 1) == '>'
+ && tag->at(tag->length() - 2) == '/')
+ {
+ // All good!
+ return;
+ }
+ else if (tag->at(tag->length() - 1) == '>')
+ {
+ // There is more. Could be:
+ // text
+ // closing tag
+ // another node.
+ for (;;)
+ {
+ StreamWhiteSpace(in, tag);
+
+ // Do we have text?
+ if (in->good() && in->peek() != '<')
+ {
+ // Yep, text.
+ TiXmlText text("");
+ text.StreamIn(in, tag);
+
+ // What follows text is a closing tag or another node.
+ // Go around again and figure it out.
+ continue;
+ }
+
+ // We now have either a closing tag...or another node.
+ // We should be at a "<", regardless.
+ if (!in->good()) return;
+ assert(in->peek() == '<');
+ int tagIndex = (int) tag->length(); // where the upcoming tag starts inside 'tag'
+
+ bool closingTag = false;
+ bool firstCharFound = false;
+
+ for (;;)
+ {
+ if (!in->good())
+ return;
+
+ int c = in->peek();
+ if (c <= 0)
+ {
+ TiXmlDocument* document = GetDocument();
+ if (document)
+ document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+ return;
+ }
+
+ if (c == '>')
+ break;
+
+ *tag += (char) c;
+ in->get();
+
+ if (!firstCharFound && c != '<' && !IsWhiteSpace(c)) // first significant character after the '<'
+ {
+ firstCharFound = true;
+ if (c == '/')
+ closingTag = true;
+ }
+ }
+ // If it was a closing tag, then read in the closing '>' to clean up the input stream.
+ // If it was not, the streaming will be done by the tag.
+ if (closingTag)
+ {
+ if (!in->good())
+ return;
+
+ int c = in->get();
+ if (c <= 0)
+ {
+ TiXmlDocument* document = GetDocument();
+ if (document)
+ document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+ return;
+ }
+ assert(c == '>');
+ *tag += (char) c;
+
+ // We are done, once we've found our closing tag.
+ return;
+ }
+ else
+ {
+ // If not a closing tag, id it, and stream.
+ const char* tagloc = tag->c_str() + tagIndex;
+ TiXmlNode* node = Identify(tagloc, TIXML_DEFAULT_ENCODING);
+ if (!node)
+ return;
+ node->StreamIn(in, tag);
+ delete node; // the node was only needed to pick the right StreamIn
+ node = 0;
+
+ // No return: go around from the beginning: text, closing tag, or node.
+ }
+ }
+ }
+}
+#endif
+
+// Parses one element that starts at p: '<', the name, the attributes and then
+// either "/>" or '>' followed by the content (read by ReadValue) and the
+// matching "</name>" end tag.
+//   data - parsing data used for row/column tracking, may be 0.
+// Returns the position just past the element, or 0 on failure. Failures are
+// reported through SetError on the owning document, if there is one; this
+// now includes an attribute name that appears twice, which used to abort the
+// parse without any error being recorded.
+const char* TiXmlElement::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
+{
+	p = SkipWhiteSpace(p, encoding);
+	TiXmlDocument* document = GetDocument();
+
+	if (!p || !*p)
+	{
+		if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding);
+		return 0;
+	}
+
+	if (data)
+	{
+		data->Stamp(p, encoding);
+		location = data->Cursor();
+	}
+
+	if (*p != '<')
+	{
+		if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, p, data, encoding);
+		return 0;
+	}
+
+	p = SkipWhiteSpace(p+1, encoding);
+
+	// Read the name.
+	const char* pErr = p;
+
+	p = ReadName(p, &value, encoding);
+	if (!p || !*p)
+	{
+		if (document)	document->SetError(TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding);
+		return 0;
+	}
+
+	TIXML_STRING endTag ("</");
+	endTag += value;
+	endTag += ">";
+
+	// Check for and read attributes. Also look for an empty
+	// tag or an end tag.
+	while (p && *p)
+	{
+		pErr = p;
+		p = SkipWhiteSpace(p, encoding);
+		if (!p || !*p)
+		{
+			if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding);
+			return 0;
+		}
+		if (*p == '/')
+		{
+			++p;
+			// Empty tag.
+			if (*p != '>')
+			{
+				if (document) document->SetError(TIXML_ERROR_PARSING_EMPTY, p, data, encoding);
+				return 0;
+			}
+			return (p+1);
+		}
+		else if (*p == '>')
+		{
+			// Done with attributes (if there were any.)
+			// Read the value -- which can include other
+			// elements -- read the end tag, and return.
+			++p;
+			p = ReadValue(p, data, encoding);		// Note this is an Element method, and will set the error if one happens.
+			if (!p || !*p)
+				return 0;
+
+			// We should find the end tag now
+			if (StringEqual(p, endTag.c_str(), false, encoding))
+			{
+				p += endTag.length();
+				return p;
+			}
+			else
+			{
+				if (document) document->SetError(TIXML_ERROR_READING_END_TAG, p, data, encoding);
+				return 0;
+			}
+		}
+		else
+		{
+			// Try to read an attribute:
+			TiXmlAttribute* attrib = new TiXmlAttribute();
+			if (!attrib)
+			{
+				if (document) document->SetError(TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding);
+				return 0;
+			}
+
+			attrib->SetDocument(document);
+			// Own name, so the outer pErr is not shadowed.
+			const char* pAttrib = p;
+			p = attrib->Parse(p, data, encoding);
+
+			if (!p || !*p)
+			{
+				if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, pAttrib, data, encoding);
+				delete attrib;
+				return 0;
+			}
+
+			// Handle the strange case of double attributes: the later value
+			// replaces the earlier one as before, and parsing stops. Report
+			// it, so that the caller can tell why this element failed.
+			TiXmlAttribute* node = attributeSet.Find(attrib->NameTStr());
+			if (node)
+			{
+				node->SetValue(attrib->Value());
+				if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, pAttrib, data, encoding);
+				delete attrib;
+				return 0;
+			}
+
+			attributeSet.Add(attrib);
+		}
+	}
+	return p;
+}
+
+/*
+ * Reads the content of an element, i.e. everything between its start tag
+ * and its end tag, and links the resulting nodes as children.
+ *
+ * Text becomes a TiXmlText child unless it is blank; anything starting with
+ * '<' is identified and parsed as a node of its own. Reading stops at "</",
+ * and the position of that "</" is returned. Returns 0 when a node cannot
+ * be identified; when parsing ends with a null position,
+ * TIXML_ERROR_READING_ELEMENT_VALUE is set on the document (if any).
+ */
+const char* TiXmlElement::ReadValue(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
+{
+ TiXmlDocument* document = GetDocument();
+
+ // Read in text and elements in any order.
+ const char* pWithWhiteSpace = p; // position before leading white space was skipped
+ p = SkipWhiteSpace(p, encoding);
+
+ while (p && *p)
+ {
+ if (*p != '<')
+ {
+ // Take what we have, make a text element.
+ TiXmlText* textNode = new TiXmlText("");
+
+ if (!textNode)
+ {
+ if (document) document->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding);
+ return 0;
+ }
+
+ if (TiXmlBase::IsWhiteSpaceCondensed())
+ {
+ p = textNode->Parse(p, data, encoding);
+ }
+ else
+ {
+ // Special case: we want to keep the white space
+ // so that leading spaces aren't removed.
+ p = textNode->Parse(pWithWhiteSpace, data, encoding);
+ }
+
+ if (!textNode->Blank())
+ LinkEndChild(textNode);
+ else
+ delete textNode; // blank text is not kept as a child
+ }
+ else
+ {
+ // We hit a '<'
+ // Have we hit a new element or an end tag? This could also be
+ // a TiXmlText in the "CDATA" style.
+ if (StringEqual(p, "</", false, encoding))
+ {
+ return p; // the caller checks the end tag
+ }
+ else
+ {
+ TiXmlNode* node = Identify(p, encoding);
+ if (node)
+ {
+ p = node->Parse(p, data, encoding);
+ LinkEndChild(node); // the node is linked even when its Parse returned 0
+ }
+ else
+ {
+ return 0;
+ }
+ }
+ }
+ pWithWhiteSpace = p;
+ p = SkipWhiteSpace(p, encoding);
+ }
+
+ if (!p)
+ {
+ if (document) document->SetError(TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding);
+ }
+ return p;
+}
+
+
+#ifdef TIXML_USE_STL
+// Appends characters from the stream to 'tag' up to and including the next
+// '>'. A character value <= 0 sets TIXML_ERROR_EMBEDDED_NULL on the owning
+// document (if any) and stops the streaming.
+void TiXmlUnknown::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
+{
+	for (;;)
+	{
+		if (!in->good())
+			return;
+
+		const int ch = in->get();
+		if (ch <= 0)
+		{
+			TiXmlDocument* owner = GetDocument();
+			if (owner)
+				owner->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+			return;
+		}
+
+		(*tag) += (char) ch;
+
+		// All is well once the closing bracket has been copied.
+		if (ch == '>')
+			return;
+	}
+}
+#endif
+
+
+// Parses a tag tinyxml has no specific node type for: everything between the
+// '<' and the next '>' is stored, unmodified, as the value of the node.
+//   data - parsing data used for row/column tracking, may be 0.
+// Returns the position just past the '>'. Returns 0, with
+// TIXML_ERROR_PARSING_UNKNOWN set on the owning document (if any), when the
+// text does not start with '<' or when the input ends before a '>' is found.
+const char* TiXmlUnknown::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
+{
+	TiXmlDocument* document = GetDocument();
+	p = SkipWhiteSpace(p, encoding);
+
+	if (data)
+	{
+		// SkipWhiteSpace returns 0 for empty input; Stamp requires a position.
+		if (p)
+			data->Stamp(p, encoding);
+		location = data->Cursor();
+	}
+	if (!p || !*p || *p != '<')
+	{
+		if (document) document->SetError(TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding);
+		return 0;
+	}
+	++p;
+	value = "";
+
+	while (*p && *p != '>')
+	{
+		value += *p;
+		++p;
+	}
+
+	// p cannot be null here, so the old "!p" test never fired and a tag that
+	// was cut off by the end of the input went unreported.
+	if (*p != '>')
+	{
+		if (document) document->SetError(TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding);
+		return 0;
+	}
+	return p+1;
+}
+
+#ifdef TIXML_USE_STL
+// Appends characters from the stream to 'tag' until 'tag' ends in "-->".
+// A character value <= 0 sets TIXML_ERROR_EMBEDDED_NULL on the owning
+// document (if any) and stops the streaming.
+void TiXmlComment::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
+{
+	while (in->good())
+	{
+		int c = in->get();
+		if (c <= 0)
+		{
+			TiXmlDocument* document = GetDocument();
+			if (document)
+				document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+			return;
+		}
+
+		(*tag) += (char) c;
+
+		// Look back two characters only when they exist: with fewer than
+		// three characters in 'tag' the indices below would wrap around.
+		if (	c == '>'
+			 && tag->length() >= 3
+			 && tag->at(tag->length() - 2) == '-'
+			 && tag->at(tag->length() - 3) == '-')
+		{
+			// All is well.
+			return;
+		}
+	}
+}
+#endif
+
+
+// Parses a comment: the text between "<!--" and "-->" becomes the value of
+// the node, white space included.
+//   data - parsing data used for row/column tracking, may be 0.
+// Returns what ReadText returns for the "-->" terminator, or 0 with
+// TIXML_ERROR_PARSING_COMMENT set on the owning document (if any) when the
+// text at p does not start with "<!--".
+const char* TiXmlComment::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
+{
+	TiXmlDocument* document = GetDocument();
+	value = "";
+
+	p = SkipWhiteSpace(p, encoding);
+
+	if (data)
+	{
+		// SkipWhiteSpace returns 0 for empty input; Stamp requires a position.
+		if (p)
+			data->Stamp(p, encoding);
+		location = data->Cursor();
+	}
+	const char* startTag = "<!--";
+	const char* endTag   = "-->";
+
+	// StringEqual must not be handed a null or empty string, and a node that
+	// is not attached to a document has nowhere to report the error to.
+	if (!p || !*p || !StringEqual(p, startTag, false, encoding))
+	{
+		if (document) document->SetError(TIXML_ERROR_PARSING_COMMENT, p, data, encoding);
+		return 0;
+	}
+	p += strlen(startTag);
+	p = ReadText(p, &value, false, endTag, false, encoding);
+	return p;
+}
+
+
+// Parses one attribute of the form name = value into 'name' and 'value'.
+// The value may be enclosed in single or double quotes; an unquoted value is
+// accepted as well and runs up to the next white space, '/' or '>'.
+//   data - parsing data used for row/column tracking, may be 0.
+// Returns the position after the value, or 0 on failure. Apart from empty
+// input, failures set TIXML_ERROR_READING_ATTRIBUTES on the document, if one
+// has been assigned to the attribute.
+const char* TiXmlAttribute::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
+{
+	p = SkipWhiteSpace(p, encoding);
+	if (!p || !*p) return 0;
+
+	if (data)
+	{
+		data->Stamp(p, encoding);
+		location = data->Cursor();
+	}
+
+	// Read the name, the '=' and the value.
+	const char* nameStart = p;
+	p = ReadName(p, &name, encoding);
+	if (!p || !*p)
+	{
+		if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, nameStart, data, encoding);
+		return 0;
+	}
+
+	p = SkipWhiteSpace(p, encoding);
+	if (!p || *p != '=')
+	{
+		if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
+		return 0;
+	}
+
+	// Step over the '=' and whatever white space follows it.
+	p = SkipWhiteSpace(p + 1, encoding);
+	if (!p || !*p)
+	{
+		if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
+		return 0;
+	}
+
+	const char SINGLE_QUOTE = '\'';
+	const char DOUBLE_QUOTE = '\"';
+
+	if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE)
+	{
+		// Quoted: the value ends at the next quote of the same kind.
+		const char* end = (*p == SINGLE_QUOTE) ? "\'" : "\"";
+		p = ReadText(p + 1, &value, false, end, false, encoding);
+		return p;
+	}
+
+	// All attribute values should be in single or double quotes.
+	// But this is such a common error that the parser will try
+	// its best, even without them.
+	value = "";
+	for (; *p; ++p)
+	{
+		if (IsWhiteSpace(*p) || *p == '\n' || *p == '\r')	// whitespace
+			break;
+		if (*p == '/' || *p == '>')							// tag end
+			break;
+
+		if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE)
+		{
+			// [ 1451649 ] Attribute values with trailing quotes not handled correctly
+			// We did not have an opening quote but seem to have a
+			// closing one. Give up and throw an error.
+			if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
+			return 0;
+		}
+		value += *p;
+	}
+	return p;
+}
+
+#ifdef TIXML_USE_STL
+ // Appends the characters of a text node from 'in' to 'tag'.
+ //
+ // Plain text: reading stops in front of the next '<', which is left in the
+ // stream.
+ // CDATA:      reading continues until the terminator "]]>" has been
+ //             appended to 'tag'.
+ //
+ // in  - stream to read from
+ // tag - receives every character read
+ //
+ // If a character value <= 0 is seen (embedded null or failed read),
+ // TIXML_ERROR_EMBEDDED_NULL is reported on the owning document (if any) and
+ // reading stops.
+ void TiXmlText::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
+ {
+     while (in->good())
+     {
+         int c = in->peek();
+         if (!cdata && c == '<')
+         {
+             // Start of the next node; do not consume it.
+             return;
+         }
+         if (c <= 0)
+         {
+             TiXmlDocument* document = GetDocument();
+             if (document)
+                 document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+             return;
+         }
+
+         (*tag) += (char) c;
+         in->get(); // consume the character that was peeked above
+
+         // A CDATA section ends with "]]>". The length check keeps the
+         // index arithmetic from wrapping around on a short tag.
+         if (cdata
+             && c == '>'
+             && tag->length() >= 3
+             && tag->at(tag->length() - 2) == ']'
+             && tag->at(tag->length() - 3) == ']')
+         {
+             // All is well.
+             return;
+         }
+     }
+ }
+#endif
+
+ // Parses a text node, either plain text or a CDATA section.
+ //
+ // CDATA (the node is already flagged as cdata, or the text starts with
+ // "<![CDATA["): every character up to "]]>" is copied verbatim into
+ // 'value'; the end tag itself is then consumed through ReadText.
+ // Plain text: the text up to the next '<' is read into 'value' by ReadText
+ // with white space handling enabled.
+ //
+ // p        - text to parse
+ // data     - optional parsing data; when given, it is stamped and the node
+ //            location is taken from its cursor
+ // encoding - encoding passed through to the helpers
+ //
+ // Returns the position after the CDATA section, or the position of the '<'
+ // that ended plain text. Returns 0 on failure; a node flagged as cdata
+ // whose text does not start with "<![CDATA[" additionally reports
+ // TIXML_ERROR_PARSING_CDATA.
+ const char* TiXmlText::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
+ {
+     value = "";
+     TiXmlDocument* document = GetDocument();
+
+     if (data)
+     {
+         data->Stamp(p, encoding);
+         location = data->Cursor();
+     }
+
+     const char* const startTag = "<![CDATA[";
+     const char* const endTag = "]]>";
+
+     if (cdata || StringEqual(p, startTag, false, encoding))
+     {
+         cdata = true;
+
+         if (!StringEqual(p, startTag, false, encoding))
+         {
+             // GetDocument() may yield a null pointer; guard the call the
+             // same way the other Parse() implementations in this file do.
+             if (document)
+                 document->SetError(TIXML_ERROR_PARSING_CDATA, p, data, encoding);
+             return 0;
+         }
+         p += strlen(startTag);
+
+         // Keep all the white space, ignore the encoding, etc.
+         while ( p && *p
+                 && !StringEqual(p, endTag, false, encoding)
+               )
+         {
+             value += *p;
+             ++p;
+         }
+
+         // Let ReadText step over the end tag; its text output is not needed.
+         TIXML_STRING dummy;
+         p = ReadText(p, &dummy, false, endTag, false, encoding);
+         return p;
+     }
+     else
+     {
+         bool ignoreWhite = true;
+
+         const char* end = "<";
+         p = ReadText(p, &value, ignoreWhite, end, false, encoding);
+         if (p)
+             return p-1; // don't truncate the '<'
+         return 0;
+     }
+ }
+
+#ifdef TIXML_USE_STL
+ // Appends characters from 'in' to 'tag' up to and including the first '>'.
+ //
+ // in  - stream to read from
+ // tag - receives every character read
+ //
+ // If a character value <= 0 is read (embedded null or failed read),
+ // TIXML_ERROR_EMBEDDED_NULL is reported on the owning document (if any) and
+ // reading stops.
+ void TiXmlDeclaration::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
+ {
+     bool closed = false;
+     while (!closed && in->good())
+     {
+         const int ch = in->get();
+         if (ch <= 0)
+         {
+             if (TiXmlDocument* doc = GetDocument())
+                 doc->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
+             return;
+         }
+
+         (*tag) += static_cast<char>(ch);
+
+         // The closing '>' ends the declaration.
+         closed = (ch == '>');
+     }
+ }
+#endif
+
+ // Parses an XML declaration ("<?xml ... >").
+ //
+ // After the "<?xml" prefix, the attributes "version", "encoding" and
+ // "standalone" are recognised and stored in the members of the same name;
+ // any other token is skipped.
+ //
+ // p         - text to parse
+ // data      - optional parsing data; when given, it is stamped and the node
+ //             location is taken from its cursor
+ // _encoding - encoding passed through to the helpers
+ //
+ // Returns the position just past the closing '>'. Returns 0 when the text
+ // ends before a '>' is found, and 0 with TIXML_ERROR_PARSING_DECLARATION
+ // reported when the text does not start with "<?xml".
+ const char* TiXmlDeclaration::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding)
+ {
+     p = SkipWhiteSpace(p, _encoding);
+     TiXmlDocument* document = GetDocument();
+
+     const bool startsDeclaration = p && *p && StringEqual(p, "<?xml", true, _encoding);
+     if (!startsDeclaration)
+     {
+         if (document)
+             document->SetError(TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding);
+         return 0;
+     }
+
+     if (data)
+     {
+         data->Stamp(p, _encoding);
+         location = data->Cursor();
+     }
+     p += 5; // step over "<?xml"
+
+     version = "";
+     encoding = "";
+     standalone = "";
+
+     // Recognised attribute names, each paired with the member that
+     // receives its value. Checked in this order.
+     struct Field
+     {
+         const char* key;
+         TIXML_STRING* target;
+     };
+     const Field fields[] =
+     {
+         { "version", &version },
+         { "encoding", &encoding },
+         { "standalone", &standalone }
+     };
+     const int fieldCount = 3;
+
+     while (p && *p)
+     {
+         if (*p == '>')
+             return p + 1;
+
+         p = SkipWhiteSpace(p, _encoding);
+
+         bool matched = false;
+         for (int i = 0; i < fieldCount && !matched; ++i)
+         {
+             if (StringEqual(p, fields[i].key, true, _encoding))
+             {
+                 TiXmlAttribute attrib;
+                 p = attrib.Parse(p, data, _encoding);
+                 *fields[i].target = attrib.Value();
+                 matched = true;
+             }
+         }
+
+         if (!matched)
+         {
+             // Read over whatever it is.
+             while (p && *p && *p != '>' && !IsWhiteSpace(*p))
+                 ++p;
+         }
+     }
+     return 0;
+ }
+
+ // Returns true when every character of 'value' is white space (an empty
+ // value therefore counts as blank), false otherwise.
+ bool TiXmlText::Blank() const
+ {
+     unsigned idx = 0;
+     while (idx < value.length())
+     {
+         if (!IsWhiteSpace(value[idx]))
+             return false;
+         ++idx;
+     }
+     return true;
+ }
+
|