summaryrefslogtreecommitdiff
path: root/plugins/UserInfoEx/ex_import/tinyxmlparser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/UserInfoEx/ex_import/tinyxmlparser.cpp')
-rw-r--r--plugins/UserInfoEx/ex_import/tinyxmlparser.cpp1613
1 files changed, 0 insertions, 1613 deletions
diff --git a/plugins/UserInfoEx/ex_import/tinyxmlparser.cpp b/plugins/UserInfoEx/ex_import/tinyxmlparser.cpp
deleted file mode 100644
index 73f2c18679..0000000000
--- a/plugins/UserInfoEx/ex_import/tinyxmlparser.cpp
+++ /dev/null
@@ -1,1613 +0,0 @@
-/*
-www.sourceforge.net/projects/tinyxml
-Original code (2.0 and earlier)copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
-
-This software is provided 'as-is', without any express or implied
-warranty. In no event will the authors be held liable for any
-damages arising from the use of this software.
-
-Permission is granted to anyone to use this software for any
-purpose, including commercial applications, and to alter it and
-redistribute it freely, subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must
-not claim that you wrote the original software. If you use this
-software in a product, an acknowledgment in the product documentation
-would be appreciated but is not required.
-
-2. Altered source versions must be plainly marked as such, and
-must not be misrepresented as being the original software.
-
-3. This notice may not be removed or altered from any source
-distribution.
-
-===============================================================================
-
-UserinfoEx plugin for Miranda IM
-
-Copyright:
-© 2006-2010 DeathAxe, Yasnovidyashii, Merlin, K. Romanov, Kreol
-
-File name : $HeadURL: https://userinfoex.googlecode.com/svn/trunk/ex_import/tinyxmlparser.cpp $
-Revision : $Revision: 187 $
-Last change on : $Date: 2010-09-08 16:05:54 +0400 (Ср, 08 сен 2010) $
-Last change by : $Author: ing.u.horn $
-
-===============================================================================
-*/
-
-#include <ctype.h>
-#include <stddef.h>
-
-#ifdef USE_MMGR
-#include <string.h>
-#include <assert.h>
-#include <stdio.h>
-#include "mmgr.h"
-#endif
-
-#include "tinyxml.h"
-
-//#define DEBUG_PARSER
-#if defined(DEBUG_PARSER)
-# if defined(DEBUG) && defined(_MSC_VER)
-# include <windows.h>
-# define TIXML_LOG OutputDebugString
-# else
-# define TIXML_LOG printf
-# endif
-#endif
-
-// Note that "PutString" hardcodes the same list, so this table
-// is less flexible than it appears. Changing the entries
-// or their order will break PutString.
-TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
-{ // Each row: entity text, length of that text, character the entity stands for.
- { "&amp;", 5, '&' },
- { "&lt;", 4, '<' },
- { "&gt;", 4, '>' },
- { "&quot;", 6, '\"' },
- { "&apos;", 6, '\'' }
-};
-
-// A bunch of Unicode info is available at:
-// http://www.unicode.org/faq/utf_bom.html
-// That includes the basis of the table below, which gives the number of
-// bytes in a sequence from its lead byte. 1 is used for invalid sequences --
-// although the result will be junk, pass it through as much as possible.
-// Beware of the non-characters in UTF-8:
-// ef bb bf (Microsoft "lead bytes")
-// ef bf be
-// ef bf bf
-
-const unsigned char TIXML_UTF_LEAD_0 = 0xefU; // first byte of the "ef bb bf" lead sequence
-const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; // second byte of the "ef bb bf" lead sequence
-const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; // third byte of the "ef bb bf" lead sequence
-
-const int TiXmlBase::utf8ByteTable[256] = // indexed by lead byte; value is the sequence length in bytes
-{
- // 0 1 2 3 4 5 6 7 8 9 a b c d e f
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70 End of ASCII range
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x80 0x80 to 0xc1 invalid
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x90
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xa0
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xb0
- 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0 0xc2 to 0xdf 2 byte
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xd0
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0 0xe0 to 0xef 3 byte
- 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
-};
-
-
-// Encodes the code point `input` as UTF-8 into `output` and stores the number
-// of bytes written in `*length`. Values of 0x200000 and above are not
-// encoded: `*length` is set to 0 and `output` is left untouched.
-void TiXmlBase::ConvertUTF32ToUTF8(unsigned long input, char* output, int* length)
-{
-    const unsigned long CONTINUATION_KEEP = 0xBF;   // clears bit 6 of a trailing byte
-    const unsigned long CONTINUATION_FLAG = 0x80;   // sets bit 7 of a trailing byte
-    const unsigned long LEAD_PREFIX[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-    int byteCount = 0;
-    if (input < 0x80)
-        byteCount = 1;
-    else if (input < 0x800)
-        byteCount = 2;
-    else if (input < 0x10000)
-        byteCount = 3;
-    else if (input < 0x200000)
-        byteCount = 4;
-
-    *length = byteCount;
-    if (byteCount == 0)
-        return;     // Out of range; nothing sensible can be written.
-
-    // Write back to front: every trailing byte takes the low six bits,
-    // what is left over ends up in the lead byte.
-    char* writePos = output + byteCount;
-    for (int remaining = byteCount; remaining > 1; --remaining)
-    {
-        --writePos;
-        *writePos = (char)((input | CONTINUATION_FLAG) & CONTINUATION_KEEP);
-        input >>= 6;
-    }
-    --writePos;
-    *writePos = (char)(input | LEAD_PREFIX[byteCount]);
-}
-
-
-/*static*/ int TiXmlBase::IsAlpha(unsigned char anyByte, TiXmlEncoding /*encoding*/)
-{
-    // Only low ASCII is actually classified (via isalpha). Every byte from
-    // 127 upwards is reported as a letter: telling letters from non-letters
-    // across encodings is hard, so the parser errs on the generous side.
-    // The encoding argument is deliberately ignored.
-    return (anyByte >= 127) ? 1 : isalpha(anyByte);
-}
-
-
-/*static*/ int TiXmlBase::IsAlphaNum(unsigned char anyByte, TiXmlEncoding /*encoding*/)
-{
-    // Only low ASCII is actually classified (via isalnum). Every byte from
-    // 127 upwards is reported as alphanumeric: telling letters from
-    // non-letters across encodings is hard, so the parser errs on the
-    // generous side. The encoding argument is deliberately ignored.
-    return (anyByte >= 127) ? 1 : isalnum(anyByte);
-}
-
-
-// Tracks the row/column position while a buffer is being parsed. `stamp`
-// remembers how far the position has been computed, so each call to Stamp()
-// only walks the bytes between the previous stamp and the new one.
-class TiXmlParsingData
-{
-    friend class TiXmlDocument;
-
-public:
-    // Advances the cursor up to `now`.
-    void Stamp(const char* now, TiXmlEncoding encoding);
-
-    // Position computed by the most recent Stamp() call.
-    const TiXmlCursor& Cursor() { return cursor; }
-
-private:
-    // Private: only the document (a friend) creates parsing data.
-    TiXmlParsingData(const char* start, int _tabsize, int row, int col)
-    {
-        assert(start);
-        cursor.row = row;
-        cursor.col = col;
-        tabsize = _tabsize;
-        stamp = start;
-    }
-
-    TiXmlCursor cursor;     // current row/column
-    const char* stamp;      // first byte not yet accounted for in `cursor`
-    int tabsize;            // tab width; values below 1 disable tracking
-};
-
-
-// Advances the stored cursor (row/column) from the previous stamp position
-// up to `now`, interpreting line breaks, tabs and, for UTF-8 input,
-// multi-byte sequences. Does nothing when the tab size is below 1.
-//
-// now      - position in the buffer being parsed; must not be null.
-// encoding - TIXML_ENCODING_UTF8 enables multi-byte handling; any other
-//            value counts one column per byte.
-void TiXmlParsingData::Stamp(const char* now, TiXmlEncoding encoding)
-{
-    assert(now);
-
-    // Do nothing if the tabsize is 0.
-    if (tabsize < 1)
-    {
-        return;
-    }
-
-    // Get the current row, column.
-    int row = cursor.row;
-    int col = cursor.col;
-    const char* p = stamp;
-    assert(p);
-
-    while (p < now)
-    {
-        // Treat p as unsigned, so we have a happy compiler.
-        const unsigned char* pU = (const unsigned char*)p;
-
-        // Code contributed by Fletcher Dunn: (modified by lee)
-        switch (*pU) {
-            case 0:
-                // We *should* never get here, but in case we do, don't
-                // advance past the terminating null character, ever
-                return;
-
-            case '\r':
-                // bump down to the next line
-                ++row;
-                col = 0;
-                // Eat the character
-                ++p;
-
-                // Check for \r\n sequence, and treat this as a single character
-                if (*p == '\n') {
-                    ++p;
-                }
-                break;
-
-            case '\n':
-                // bump down to the next line
-                ++row;
-                col = 0;
-
-                // Eat the character
-                ++p;
-
-                // Check for \n\r sequence, and treat this as a single
-                // character. (Yes, this bizarre thing does occur still
-                // on some arcane platforms...)
-                if (*p == '\r') {
-                    ++p;
-                }
-                break;
-
-            case '\t':
-                // Eat the character
-                ++p;
-
-                // Skip to next tab stop
-                col = (col / tabsize + 1) * tabsize;
-                break;
-
-            case TIXML_UTF_LEAD_0:
-                if (encoding == TIXML_ENCODING_UTF8)
-                {
-                    if (*(p+1) && *(p+2))
-                    {
-                        // In these cases, don't advance the column. These are
-                        // 0-width spaces.
-                        if (*(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2)
-                            p += 3;
-                        else if (*(pU+1)==0xbfU && *(pU+2)==0xbeU)
-                            p += 3;
-                        else if (*(pU+1)==0xbfU && *(pU+2)==0xbfU)
-                            p += 3;
-                        else
-                            { p +=3; ++col; }   // A normal character.
-                    }
-                    else
-                    {
-                        // Truncated sequence: the lead byte is followed by
-                        // fewer than two bytes before the terminator. Consume
-                        // just this byte; without this branch p never moved
-                        // and the loop spun forever.
-                        ++p;
-                        ++col;
-                    }
-                }
-                else
-                {
-                    ++p;
-                    ++col;
-                }
-                break;
-
-            default:
-                if (encoding == TIXML_ENCODING_UTF8)
-                {
-                    // Eat the 1 to 4 byte utf8 character.
-                    int step = TiXmlBase::utf8ByteTable[*pU];
-                    if (step == 0)
-                        step = 1;       // Error case from bad encoding, but handle gracefully.
-
-                    // Advance byte by byte so that a sequence cut short by
-                    // the terminator can never carry p past the end of the
-                    // buffer. *p is non-zero on entry, so p always moves.
-                    for (int consumed = 0; consumed < step && *p; ++consumed)
-                        ++p;
-
-                    // Just advance one column, of course.
-                    ++col;
-                }
-                else
-                {
-                    ++p;
-                    ++col;
-                }
-                break;
-        }
-    }
-    cursor.row = row;
-    cursor.col = col;
-    assert(cursor.row >= -1);
-    assert(cursor.col >= -1);
-    stamp = p;
-    assert(stamp);
-}
-
-
-// Returns a pointer to the first byte at or after `p` that is not white
-// space. In UTF-8 mode the three-byte sequences ef bb bf, ef bf be and
-// ef bf bf are skipped as well. Returns 0 when `p` is null or already at
-// the terminator.
-const char* TiXmlBase::SkipWhiteSpace(const char* p, TiXmlEncoding encoding)
-{
-    if (!p || !*p)
-        return 0;
-
-    if (encoding != TIXML_ENCODING_UTF8)
-    {
-        while ((*p && IsWhiteSpace(*p)) || *p == '\n' || *p == '\r')
-            ++p;
-        return p;
-    }
-
-    while (*p)
-    {
-        const unsigned char* bytes = (const unsigned char*)p;
-
-        // The Microsoft byte order mark and the two UTF-8 non-characters.
-        // Short-circuit evaluation keeps the look-ahead inside the string.
-        const bool skippableTriple =
-               bytes[0] == TIXML_UTF_LEAD_0
-            && (   (bytes[1] == TIXML_UTF_LEAD_1 && bytes[2] == TIXML_UTF_LEAD_2)
-                || (bytes[1] == 0xbfU && (bytes[2] == 0xbeU || bytes[2] == 0xbfU)));
-        if (skippableTriple)
-        {
-            p += 3;
-            continue;
-        }
-
-        // Still using old rules for white space.
-        if (!IsWhiteSpace(*p) && *p != '\n' && *p != '\r')
-            break;
-        ++p;
-    }
-    return p;
-}
-
-#ifdef TIXML_USE_STL
-// Moves leading white space from the stream onto the end of `tag`.
-// Returns true once a character that is not white space (or a value <= 0)
-// is the next to be read; returns false if the stream stops being good().
-/*static*/ bool TiXmlBase::StreamWhiteSpace(TIXML_ISTREAM * in, TIXML_STRING * tag)
-{
-    while (in->good())
-    {
-        const int next = in->peek();
-        // At this scope, we can't get to a document. So fail silently.
-        if (next <= 0 || !IsWhiteSpace(next))
-            return true;
-
-        *tag += (char) in->get();
-    }
-    return false;
-}
-
-// Copies characters from the stream onto the end of `tag` until `character`
-// is the next one to be read (it is left in the stream). Returns true when
-// that character was found, false on a value <= 0 or a stream failure.
-/*static*/ bool TiXmlBase::StreamTo(TIXML_ISTREAM * in, int character, TIXML_STRING * tag)
-{
-    //assert(character > 0 && character < 128); // else it won't work in utf-8
-    for (;;)
-    {
-        if (!in->good())
-            return false;
-
-        const int next = in->peek();
-        if (next == character)
-            return true;
-        if (next <= 0)      // Silent failure: can't get document at this scope
-            return false;
-
-        in->get();
-        *tag += (char) next;
-    }
-}
-#endif
-
-// Reads an XML name starting at `p` into `*name` (which is cleared first).
-// Returns the position just past the name, or 0 when `p` does not start
-// with a letter or underscore.
-const char* TiXmlBase::ReadName(const char* p, TIXML_STRING * name, TiXmlEncoding encoding)
-{
-    *name = "";
-    assert(p);
-
-    // Names start with letters or underscores.
-    // Of course, in unicode, tinyxml has no idea what a letter *is*. The
-    // algorithm is generous.
-    if (!p || !*p)
-        return 0;
-    if (!IsAlpha((unsigned char) *p, encoding) && *p != '_')
-        return 0;
-
-    // After the first character, letters, digits, underscores, hyphens,
-    // periods and colons are accepted. (Colons are valid only for
-    // namespaces, but tinyxml can't tell namespaces from names.)
-    for (; *p; ++p)
-    {
-        const bool nameChar =
-               IsAlphaNum((unsigned char) *p, encoding)
-            || *p == '_'
-            || *p == '-'
-            || *p == '.'
-            || *p == ':';
-        if (!nameChar)
-            break;
-        (*name) += *p;
-    }
-    return p;
-}
-
-// Decodes the entity or character reference that starts at `p`.
-//
-// Numeric references (&#NNN; and &#xHHH;) are converted to UTF-8 when
-// `encoding` is TIXML_ENCODING_UTF8, otherwise truncated to a single char.
-// Named references are looked up in the entity table. On success the decoded
-// bytes are written to `value`, their count to `*length`, and the position
-// after the reference is returned. A malformed numeric reference returns 0.
-// Anything unrecognized copies *p to `value`, leaves `*length` at 0 and
-// returns p+1.
-const char* TiXmlBase::GetEntity(const char* p, char* value, int* length, TiXmlEncoding encoding)
-{
-    *length = 0;
-
-    if (*(p+1) && *(p+1) == '#' && *(p+2))
-    {
-        const bool hex = (*(p+2) == 'x');
-        const char* digits = hex ? p+3 : p+2;       // first character of the number
-        const char sentinel = hex ? 'x' : '#';      // character just before the number
-        const unsigned radix = hex ? 16 : 10;
-
-        if (!*digits) return 0;
-
-        const char* semicolon = strchr(digits, ';');
-        if (!semicolon || !*semicolon) return 0;
-
-        const ptrdiff_t delta = semicolon - p;
-
-        // Accumulate from the least significant digit backwards until the
-        // sentinel in front of the number is reached.
-        unsigned long ucs = 0;
-        unsigned mult = 1;
-        for (const char* q = semicolon - 1; *q != sentinel; --q)
-        {
-            unsigned digit;
-            if (*q >= '0' && *q <= '9')
-                digit = *q - '0';
-            else if (hex && *q >= 'a' && *q <= 'f')
-                digit = *q - 'a' + 10;
-            else if (hex && *q >= 'A' && *q <= 'F')
-                digit = *q - 'A' + 10;
-            else
-                return 0;
-            ucs += mult * digit;
-            mult *= radix;
-        }
-
-        if (encoding == TIXML_ENCODING_UTF8)
-        {
-            // convert the UCS to UTF-8
-            ConvertUTF32ToUTF8(ucs, value, length);
-        }
-        else
-        {
-            *value = (char)ucs;
-            *length = 1;
-        }
-        return p + delta + 1;
-    }
-
-    // Not numeric: try the named entities.
-    for (int idx = 0; idx < NUM_ENTITY; ++idx)
-    {
-        if (strncmp(entity[idx].str, p, entity[idx].strLength) != 0)
-            continue;
-
-        assert(strlen(entity[idx].str) == entity[idx].strLength);
-        *value = entity[idx].chr;
-        *length = 1;
-        return (p + entity[idx].strLength);
-    }
-
-    // So it wasn't an entity, its unrecognized, or something like that.
-    *value = *p;    // Don't put back the last one, since we return it!
-    //*length = 1;  // Leave unrecognized entities - this doesn't really work.
-                    // Just writes strange XML.
-    return p+1;
-}
-
-
-// Returns true when the text at `p` begins with `tag`, optionally ignoring
-// case. A null or empty `p` asserts and yields false.
-bool TiXmlBase::StringEqual(const char* p,
-                            const char* tag,
-                            bool ignoreCase,
-                            TiXmlEncoding encoding)
-{
-    assert(p);
-    assert(tag);
-    if (!p || !*p)
-    {
-        assert(0);
-        return false;
-    }
-
-    const char* cursor = p;
-
-    if (ignoreCase)
-    {
-        for (; *cursor && *tag; ++cursor, ++tag)
-        {
-            if (ToLower(*cursor, encoding) != ToLower(*tag, encoding))
-                break;
-        }
-    }
-    else
-    {
-        for (; *cursor && *tag; ++cursor, ++tag)
-        {
-            if (*cursor != *tag)
-                break;
-        }
-    }
-
-    // Equal only if the whole tag was consumed.
-    return *tag == 0;
-}
-
-// Reads text starting at `p` into `*text` (cleared first) until `endTag` is
-// found, decoding entities through GetChar().
-//
-// trimWhiteSpace  - when true and white-space condensing is enabled, leading
-//                   white space is dropped and every inner run of white
-//                   space collapses to one space; otherwise all characters
-//                   are kept as they are.
-// endTag          - text that terminates the read; it is not copied.
-// caseInsensitive - whether `endTag` is matched ignoring case.
-//
-// Returns the position just past `endTag`, or 0 when the input ended (or a
-// character could not be decoded) before `endTag` was seen.
-const char* TiXmlBase::ReadText( const char* p,
-                                 TIXML_STRING * text,
-                                 bool trimWhiteSpace,
-                                 const char* endTag,
-                                 bool caseInsensitive,
-                                 TiXmlEncoding encoding)
-{
-    *text = "";
-    if (   !trimWhiteSpace          // certain tags always keep whitespace
-        || !condenseWhiteSpace)     // condensing is switched off
-    {
-        // Keep all the white space.
-        while (   p && *p
-               && !StringEqual(p, endTag, caseInsensitive, encoding)
-              )
-        {
-            int len;
-            char cArr[4] = { 0, 0, 0, 0 };
-            p = GetChar(p, cArr, &len, encoding);
-            text->append(cArr, len);
-        }
-    }
-    else
-    {
-        bool whitespace = false;
-
-        // Remove leading white space:
-        p = SkipWhiteSpace(p, encoding);
-        while (   p && *p
-               && !StringEqual(p, endTag, caseInsensitive, encoding))
-        {
-            if (*p == '\r' || *p == '\n')
-            {
-                whitespace = true;
-                ++p;
-            }
-            else if (IsWhiteSpace(*p))
-            {
-                whitespace = true;
-                ++p;
-            }
-            else
-            {
-                // If we've found whitespace, add it before the
-                // new character. Any whitespace just becomes a space.
-                if (whitespace)
-                {
-                    (*text) += ' ';
-                    whitespace = false;
-                }
-                int len;
-                char cArr[4] = { 0, 0, 0, 0 };
-                p = GetChar(p, cArr, &len, encoding);
-                if (len == 1)
-                    (*text) += cArr[0]; // more efficient
-                else
-                    text->append(cArr, len);
-            }
-        }
-    }
-
-    // The loops above stop for one of three reasons: p became null, p reached
-    // the terminator, or endTag matched. Only in the last case may the end
-    // tag be stepped over; doing so otherwise would hand back a pointer
-    // derived from null or one that lies beyond the end of the buffer.
-    if (!p || !*p)
-        return 0;
-    return p + strlen(endTag);
-}
-
-#ifdef TIXML_USE_STL
-
-void TiXmlDocument::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag) // Copies raw document text from the stream onto the end of *tag, up to and including the first element.
-{
- // The basic issue with a document is that we don't know what we're
- // streaming. Read something presumed to be a tag (and hope), then
- // identify it, and call the appropriate stream method on the tag.
- //
- // This "pre-streaming" will never read the closing ">" so the
- // sub-tag can orient itself.
-
- if (!StreamTo(in, '<', tag))
- {
- SetError(TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
- return;
- }
-
- while (in->good())
- {
- int tagIndex = (int) tag->length(); // offset in *tag where the node read below begins
- while (in->good() && in->peek() != '>')
- {
- int c = in->get();
- if (c <= 0)
- {
- SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
- break;
- }
- (*tag) += (char) c;
- }
-
- if (in->good())
- {
- // We now have something we presume to be a node of
- // some sort. Identify it, and call the node to
- // continue streaming.
- TiXmlNode* node = Identify(tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING);
-
- if (node)
- {
- node->StreamIn(in, tag);
- bool isElement = node->ToElement() != 0;
- delete node; // the node was only needed to pick the right StreamIn; it is not linked into the document
- node = 0;
-
- // If this is the root element, we're done. Parsing will be
- // done by the >> operator.
- if (isElement)
- {
- return;
- }
- }
- else
- {
- SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN);
- return;
- }
- }
- }
- // We should have returned sooner.
- SetError(TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN);
-}
-
-#endif
-
-// Parses a complete document from the null-terminated buffer `p`, linking
-// every top-level node it finds as a child of this document.
-//
-// prevData - optional; when given, row/column counting continues from its
-//            cursor instead of starting at 0/0.
-// encoding - TIXML_ENCODING_UNKNOWN lets the parser decide from the
-//            Microsoft UTF-8 lead bytes or from the XML declaration.
-//
-// Returns the position where parsing stopped, or 0 (with the error set)
-// when the buffer is empty or no node could be read.
-const char* TiXmlDocument::Parse(const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding)
-{
-    ClearError();
-
-    // Parse away, at the document level. Since a document
-    // contains nothing but other tags, most of what happens
-    // here is skipping white space.
-    if (!p || !*p)
-    {
-        SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
-        return 0;
-    }
-
-    // Note that, for a document, this needs to come
-    // before the white space skip, so that parsing
-    // starts from the pointer we are given.
-    location.Clear();
-    location.row = prevData ? prevData->cursor.row : 0;
-    location.col = prevData ? prevData->cursor.col : 0;
-
-    TiXmlParsingData data(p, TabSize(), location.row, location.col);
-    location = data.Cursor();
-
-    if (encoding == TIXML_ENCODING_UNKNOWN)
-    {
-        // Check for the Microsoft UTF-8 lead bytes. The comparisons
-        // short-circuit, so the terminator is never read past.
-        const unsigned char* lead = (const unsigned char*)p;
-        if (   lead[0] == TIXML_UTF_LEAD_0
-            && lead[1] == TIXML_UTF_LEAD_1
-            && lead[2] == TIXML_UTF_LEAD_2)
-        {
-            encoding = TIXML_ENCODING_UTF8;
-            useMicrosoftBOM = true;
-        }
-    }
-
-    p = SkipWhiteSpace(p, encoding);
-    if (!p)
-    {
-        SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN);
-        return 0;
-    }
-
-    while (p && *p)
-    {
-        TiXmlNode* node = Identify(p, encoding);
-        if (!node)
-            break;
-
-        p = node->Parse(p, &data, encoding);
-        LinkEndChild(node);
-
-        // Did we get encoding info?
-        if (encoding == TIXML_ENCODING_UNKNOWN && node->ToDeclaration())
-        {
-            TiXmlDeclaration* declaration = node->ToDeclaration();
-            const char* declared = declaration->Encoding();
-            assert(declared);
-
-            if (*declared == 0)
-                encoding = TIXML_ENCODING_UTF8;
-            else if (StringEqual(declared, "UTF-8", true, TIXML_ENCODING_UNKNOWN))
-                encoding = TIXML_ENCODING_UTF8;
-            else if (StringEqual(declared, "UTF8", true, TIXML_ENCODING_UNKNOWN))
-                encoding = TIXML_ENCODING_UTF8;     // incorrect, but be nice
-            else
-                encoding = TIXML_ENCODING_LEGACY;
-        }
-
-        p = SkipWhiteSpace(p, encoding);
-    }
-
-    // Was this empty?
-    if (!firstChild)
-    {
-        SetError(TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding);
-        return 0;
-    }
-
-    // All is well.
-    return p;
-}
-
-void TiXmlDocument::SetError(int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding)
-{
- // The first error in a chain is more accurate - don't set again!
- if (error)
- return;
-
- assert(err > 0 && err < TIXML_ERROR_STRING_COUNT);
- error = true;
- errorId = err;
- errorDesc = errorString[ errorId ];
-
- errorLocation.Clear();
- if (pError && data)
- {
- data->Stamp(pError, encoding);
- errorLocation = data->Cursor();
- }
-}
-
-
-// Looks at the markup starting at `p` and allocates an empty node of the
-// matching kind, with its parent set to this node. The node is not parsed
-// and not linked in. Returns 0 when `p` (after white space) does not start
-// with '<'.
-TiXmlNode* TiXmlNode::Identify(const char* p, TiXmlEncoding encoding)
-{
-    p = SkipWhiteSpace(p, encoding);
-    if (!p || *p != '<')
-        return 0;
-
-    TiXmlDocument* doc = GetDocument();
-
-    // What is this thing?
-    // - Elements start with a letter or underscore, but xml is reserved.
-    // - Comments: <!--
-    // - Declaration: <?xml
-    // - Everything else is unknown to tinyxml.
-    //
-    // The order of the tests matters: "<!" is a prefix of both "<!--" and
-    // "<![CDATA[", so it has to be tried after them.
-    const char* xmlHeader = { "<?xml" };
-    const char* commentHeader = { "<!--" };
-    const char* dtdHeader = { "<!" };
-    const char* cdataHeader = { "<![CDATA[" };
-
-    TiXmlNode* created = 0;
-
-    if (StringEqual(p, xmlHeader, true, encoding))
-    {
-        #ifdef DEBUG_PARSER
-            TIXML_LOG("XML parsing Declaration\n");
-        #endif
-        created = new TiXmlDeclaration();
-    }
-    else if (StringEqual(p, commentHeader, false, encoding))
-    {
-        #ifdef DEBUG_PARSER
-            TIXML_LOG("XML parsing Comment\n");
-        #endif
-        created = new TiXmlComment();
-    }
-    else if (StringEqual(p, cdataHeader, false, encoding))
-    {
-        #ifdef DEBUG_PARSER
-            TIXML_LOG("XML parsing CDATA\n");
-        #endif
-        TiXmlText* cdataText = new TiXmlText("");
-        cdataText->SetCDATA(true);
-        created = cdataText;
-    }
-    else if (StringEqual(p, dtdHeader, false, encoding))
-    {
-        #ifdef DEBUG_PARSER
-            TIXML_LOG("XML parsing Unknown(1)\n");
-        #endif
-        created = new TiXmlUnknown();
-    }
-    else if (IsAlpha(*(p+1), encoding) || *(p+1) == '_')
-    {
-        #ifdef DEBUG_PARSER
-            TIXML_LOG("XML parsing Element\n");
-        #endif
-        created = new TiXmlElement("");
-    }
-    else
-    {
-        #ifdef DEBUG_PARSER
-            TIXML_LOG("XML parsing Unknown(2)\n");
-        #endif
-        created = new TiXmlUnknown();
-    }
-
-    if (!created)
-    {
-        if (doc)
-            doc->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN);
-        return created;
-    }
-
-    // Set the parent, so it can report errors
-    created->parent = this;
-    return created;
-}
-
-#ifdef TIXML_USE_STL
-
-void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag) // Copies the raw text of this element, children and closing tag included, from the stream onto the end of *tag.
-{
- // We're called with some amount of pre-parsing. That is, some of "this"
- // element is in "tag". Go ahead and stream to the closing ">"
- while (in->good())
- {
- int c = in->get();
- if (c <= 0)
- {
- TiXmlDocument* document = GetDocument();
- if (document)
- document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
- return;
- }
- (*tag) += (char) c ;
-
- if (c == '>')
- break;
- }
-
- if (tag->length() < 3) return; // too short for the at(length - 2) look-back below
-
- // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
- // If not, identify and stream.
-
- if ( tag->at(tag->length() - 1) == '>'
- && tag->at(tag->length() - 2) == '/')
- {
- // All good!
- return;
- }
- else if (tag->at(tag->length() - 1) == '>')
- {
- // There is more. Could be:
- // text
- // closing tag
- // another node.
- for (;;)
- {
- StreamWhiteSpace(in, tag);
-
- // Do we have text?
- if (in->good() && in->peek() != '<')
- {
- // Yep, text.
- TiXmlText text(""); // temporary node, used only to call its StreamIn
- text.StreamIn(in, tag);
-
- // What follows text is a closing tag or another node.
- // Go around again and figure it out.
- continue;
- }
-
- // We now have either a closing tag...or another node.
- // We should be at a "<", regardless.
- if (!in->good()) return;
- assert(in->peek() == '<');
- int tagIndex = (int) tag->length(); // offset in *tag where the next tag begins
-
- bool closingTag = false;
- bool firstCharFound = false;
-
- for (;;)
- {
- if (!in->good())
- return;
-
- int c = in->peek();
- if (c <= 0)
- {
- TiXmlDocument* document = GetDocument();
- if (document)
- document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
- return;
- }
-
- if (c == '>')
- break;
-
- *tag += (char) c;
- in->get();
-
- if (!firstCharFound && c != '<' && !IsWhiteSpace(c)) // first character after '<' that is not white space
- {
- firstCharFound = true;
- if (c == '/')
- closingTag = true;
- }
- }
- // If it was a closing tag, then read in the closing '>' to clean up the input stream.
- // If it was not, the streaming will be done by the tag.
- if (closingTag)
- {
- if (!in->good())
- return;
-
- int c = in->get();
- if (c <= 0)
- {
- TiXmlDocument* document = GetDocument();
- if (document)
- document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
- return;
- }
- assert(c == '>');
- *tag += (char) c;
-
- // We are done, once we've found our closing tag.
- return;
- }
- else
- {
- // If not a closing tag, id it, and stream.
- const char* tagloc = tag->c_str() + tagIndex;
- TiXmlNode* node = Identify(tagloc, TIXML_DEFAULT_ENCODING);
- if (!node)
- return;
- node->StreamIn(in, tag);
- delete node; // the child was only needed to pick the right StreamIn; it is not linked in
- node = 0;
-
- // No return: go around from the beginning: text, closing tag, or node.
- }
- }
- }
-}
-#endif
-
-// Parses one element starting at `p`: its name, its attributes and, unless
-// it is an empty "<name/>" tag, its content followed by the matching end tag.
-//
-// Returns the position just past the element, or 0 on failure. An error is
-// recorded on the owning document, if there is one.
-const char* TiXmlElement::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
-{
-    p = SkipWhiteSpace(p, encoding);
-    TiXmlDocument* document = GetDocument();
-
-    if (!p || !*p)
-    {
-        if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding);
-        return 0;
-    }
-
-    if (data)
-    {
-        data->Stamp(p, encoding);
-        location = data->Cursor();
-    }
-
-    if (*p != '<')
-    {
-        if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, p, data, encoding);
-        return 0;
-    }
-
-    p = SkipWhiteSpace(p+1, encoding);
-
-    // Read the name.
-    const char* pErr = p;
-
-    p = ReadName(p, &value, encoding);
-    if (!p || !*p)
-    {
-        if (document) document->SetError(TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding);
-        return 0;
-    }
-
-    TIXML_STRING endTag ("</");
-    endTag += value;
-    endTag += ">";
-
-    // Check for and read attributes. Also look for an empty
-    // tag or an end tag.
-    while (p && *p)
-    {
-        pErr = p;
-        p = SkipWhiteSpace(p, encoding);
-        if (!p || !*p)
-        {
-            if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding);
-            return 0;
-        }
-        if (*p == '/')
-        {
-            ++p;
-            // Empty tag.
-            if (*p != '>')
-            {
-                if (document) document->SetError(TIXML_ERROR_PARSING_EMPTY, p, data, encoding);
-                return 0;
-            }
-            return (p+1);
-        }
-        else if (*p == '>')
-        {
-            // Done with attributes (if there were any.)
-            // Read the value -- which can include other
-            // elements -- read the end tag, and return.
-            ++p;
-            p = ReadValue(p, data, encoding);   // Note this is an Element method, and will set the error if one happens.
-            if (!p || !*p)
-                return 0;
-
-            // We should find the end tag now
-            if (StringEqual(p, endTag.c_str(), false, encoding))
-            {
-                p += endTag.length();
-                return p;
-            }
-            else
-            {
-                if (document) document->SetError(TIXML_ERROR_READING_END_TAG, p, data, encoding);
-                return 0;
-            }
-        }
-        else
-        {
-            // Try to read an attribute:
-            TiXmlAttribute* attrib = new TiXmlAttribute();
-            if (!attrib)
-            {
-                if (document) document->SetError(TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding);
-                return 0;
-            }
-
-            attrib->SetDocument(document);
-            // Start of the attribute text, used for error locations. (This
-            // used to be a second `pErr` that shadowed the outer one.)
-            const char* attribStart = p;
-            p = attrib->Parse(p, data, encoding);
-
-            if (!p || !*p)
-            {
-                if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, attribStart, data, encoding);
-                delete attrib;
-                return 0;
-            }
-
-            // Handle the strange case of double attributes:
-            TiXmlAttribute* node = attributeSet.Find(attrib->NameTStr());
-            if (node)
-            {
-                // As before, the later value overwrites the earlier one and
-                // parsing stops. The failure is now recorded as well, so the
-                // caller no longer gets a 0 return with no error set.
-                node->SetValue(attrib->Value());
-                if (document) document->SetError(TIXML_ERROR_PARSING_ELEMENT, attribStart, data, encoding);
-                delete attrib;
-                return 0;
-            }
-
-            attributeSet.Add(attrib);
-        }
-    }
-    return p;
-}
-
-
-// Reads the content of an element: text and child nodes in any order, each
-// linked as a child of this element. Stops at the first "</" and returns its
-// position. Returns 0 when a child cannot be identified or parsed; if the
-// position is lost the error is recorded on the document.
-const char* TiXmlElement::ReadValue(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
-{
-    TiXmlDocument* document = GetDocument();
-
-    // Position before white space was skipped; needed when white space is
-    // not condensed, so that leading spaces survive.
-    const char* pWithWhiteSpace = p;
-    p = SkipWhiteSpace(p, encoding);
-
-    while (p && *p)
-    {
-        if (*p == '<')
-        {
-            // Either the end tag, a child node, or CDATA-style text.
-            if (StringEqual(p, "</", false, encoding))
-                return p;
-
-            TiXmlNode* child = Identify(p, encoding);
-            if (!child)
-                return 0;
-
-            p = child->Parse(p, data, encoding);
-            LinkEndChild(child);
-        }
-        else
-        {
-            // Take what we have, make a text element.
-            TiXmlText* textNode = new TiXmlText("");
-            if (!textNode)
-            {
-                if (document) document->SetError(TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding);
-                return 0;
-            }
-
-            const char* textStart = TiXmlBase::IsWhiteSpaceCondensed() ? p : pWithWhiteSpace;
-            p = textNode->Parse(textStart, data, encoding);
-
-            if (textNode->Blank())
-                delete textNode;
-            else
-                LinkEndChild(textNode);
-        }
-
-        pWithWhiteSpace = p;
-        p = SkipWhiteSpace(p, encoding);
-    }
-
-    if (!p)
-    {
-        if (document) document->SetError(TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding);
-    }
-    return p;
-}
-
-
-#ifdef TIXML_USE_STL
-// Copies characters from the stream onto the end of `tag` up to and
-// including the next '>'. A character value <= 0 records
-// TIXML_ERROR_EMBEDDED_NULL on the document and stops.
-void TiXmlUnknown::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
-{
-    for (;;)
-    {
-        if (!in->good())
-            return;
-
-        const int ch = in->get();
-        if (ch <= 0)
-        {
-            TiXmlDocument* document = GetDocument();
-            if (document)
-                document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
-            return;
-        }
-
-        (*tag) += (char) ch;
-        if (ch == '>')
-            return;     // All is well.
-    }
-}
-#endif
-
-
-// Parses markup tinyxml does not understand: everything between '<' and the
-// next '>' becomes this node's value. Returns the position after '>', the
-// position of the terminator when no '>' was found, or 0 when `p` does not
-// start with '<'.
-const char* TiXmlUnknown::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
-{
-    TiXmlDocument* document = GetDocument();
-    p = SkipWhiteSpace(p, encoding);
-
-    if (data)
-    {
-        data->Stamp(p, encoding);
-        location = data->Cursor();
-    }
-    if (!p || *p != '<')
-    {
-        if (document) document->SetError(TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding);
-        return 0;
-    }
-
-    // p is known to be non-null from here on and is only ever incremented.
-    value = "";
-    for (++p; *p && *p != '>'; ++p)
-        value += *p;
-
-    return (*p == '>') ? p + 1 : p;
-}
-
-#ifdef TIXML_USE_STL
-// Copies characters from the stream onto the end of `tag` until the text
-// collected so far ends in "-->". A character value <= 0 records
-// TIXML_ERROR_EMBEDDED_NULL on the document and stops.
-void TiXmlComment::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
-{
-    for (;;)
-    {
-        if (!in->good())
-            return;
-
-        const int ch = in->get();
-        if (ch <= 0)
-        {
-            TiXmlDocument* document = GetDocument();
-            if (document)
-                document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
-            return;
-        }
-
-        (*tag) += (char) ch;
-
-        if (ch != '>')
-            continue;
-
-        // A '>' only ends the comment when two dashes come right before it.
-        if (   tag->at(tag->length() - 2) == '-'
-            && tag->at(tag->length() - 3) == '-')
-        {
-            // All is well.
-            return;
-        }
-    }
-}
-#endif
-
-
-// Parses a comment: the text between "<!--" and "-->" becomes this node's
-// value, white space included. Returns the value handed back by ReadText()
-// for the end tag, or 0 when `p` does not start with "<!--".
-const char* TiXmlComment::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
-{
-    TiXmlDocument* document = GetDocument();
-    value = "";
-
-    p = SkipWhiteSpace(p, encoding);
-
-    if (data)
-    {
-        data->Stamp(p, encoding);
-        location = data->Cursor();
-    }
-    const char* startTag = "<!--";
-    const char* endTag = "-->";
-
-    if (!StringEqual(p, startTag, false, encoding))
-    {
-        // A comment that is not attached to a document has nowhere to
-        // report to; the other parsers check for that and so must this one.
-        if (document) document->SetError(TIXML_ERROR_PARSING_COMMENT, p, data, encoding);
-        return 0;
-    }
-    p += strlen(startTag);
-    p = ReadText(p, &value, false, endTag, false, encoding);
-    return p;
-}
-
-
-// Parses one attribute of the form name = value. The value may be in single
-// or double quotes; an unquoted value is accepted too and runs up to white
-// space, '/' or '>'. Returns the position after the value, or 0 on error
-// (recorded on the document when one is set).
-const char* TiXmlAttribute::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
-{
-    p = SkipWhiteSpace(p, encoding);
-    if (!p || !*p) return 0;
-
-    if (data)
-    {
-        data->Stamp(p, encoding);
-        location = data->Cursor();
-    }
-
-    // Read the name, the '=' and the value.
-    const char* nameStart = p;
-    p = ReadName(p, &name, encoding);
-    if (!p || !*p)
-    {
-        if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, nameStart, data, encoding);
-        return 0;
-    }
-
-    p = SkipWhiteSpace(p, encoding);
-    if (!p || *p != '=')
-    {
-        if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
-        return 0;
-    }
-
-    ++p;    // skip '='
-    p = SkipWhiteSpace(p, encoding);
-    if (!p || !*p)
-    {
-        if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
-        return 0;
-    }
-
-    const char SINGLE_QUOTE = '\'';
-    const char DOUBLE_QUOTE = '\"';
-
-    if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE)
-    {
-        // Quoted value: read up to the matching quote character.
-        const char* end = (*p == SINGLE_QUOTE) ? "\'" : "\"";
-        ++p;
-        return ReadText(p, &value, false, end, false, encoding);
-    }
-
-    // All attribute values should be in single or double quotes.
-    // But this is such a common error that the parser will try
-    // its best, even without them.
-    value = "";
-    for (; *p; ++p)
-    {
-        if (IsWhiteSpace(*p) || *p == '\n' || *p == '\r')   // whitespace
-            break;
-        if (*p == '/' || *p == '>')                         // tag end
-            break;
-
-        if (*p == SINGLE_QUOTE || *p == DOUBLE_QUOTE)
-        {
-            // [ 1451649 ] Attribute values with trailing quotes not handled correctly
-            // We did not have an opening quote but seem to have a
-            // closing one. Give up and throw an error.
-            if (document) document->SetError(TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding);
-            return 0;
-        }
-        value += *p;
-    }
-    return p;
-}
-
-#ifdef TIXML_USE_STL
-// Copies the raw text of this node from the stream onto the end of `tag`.
-//
-// Plain text is read up to, but not including, the next '<'. CDATA text is
-// read up to and including the closing "]]>". A character value <= 0 records
-// TIXML_ERROR_EMBEDDED_NULL on the document and stops.
-void TiXmlText::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
-{
-    while (in->good())
-    {
-        int c = in->peek();
-        if (!cdata && c == '<')
-            return;
-
-        if (c <= 0)
-        {
-            TiXmlDocument* document = GetDocument();
-            if (document)
-                document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
-            return;
-        }
-
-        (*tag) += (char) c;
-        in->get();      // "commits" the peek made above
-
-        // The CDATA branch used to read one single character and return, so
-        // the section was never consumed. Keep reading until the collected
-        // text ends in "]]>". The length check keeps at() from being called
-        // with an index that wrapped around.
-        if (cdata && c == '>' && tag->length() >= 3)
-        {
-            if (   tag->at(tag->length() - 2) == ']'
-                && tag->at(tag->length() - 3) == ']')
-            {
-                // All is well.
-                return;
-            }
-        }
-    }
-}
-#endif
-
-// Parses a text node starting at `p`.
-//
-// CDATA (the node is flagged as CDATA, or `p` starts with "<![CDATA["):
-// everything up to "]]>" is stored verbatim, and the value handed back by
-// ReadText() for the end tag is returned.
-// Plain text: read through ReadText() up to the next '<'; the returned
-// position is that of the '<' itself.
-//
-// Returns 0 on failure.
-const char* TiXmlText::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding encoding)
-{
-    value = "";
-    TiXmlDocument* document = GetDocument();
-
-    if (data)
-    {
-        data->Stamp(p, encoding);
-        location = data->Cursor();
-    }
-
-    const char* const startTag = "<![CDATA[";
-    const char* const endTag = "]]>";
-
-    if (cdata || StringEqual(p, startTag, false, encoding))
-    {
-        cdata = true;
-
-        if (!StringEqual(p, startTag, false, encoding))
-        {
-            // A text node that is not attached to a document has nowhere to
-            // report to; the other parsers check for that and so must this one.
-            if (document) document->SetError(TIXML_ERROR_PARSING_CDATA, p, data, encoding);
-            return 0;
-        }
-        p += strlen(startTag);
-
-        // Keep all the white space, ignore the encoding, etc.
-        while (   p && *p
-               && !StringEqual(p, endTag, false, encoding)
-              )
-        {
-            value += *p;
-            ++p;
-        }
-
-        // p now sits on the end tag (or the terminator); ReadText is used
-        // only to step over the end tag, its text output is discarded.
-        TIXML_STRING dummy;
-        p = ReadText(p, &dummy, false, endTag, false, encoding);
-        return p;
-    }
-    else
-    {
-        bool ignoreWhite = true;
-
-        const char* end = "<";
-        p = ReadText(p, &value, ignoreWhite, end, false, encoding);
-        if (p)
-            return p-1; // don't truncate the '<'
-        return 0;
-    }
-}
-
-#ifdef TIXML_USE_STL
-// Copies characters from the stream onto the end of `tag` up to and
-// including the next '>'. A character value <= 0 records
-// TIXML_ERROR_EMBEDDED_NULL on the document and stops.
-void TiXmlDeclaration::StreamIn(TIXML_ISTREAM * in, TIXML_STRING * tag)
-{
-    for (;;)
-    {
-        if (!in->good())
-            return;
-
-        const int ch = in->get();
-        if (ch <= 0)
-        {
-            TiXmlDocument* document = GetDocument();
-            if (document)
-                document->SetError(TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN);
-            return;
-        }
-
-        (*tag) += (char) ch;
-        if (ch == '>')
-            return;     // All is well.
-    }
-}
-#endif
-
-// Parses an XML declaration ("<?xml ... >") and stores its version,
-// encoding and standalone attributes; anything else inside it is skipped.
-//
-// Returns the position just past the closing '>', or 0 when `p` does not
-// start with "<?xml" or the input ends before '>' is found.
-const char* TiXmlDeclaration::Parse(const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding)
-{
-    p = SkipWhiteSpace(p, _encoding);
-    // Find the beginning, find the end, and look for
-    // the stuff in-between.
-    TiXmlDocument* document = GetDocument();
-    if (!p || !*p || !StringEqual(p, "<?xml", true, _encoding))
-    {
-        if (document) document->SetError(TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding);
-        return 0;
-    }
-    if (data)
-    {
-        data->Stamp(p, _encoding);
-        location = data->Cursor();
-    }
-    p += 5;
-
-    version = "";
-    encoding = "";
-    standalone = "";
-
-    while (p && *p)
-    {
-        if (*p == '>')
-        {
-            ++p;
-            return p;
-        }
-
-        p = SkipWhiteSpace(p, _encoding);
-
-        // Skipping white space can land on the terminator of a truncated
-        // declaration. StringEqual() asserts on empty input, so stop here;
-        // the result is the same 0 that was returned before.
-        if (!p || !*p)
-            break;
-
-        if (StringEqual(p, "version", true, _encoding))
-        {
-            TiXmlAttribute attrib;
-            p = attrib.Parse(p, data, _encoding);
-            version = attrib.Value();
-        }
-        else if (StringEqual(p, "encoding", true, _encoding))
-        {
-            TiXmlAttribute attrib;
-            p = attrib.Parse(p, data, _encoding);
-            encoding = attrib.Value();
-        }
-        else if (StringEqual(p, "standalone", true, _encoding))
-        {
-            TiXmlAttribute attrib;
-            p = attrib.Parse(p, data, _encoding);
-            standalone = attrib.Value();
-        }
-        else
-        {
-            // Read over whatever it is.
-            while (p && *p && *p != '>' && !IsWhiteSpace(*p))
-                ++p;
-        }
-    }
-    return 0;
-}
-
-// Returns true when the value is empty or consists of white space only.
-bool TiXmlText::Blank() const
-{
-    unsigned idx = 0;
-    while (idx < value.length())
-    {
-        if (!IsWhiteSpace(value[idx]))
-            return false;
-        ++idx;
-    }
-    return true;
-}
-