doc/html/tinyxmlparser_8cc_source.html

 /*

 www.sourceforge.net/projects/tinyxml

 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)


 This software is provided 'as-is', without any express or implied

 warranty. In no event will the authors be held liable for any

 damages arising from the use of this software.


 Permission is granted to anyone to use this software for any

 purpose, including commercial applications, and to alter it and

 redistribute it freely, subject to the following restrictions:


 1. The origin of this software must not be misrepresented; you must

 not claim that you wrote the original software. If you use this

 software in a product, an acknowledgment in the product documentation

 would be appreciated but is not required.


 2. Altered source versions must be plainly marked as such, and

 must not be misrepresented as being the original software.


 3. This notice may not be removed or altered from any source

 distribution.


  F.Gaede, DESY : changed extension to .cc  for use with gear

                  and include from "gearxml/tinyxml.h"

                : put in namespace gear

    $Id: tinyxmlparser.cc,v 1.2 2008-12-19 13:52:34 gaede Exp $

 */


 #include "gearxml/tinyxml.h"

 #include <ctype.h>

 #include <stddef.h>


 namespace gear{


 //#define DEBUG_PARSER


 // Note that "PutString" hardcodes the same list, so this table

 // is less flexible than it appears: changing the entries

 // or their order will break PutString.

 // Table of the predefined XML entities understood by the parser.
 // Each row supplies, in order, the entity text, the length of that text,
 // and the single character the entity stands for. GetEntity() walks the
 // table front to back and compares the first strLength bytes of the input
 // against str, writing chr on a match.
 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =

 {

         { "&amp;",  5, '&' },

         { "&lt;",   4, '<' },

         { "&gt;",   4, '>' },

         { "&quot;", 6, '\"' },

         { "&apos;", 6, '\'' }

 };


 // Bunch of unicode info at:

 //              http://www.unicode.org/faq/utf_bom.html

 // That page is the basis of the table below, which gives the number of

 // bytes in a UTF-8 sequence from its lead byte. Invalid lead bytes are given

 // a length of 1 -- although the result will be junk, pass it through as much as possible.

 // Beware of the non-characters in UTF-8:

 //                              ef bb bf (Microsoft "lead bytes")

 //                              ef bf be

 //                              ef bf bf


 // The three bytes (EF BB BF) that this file treats as the UTF-8 byte order
 // mark. TIXML_UTF_LEAD_0 is also the first byte of the other two zero-width
 // sequences (EF BF BE and EF BF BF) that the parser skips.
 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;

 const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;

 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;


 // Lookup table indexed by the first byte of a UTF-8 sequence; the entry is
 // the number of bytes the parser consumes for that sequence. Bytes that
 // cannot start a valid sequence (0x80-0xc1 and 0xf5-0xff) are given length 1
 // so that malformed input is passed through one byte at a time.
 // Each row below covers 16 consecutive byte values; the trailing comment
 // names the first value of the row.
 const int TiXmlBase::utf8ByteTable[256] =

 {

         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x00

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x10

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x20

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x30

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x40

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x50

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x60

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0

                 1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0

                 1,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte

                 2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0

                 3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      3,      // 0xe0 0xe0 to 0xef 3 byte

                 4,      4,      4,      4,      4,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1       // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid

 };


 // Encode one UTF-32 code point as a UTF-8 byte sequence.
 //
 //   input   the code point to encode
 //   output  destination buffer; receives *length bytes, no terminator is written
 //   length  out: number of bytes written (1 to 4), or 0 when input is
 //           0x200000 or larger, in which case output is left untouched
 void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
 {
         const unsigned long CONTINUATION_MASK = 0xBF;   // clears bit 6 of a trailing byte
         const unsigned long CONTINUATION_MARK = 0x80;   // sets bit 7 of a trailing byte
         const unsigned long LEAD_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

         // Work out how many bytes the encoded form needs.
         int byteCount = 0;
         if ( input < 0x80 )
         {
                 byteCount = 1;
         }
         else if ( input < 0x800 )
         {
                 byteCount = 2;
         }
         else if ( input < 0x10000 )
         {
                 byteCount = 3;
         }
         else if ( input < 0x200000 )
         {
                 byteCount = 4;
         }
         else
         {
                 // Too large for this encoder; report "nothing written".
                 *length = 0;
                 return;
         }
         *length = byteCount;

         // Fill the buffer back to front: each trailing byte takes the low
         // six bits of what is left, then those bits are shifted away.
         char* cursor = output + byteCount;
         for ( int remaining = byteCount; remaining > 1; --remaining )
         {
                 --cursor;
                 *cursor = (char)((input | CONTINUATION_MARK) & CONTINUATION_MASK);
                 input >>= 6;
         }

         // Whatever remains goes into the lead byte, tagged with the
         // marker bits for a sequence of this length.
         --cursor;
         *cursor = (char)(input | LEAD_MARK[byteCount]);
 }


 // Report whether a byte may be treated as a letter.
 //
 // Bytes below 127 are classified with isalpha(); every other byte is
 // accepted (returns 1), because telling letters from non-letters across
 // the whole Unicode set is out of scope here. The encoding argument is
 // currently ignored.
 /*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
 {
         const bool lowAscii = ( anyByte < 127 );
         return lowAscii ? isalpha( anyByte ) : 1;
 }


 // Report whether a byte may be treated as a letter or a digit.
 //
 // Bytes below 127 are classified with isalnum(); every other byte is
 // accepted (returns 1), the same generous rule IsAlpha() applies. The
 // encoding argument is currently ignored.
 /*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
 {
         const bool lowAscii = ( anyByte < 127 );
         return lowAscii ? isalnum( anyByte ) : 1;
 }


 // Tracks the row/column position of the parser within the input buffer.
 // Instances can only be created by TiXmlDocument (the constructor is
 // private and the document is a friend).
 class TiXmlParsingData
 {
         friend class TiXmlDocument;

   public:
         // Bring the cursor up to date for the position `now` in the buffer.
         void Stamp( const char* now, TiXmlEncoding encoding );

         // The row/column computed by the most recent Stamp().
         const TiXmlCursor& Cursor()     { return cursor; }

   private:
         // Only used by the document!
         // start     first character of the buffer being parsed (must not be null)
         // _tabsize  tab width used when advancing the column
         // row, col  position that `start` corresponds to
         TiXmlParsingData( const char* start, int _tabsize, int row, int col )
                 : stamp( start ),
                   tabsize( _tabsize )
         {
                 assert( start );
                 cursor.row = row;
                 cursor.col = col;
         }

         TiXmlCursor           cursor{};    // position matching `stamp`
         const char*           stamp{};     // last buffer location accounted for
         int                   tabsize{};   // tab width used by Stamp()
 };


 // Advance the stored cursor (row/column) from the previously stamped
 // location up to `now`, and remember `now` as the new stamped location.
 //
 //   now       position in the buffer to bring the cursor up to; must not be null
 //   encoding  when TIXML_ENCODING_UTF8, multi-byte sequences count as a
 //             single column and the zero-width sequences EF BB BF,
 //             EF BF BE and EF BF BF do not advance the column
 //
 // Nothing is done when tabsize is below 1. If a terminating null is met
 // before `now`, the function returns without updating the cursor.
 void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
 {
         assert( now );

         // Do nothing if the tabsize is 0.
         if ( tabsize < 1 )
         {
                 return;
         }

         // Get the current row, column.
         int row = cursor.row;
         int col = cursor.col;
         const char* p = stamp;
         assert( p );

         while ( p < now )
         {
                 // Treat p as unsigned, so we have a happy compiler.
                 const unsigned char* pU = (const unsigned char*)p;

                 // Code contributed by Fletcher Dunn: (modified by lee)
                 switch (*pU) {
                         case 0:
                                 // We *should* never get here, but in case we do, don't
                                 // advance past the terminating null character, ever
                                 return;

                         case '\r':
                                 // bump down to the next line
                                 ++row;
                                 col = 0;
                                 // Eat the character
                                 ++p;

                                 // Check for \r\n sequence, and treat this as a single character
                                 if (*p == '\n') {
                                         ++p;
                                 }
                                 break;

                         case '\n':
                                 // bump down to the next line
                                 ++row;
                                 col = 0;

                                 // Eat the character
                                 ++p;

                                 // Check for \n\r sequence, and treat this as a single
                                 // character.  (Yes, this bizarre thing does occur still
                                 // on some arcane platforms...)
                                 if (*p == '\r') {
                                         ++p;
                                 }
                                 break;

                         case '\t':
                                 // Eat the character
                                 ++p;

                                 // Skip to next tab stop
                                 col = (col / tabsize + 1) * tabsize;
                                 break;

                         case TIXML_UTF_LEAD_0:
                                 if ( encoding == TIXML_ENCODING_UTF8 )
                                 {
                                         if ( *(p+1) && *(p+2) )
                                         {
                                                 // In these cases, don't advance the column. These are
                                                 // 0-width spaces.
                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
                                                         p += 3;
                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
                                                         p += 3;
                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
                                                         p += 3;
                                                 else
                                                         { p +=3; ++col; }       // A normal character.
                                         }
                                         else
                                         {
                                                 // Truncated sequence: the buffer ends within the next
                                                 // two bytes. Consume only the lead byte so the loop
                                                 // always makes progress (previously p was left where
                                                 // it was and the loop never terminated) and so the
                                                 // terminating null is never stepped over.
                                                 ++p;
                                                 ++col;
                                         }
                                 }
                                 else
                                 {
                                         ++p;
                                         ++col;
                                 }
                                 break;

                         default:
                                 if ( encoding == TIXML_ENCODING_UTF8 )
                                 {
                                         // Eat the 1 to 4 byte utf8 character.
                                         int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
                                         if ( step == 0 )
                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
                                         p += step;

                                         // Just advance one column, of course.
                                         ++col;
                                 }
                                 else
                                 {
                                         ++p;
                                         ++col;
                                 }
                                 break;
                 }
         }

         // Publish the new position.
         cursor.row = row;
         cursor.col = col;
         assert( cursor.row >= -1 );
         assert( cursor.col >= -1 );
         stamp = p;
         assert( stamp );
 }


 // Skip leading white space and return a pointer to the first character
 // that is not white space (which may be the terminating null).
 //
 // Returns 0 when p is null or already points at the terminating null.
 // With TIXML_ENCODING_UTF8 the three-byte sequences EF BB BF, EF BF BE
 // and EF BF BF are skipped as well.
 const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
 {
         if ( !p || !*p )
         {
                 return 0;
         }

         if ( encoding != TIXML_ENCODING_UTF8 )
         {
                 // Non-UTF-8 input: plain white space only.
                 while ( ( *p && IsWhiteSpace( *p ) ) || *p == '\n' || *p =='\r' )
                         ++p;
                 return p;
         }

         for ( ;; )
         {
                 const unsigned char* bytes = (const unsigned char*)p;
                 if ( bytes[0] == 0 )
                         break;

                 if ( bytes[0] == TIXML_UTF_LEAD_0 )
                 {
                         // The third byte is only inspected once the second one is
                         // known to be non-null, so the terminator is never passed.
                         const unsigned char second = bytes[1];
                         const bool zeroWidth =
                                    ( second == TIXML_UTF_LEAD_1 && bytes[2] == TIXML_UTF_LEAD_2 )
                                 || ( second == 0xbfU && ( bytes[2] == 0xbeU || bytes[2] == 0xbfU ) );
                         if ( zeroWidth )
                         {
                                 p += 3;
                                 continue;
                         }
                 }

                 // Still using old rules for white space.
                 const bool blank = IsWhiteSpace( *p ) || *p == '\n' || *p =='\r';
                 if ( !blank )
                         break;
                 ++p;
         }

         return p;
 }


 #ifdef TIXML_USE_STL

 // Move leading white space from the stream to the end of *tag.
 //
 // Returns true once the next character in the stream is not white space
 // (or peek() yields a value <= 0); returns false if the stream stops
 // being good() first. No document is reachable here, so failure is silent.
 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
 {
         while ( in->good() )
         {
                 const int next = in->peek();
                 if ( !IsWhiteSpace( next ) || next <= 0 )
                 {
                         return true;
                 }

                 *tag += (char) in->get();
         }
         return false;
 }


 // Copy characters from the stream to the end of *tag until `character`
 // is the next one to be read; that character is left in the stream.
 //
 // Returns true when `character` was found. Returns false when peek()
 // yields a value <= 0 or the stream stops being good(); no document is
 // reachable at this scope, so the failure is silent.
 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
 {
         for ( ;; )
         {
                 if ( !in->good() )
                 {
                         return false;
                 }

                 const int next = in->peek();
                 if ( next == character )
                 {
                         return true;
                 }
                 if ( next <= 0 )
                 {
                         return false;
                 }

                 in->get();
                 *tag += (char) next;
         }
 }

 #endif


 // Read an XML name starting at p into *name.
 //
 // A name must begin with a letter (as judged by IsAlpha, which is
 // generous for non-ASCII bytes) or an underscore; the following
 // characters may be letters, digits, '_', '-', '.' or ':'. Colons are
 // accepted everywhere because namespaces are not distinguished from names.
 //
 // Returns a pointer just past the name, or 0 when no name starts at p.
 // *name is always reset to the empty string first.
 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
 {
         *name = "";
         assert( p );

         if ( !p || !*p )
         {
                 return 0;
         }
         if ( !IsAlpha( (unsigned char) *p, encoding ) && *p != '_' )
         {
                 return 0;
         }

         for ( ; *p; ++p )
         {
                 const char ch = *p;
                 const bool nameChar =    IsAlphaNum( (unsigned char) ch, encoding )
                                       || ch == '_'
                                       || ch == '-'
                                       || ch == '.'
                                       || ch == ':';
                 if ( !nameChar )
                 {
                         break;
                 }
                 (*name) += ch;
         }
         return p;
 }


 // Decode the entity or character reference that starts at p (p is
 // expected to point at the '&').
 //
 //   value     receives the decoded byte(s)
 //   length    out: number of bytes stored in value
 //   encoding  with TIXML_ENCODING_UTF8 a numeric reference is written as a
 //             UTF-8 sequence; otherwise it is truncated to a single char
 //
 // Returns a pointer just past the consumed input, or 0 when a numeric
 // reference is malformed (no ';', or a character that is not a digit).
 // When nothing matches, *p is copied to *value, *length stays 0 and
 // p+1 is returned.
 const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
 {
         *length = 0;

         // Numeric character reference: "&#<decimal>;" or "&#x<hex>;".
         if ( *(p+1) == '#' && *(p+2) )
         {
                 const bool isHex = ( *(p+2) == 'x' );
                 const unsigned radix = isHex ? 16 : 10;
                 const char* digits = isHex ? p+3 : p+2;
                 const char sentinel = isHex ? 'x' : '#';    // character just before the digits

                 if ( !*digits )
                 {
                         return 0;
                 }

                 const char* terminator = strchr( digits, ';' );
                 if ( !terminator || !*terminator )
                 {
                         return 0;
                 }

                 // Accumulate the value right to left, from the digit before
                 // the ';' back to the sentinel.
                 unsigned long ucs = 0;
                 unsigned mult = 1;
                 for ( const char* q = terminator - 1; *q != sentinel; --q )
                 {
                         int digit = 0;
                         if ( *q >= '0' && *q <= '9' )
                                 digit = *q - '0';
                         else if ( isHex && *q >= 'a' && *q <= 'f' )
                                 digit = *q - 'a' + 10;
                         else if ( isHex && *q >= 'A' && *q <= 'F' )
                                 digit = *q - 'A' + 10;
                         else
                                 return 0;

                         ucs += mult * digit;
                         mult *= radix;
                 }

                 if ( encoding == TIXML_ENCODING_UTF8 )
                 {
                         // convert the UCS to UTF-8
                         ConvertUTF32ToUTF8( ucs, value, length );
                 }
                 else
                 {
                         *value = (char)ucs;
                         *length = 1;
                 }
                 return terminator + 1;
         }

         // Named entity: compare against the predefined table.
         for ( int i = 0; i < NUM_ENTITY; ++i )
         {
                 if ( strncmp( entity[i].str, p, entity[i].strLength ) != 0 )
                 {
                         continue;
                 }
                 assert( strlen( entity[i].str ) == entity[i].strLength );
                 *value = entity[i].chr;
                 *length = 1;
                 return ( p + entity[i].strLength );
         }

         // So it wasn't an entity, its unrecognized, or something like that.
         *value = *p;    // Don't put back the last one, since we return it!
         return p+1;
 }


 // Test whether the text at p begins with `tag`.
 //
 //   p           text to examine; must be non-null and non-empty
 //               (asserted; returns false otherwise)
 //   tag         the prefix to look for
 //   ignoreCase  compare through ToLower() instead of byte for byte
 //   encoding    forwarded to ToLower()
 //
 // Returns true when every character of `tag` was matched.
 bool TiXmlBase::StringEqual( const char* p,
                              const char* tag,
                              bool ignoreCase,
                              TiXmlEncoding encoding )
 {
         assert( p );
         assert( tag );
         if ( !p || !*p )
         {
                 assert( 0 );
                 return false;
         }

         // Walk both strings in step until one ends or a mismatch is found.
         for ( const char* cursor = p; *cursor && *tag; ++cursor, ++tag )
         {
                 const bool same = ignoreCase
                         ? ( ToLower( *cursor, encoding ) == ToLower( *tag, encoding ) )
                         : ( *cursor == *tag );
                 if ( !same )
                 {
                         break;
                 }
         }

         // Have we found the end of the tag, and everything equal?
         return *tag == 0;
 }


 // Read text starting at p into *text, stopping at `endTag`.
 //
 //   p                start of the text
 //   text             receives the decoded text (reset to empty first)
 //   trimWhiteSpace   when true, and white space condensing is enabled,
 //                    leading white space is dropped and every inner run of
 //                    white space becomes a single space; otherwise all white
 //                    space is kept
 //   endTag           string that terminates the text
 //   caseInsensitive  match endTag without regard to case
 //   encoding         forwarded to the character and entity decoders
 //
 // Returns a pointer just past endTag. Returns 0 when endTag was not found
 // before the end of the input, or when decoding a character failed.
 const char* TiXmlBase::ReadText(        const char* p,
                                         TIXML_STRING * text,
                                         bool trimWhiteSpace,
                                         const char* endTag,
                                         bool caseInsensitive,
                                         TiXmlEncoding encoding )
 {
         *text = "";
         if (    !trimWhiteSpace                 // certain tags always keep whitespace
                  || !condenseWhiteSpace )       // if true, whitespace is always kept
         {
                 // Keep all the white space.
                 while (    p && *p
                                 && !StringEqual( p, endTag, caseInsensitive, encoding )
                           )
                 {
                         int len;
                         char cArr[4] = { 0, 0, 0, 0 };
                         p = GetChar( p, cArr, &len, encoding );
                         text->append( cArr, len );
                 }
         }
         else
         {
                 bool whitespace = false;

                 // Remove leading white space:
                 p = SkipWhiteSpace( p, encoding );
                 while (    p && *p
                                 && !StringEqual( p, endTag, caseInsensitive, encoding ) )
                 {
                         if ( *p == '\r' || *p == '\n' )
                         {
                                 whitespace = true;
                                 ++p;
                         }
                         else if ( IsWhiteSpace( *p ) )
                         {
                                 whitespace = true;
                                 ++p;
                         }
                         else
                         {
                                 // If we've found whitespace, add it before the
                                 // new character. Any whitespace just becomes a space.
                                 if ( whitespace )
                                 {
                                         (*text) += ' ';
                                         whitespace = false;
                                 }
                                 int len;
                                 char cArr[4] = { 0, 0, 0, 0 };
                                 p = GetChar( p, cArr, &len, encoding );
                                 if ( len == 1 )
                                         (*text) += cArr[0];     // more efficient
                                 else
                                         text->append( cArr, len );
                         }
                 }
         }

         // The loops above stop either on a match of endTag (p points at it)
         // or because p became null / reached the terminating null. Only in
         // the first case is it valid to step over endTag; otherwise adding
         // strlen( endTag ) would produce a pointer outside the buffer (or a
         // bogus non-null pointer from null), so report failure instead.
         if ( !p || !*p )
         {
                 return 0;
         }
         return p + strlen( endTag );
 }


 #ifdef TIXML_USE_STL


 // Stream a document in from `in`, accumulating the raw text in *tag.
 //
 // A document does not know what it is about to read, so each pass reads
 // what is presumed to be a tag up to (but not including) the closing
 // '>', identifies the node type from that text, and lets a temporary
 // node of that type continue the streaming. Streaming stops after the
 // first element (the root); the actual parsing is left to the >> operator.
 // Any failure is recorded through SetError().
 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
 {
         if ( !StreamTo( in, '<', tag ) )
         {
                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
                 return;
         }

         while ( in->good() )
         {
                 // Remember where this node's text begins inside *tag.
                 const int tagIndex = (int) tag->length();

                 // Pull characters up to, but not including, the next '>'.
                 while ( in->good() && in->peek() != '>' )
                 {
                         const int c = in->get();
                         if ( c <= 0 )
                         {
                                 SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                                 break;
                         }
                         (*tag) += (char) c;
                 }

                 if ( !in->good() )
                 {
                         // The stream gave out; fall through to the error below.
                         break;
                 }

                 // We now have something we presume to be a node of some sort.
                 // Identify it, and call the node to continue streaming.
                 TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
                 if ( !node )
                 {
                         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
                         return;
                 }

                 node->StreamIn( in, tag );
                 const bool isElement = node->ToElement() != 0;
                 delete node;
                 node = 0;

                 // If this is the root element, we're done. Parsing will be
                 // done by the >> operator.
                 if ( isElement )
                 {
                         return;
                 }
         }

         // We should have returned sooner.
         SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
 }


 #endif


 // Parse a complete document from the null-terminated buffer p.
 //
 //   p         start of the XML text
 //   prevData  optional; when given, its cursor supplies the starting row/column
 //   encoding  encoding to assume; when TIXML_ENCODING_UNKNOWN it is taken
 //             from a leading EF BB BF sequence or from the first
 //             declaration node that is parsed
 //
 // Returns the position where parsing stopped, or 0 with
 // TIXML_ERROR_DOCUMENT_EMPTY set when the input is empty or produced no nodes.
 const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
 {
         ClearError();

         // A document contains nothing but other tags, so most of the
         // work at this level is skipping white space.
         if ( !p || !*p )
         {
                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
                 return 0;
         }

         // The location must be established before any white space is
         // skipped, so that parsing starts from the pointer we are given.
         location.Clear();
         location.row = prevData ? prevData->cursor.row : 0;
         location.col = prevData ? prevData->cursor.col : 0;

         TiXmlParsingData data( p, TabSize(), location.row, location.col );
         location = data.Cursor();

         if ( encoding == TIXML_ENCODING_UNKNOWN )
         {
                 // Check for the Microsoft UTF-8 lead bytes.
                 const unsigned char* lead = (const unsigned char*)p;
                 const bool hasLeadBytes =    lead[0] == TIXML_UTF_LEAD_0
                                           && lead[1] == TIXML_UTF_LEAD_1
                                           && lead[2] == TIXML_UTF_LEAD_2;
                 if ( hasLeadBytes )
                 {
                         encoding = TIXML_ENCODING_UTF8;
                 }
         }

         p = SkipWhiteSpace( p, encoding );
         if ( !p )
         {
                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
                 return 0;
         }

         while ( p && *p )
         {
                 TiXmlNode* node = Identify( p, encoding );
                 if ( !node )
                 {
                         break;
                 }
                 p = node->Parse( p, &data, encoding );
                 LinkEndChild( node );

                 // Did we get encoding info?
                 if ( encoding == TIXML_ENCODING_UNKNOWN )
                 {
                         TiXmlDeclaration* dec = node->ToDeclaration();
                         if ( dec )
                         {
                                 const char* enc = dec->Encoding();
                                 assert( enc );

                                 // An empty encoding defaults to UTF-8; "UTF8" is
                                 // incorrect, but be nice and accept it too.
                                 const bool utf8 =    *enc == 0
                                                   || StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN )
                                                   || StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN );
                                 encoding = utf8 ? TIXML_ENCODING_UTF8 : TIXML_ENCODING_LEGACY;
                         }
                 }

                 p = SkipWhiteSpace( p, encoding );
         }

         // Was this empty?
         if ( !firstChild )
         {
                 SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
                 return 0;
         }

         // All is well.
         return p;
 }


 // Record a parse error on the document.
 //
 //   err       error id; must lie strictly between 0 and TIXML_ERROR_STRING_COUNT
 //   pError    buffer position of the error, or 0 if unknown
 //   data      parsing data used to turn pError into a row/column, or 0
 //   encoding  forwarded to TiXmlParsingData::Stamp()
 //
 // Only the first error of a chain is kept, since it is the most
 // accurate; later calls return without changing anything. The error
 // location is filled in only when both pError and data are supplied.
 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
 {
         if ( error )
         {
                 return;
         }

         assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
         errorId = err;
         errorDesc = errorString[ err ];
         error = true;

         errorLocation.Clear();
         if ( !pError || !data )
         {
                 return;
         }

         data->Stamp( pError, encoding );
         errorLocation = data->Cursor();
 }


 /**
  * Looks at the text at p and creates an empty node of the matching kind.
  *
  * The new node has its parent set to this node but is not parsed or linked
  * here.
  *
  * @param p         Text to inspect; leading white space is skipped.
  * @param encoding  Encoding passed to the white space / comparison helpers.
  * @return The newly allocated node, or 0 if the text does not start with '<'
  *         or no node could be created.
  */
 TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
 {
         // Anything that does not open with '<' cannot be identified.
         p = SkipWhiteSpace( p, encoding );
         if ( !p || *p != '<' )
                 return 0;

         TiXmlDocument* owner = GetDocument();
         p = SkipWhiteSpace( p, encoding );
         if ( !p || !*p )
                 return 0;

         // Classification, tried in this order:
         //   "<?xml" (case-insensitive)         -> declaration
         //   "<!--"                             -> comment
         //   "<!"                               -> unknown
         //   '<' followed by a letter or '_'    -> element
         //   anything else                      -> unknown
         const char* const declarationPrefix = "<?xml";
         const char* const commentPrefix     = "<!--";
         const char* const dtdPrefix         = "<!";

         TiXmlNode* created = 0;

         if ( StringEqual( p, declarationPrefix, true, encoding ) )
         {
                 #ifdef DEBUG_PARSER
                         TIXML_LOG( "XML parsing Declaration\n" );
                 #endif
                 created = new TiXmlDeclaration();
         }
         else if ( StringEqual( p, commentPrefix, false, encoding ) )
         {
                 #ifdef DEBUG_PARSER
                         TIXML_LOG( "XML parsing Comment\n" );
                 #endif
                 created = new TiXmlComment();
         }
         else if ( StringEqual( p, dtdPrefix, false, encoding ) )
         {
                 #ifdef DEBUG_PARSER
                         TIXML_LOG( "XML parsing Unknown(1)\n" );
                 #endif
                 created = new TiXmlUnknown();
         }
         else if ( IsAlpha( p[1], encoding ) || p[1] == '_' )
         {
                 #ifdef DEBUG_PARSER
                         TIXML_LOG( "XML parsing Element\n" );
                 #endif
                 created = new TiXmlElement( "" );
         }
         else
         {
                 #ifdef DEBUG_PARSER
                         TIXML_LOG( "XML parsing Unknown(2)\n" );
                 #endif
                 created = new TiXmlUnknown();
         }

         if ( !created )
         {
                 if ( owner )
                         owner->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
                 return created;
         }

         // Set the parent so the new node can report errors while it parses.
         created->parent = this;
         return created;
 }


 #ifdef TIXML_USE_STL


 /**
  * Streams the rest of this element from `in`, appending every consumed
  * character to `*tag`.
  *
  * First the start tag is read up to and including its '>'. If the tag ends
  * in "/>" the element is complete. Otherwise text, child nodes and finally
  * the closing tag are streamed in a loop.
  *
  * Whenever a character value <= 0 is read, TIXML_ERROR_EMBEDDED_NULL is
  * reported on the owning document (if there is one) and the function
  * returns. It also returns silently as soon as the stream is no longer good.
  *
  * @param in   Input stream positioned inside this element's start tag.
  * @param tag  Accumulated text; already holds the part of the element that
  *             was read before this call.
  */
 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)

 {

         // We're called with some amount of pre-parsing. That is, some of "this"

         // element is in "tag". Go ahead and stream to the closing ">"

         while( in->good() )

         {

                 int c = in->get();

                 if ( c <= 0 )

                 {

                         TiXmlDocument* document = GetDocument();

                         if ( document )

                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

                         return;

                 }

                 (*tag) += (char) c ;


                 if ( c == '>' )

                         break;

         }


         // Fewer than 3 characters: the checks on the last two characters below
         // are not attempted.
         if ( tag->length() < 3 ) return;


         // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.

         // If not, identify and stream.


         if (    tag->at( tag->length() - 1 ) == '>'

                  && tag->at( tag->length() - 2 ) == '/' )

         {

                 // All good!

                 return;

         }

         else if ( tag->at( tag->length() - 1 ) == '>' )

         {

                 // There is more. Could be:

                 //              text

                 //              closing tag

                 //              another node.

                 for ( ;; )

                 {

                         StreamWhiteSpace( in, tag );


                         // Do we have text?

                         if ( in->good() && in->peek() != '<' )

                         {

                                 // Yep, text.

                                 // The temporary text node is used only to drive the streaming.
                                 TiXmlText text( "" );

                                 text.StreamIn( in, tag );


                                 // What follows text is a closing tag or another node.

                                 // Go around again and figure it out.

                                 continue;

                         }


                         // We now have either a closing tag...or another node.

                         // We should be at a "<", regardless.

                         if ( !in->good() ) return;

                         assert( in->peek() == '<' );

                         // Offset in *tag where the upcoming tag starts; used below to hand
                         // only that part to Identify().
                         int tagIndex = tag->length();


                         bool closingTag = false;

                         bool firstCharFound = false;


                         // Copy characters up to, but not including, the next '>'; the '>'
                         // itself is left in the stream.
                         for( ;; )

                         {

                                 if ( !in->good() )

                                         return;


                                 int c = in->peek();

                                 if ( c <= 0 )

                                 {

                                         TiXmlDocument* document = GetDocument();

                                         if ( document )

                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

                                         return;

                                 }


                                 if ( c == '>' )

                                         break;


                                 *tag += (char) c;

                                 in->get();


                                 // The first character that is neither '<' nor white space decides
                                 // whether this is a closing tag ('/').
                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )

                                 {

                                         firstCharFound = true;

                                         if ( c == '/' )

                                                 closingTag = true;

                                 }

                         }

                         // If it was a closing tag, then read in the closing '>' to clean up the input stream.

                         // If it was not, the streaming will be done by the tag.

                         if ( closingTag )

                         {

                                 if ( !in->good() )

                                         return;


                                 int c = in->get();

                                 if ( c <= 0 )

                                 {

                                         TiXmlDocument* document = GetDocument();

                                         if ( document )

                                                 document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );

                                         return;

                                 }

                                 assert( c == '>' );

                                 *tag += (char) c;


                                 // We are done, once we've found our closing tag.

                                 return;

                         }

                         else

                         {

                                 // If not a closing tag, id it, and stream.

                                 const char* tagloc = tag->c_str() + tagIndex;

                                 TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );

                                 if ( !node )

                                         return;

                                 // The identified node only drives the streaming of its own text
                                 // into *tag; it is deleted right afterwards.
                                 node->StreamIn( in, tag );

                                 delete node;

                                 node = 0;


                                 // No return: go around from the beginning: text, closing tag, or node.

                         }

                 }

         }

 }

 #endif


 /**
  * Parses one element beginning at its opening '<': the name, any attributes,
  * and then either the empty-tag terminator "/>" or the content followed by
  * the matching "</name>" end tag.
  *
  * Every failure path reports an error on the owning document when there is
  * one. TiXmlDocument::SetError keeps only the first error, so errors already
  * reported by nested parsers are not overwritten.
  *
  * @param p         Text to parse; leading white space is skipped.
  * @param data      Optional parsing data used to stamp cursor locations.
  * @param encoding  Encoding passed through to the low-level readers.
  * @return Pointer just past the element, or 0 on failure.
  */
 const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
 {
         p = SkipWhiteSpace( p, encoding );
         TiXmlDocument* document = GetDocument();

         if ( !p || !*p )
         {
                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
                 return 0;
         }

         if ( data )
         {
                 data->Stamp( p, encoding );
                 location = data->Cursor();
         }

         if ( *p != '<' )
         {
                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
                 return 0;
         }

         p = SkipWhiteSpace( p+1, encoding );

         // Read the name.
         const char* pErr = p;

         p = ReadName( p, &value, encoding );
         if ( !p || !*p )
         {
                 if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
                 return 0;
         }

         // The end tag this element must be closed with, e.g. "</name>".
         TIXML_STRING endTag ("</");
         endTag += value;
         endTag += ">";

         // Check for and read attributes. Also look for an empty
         // tag or an end tag.
         while ( p && *p )
         {
                 pErr = p;
                 p = SkipWhiteSpace( p, encoding );
                 if ( !p || !*p )
                 {
                         if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
                         return 0;
                 }
                 if ( *p == '/' )
                 {
                         ++p;
                         // Empty tag: the '/' must be followed directly by '>'.
                         if ( *p  != '>' )
                         {
                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
                                 return 0;
                         }
                         return (p+1);
                 }
                 else if ( *p == '>' )
                 {
                         // Done with attributes (if there were any.)
                         // Read the value -- which can include other
                         // elements -- read the end tag, and return.
                         ++p;
                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
                         if ( !p || !*p )
                         {
                                 // ReadValue only reports an error when it returns 0. If the
                                 // input simply ran out before the end tag, nothing has been
                                 // reported yet, so report the missing end tag here. SetError
                                 // ignores this call if an error is already recorded.
                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
                                 return 0;
                         }

                         // We should find the end tag now
                         if ( StringEqual( p, endTag.c_str(), false, encoding ) )
                         {
                                 p += endTag.length();
                                 return p;
                         }
                         else
                         {
                                 if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
                                 return 0;
                         }
                 }
                 else
                 {
                         // Try to read an attribute:
                         TiXmlAttribute* attrib = new TiXmlAttribute();
                         if ( !attrib )
                         {
                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
                                 return 0;
                         }

                         attrib->SetDocument( document );
                         const char* ppErr = p;
                         p = attrib->Parse( p, data, encoding );

                         if ( !p || !*p )
                         {
                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, ppErr, data, encoding );
                                 delete attrib;
                                 return 0;
                         }

                         // Handle the strange case of double attributes: the existing
                         // attribute takes the later value (as before) and parsing stops.
                         // The failure is now reported instead of returning 0 silently.
                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
                         if ( node )
                         {
                                 node->SetValue( attrib->Value() );
                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, ppErr, data, encoding );
                                 delete attrib;
                                 return 0;
                         }

                         attributeSet.Add( attrib );
                 }
         }
         return p;
 }


 /**
  * Reads the content of an element: text and child nodes in any order, up to
  * the next "</".
  *
  * Non-blank text becomes a TiXmlText child. Anything starting with '<' other
  * than "</" is identified, parsed and linked as a child node.
  *
  * @param p         Position just after the '>' of the start tag.
  * @param data      Optional parsing data used to stamp cursor locations.
  * @param encoding  Encoding passed through to the low-level readers.
  * @return Pointer to the "</" that ends the content, a pointer to the
  *         terminating NUL if the input runs out first, or 0 on failure.
  */
 const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
 {
         TiXmlDocument* document = GetDocument();

         // Start of the white space in front of the item about to be read. It
         // must follow the parse position: when white space is not condensed,
         // text is parsed from here so that leading spaces are kept.
         const char* pWithWhiteSpace = p;

         // Read in text and elements in any order.
         p = SkipWhiteSpace( p, encoding );
         while ( p && *p )
         {
                 if ( *p != '<' )
                 {
                         // Take what we have, make a text element.
                         TiXmlText* textNode = new TiXmlText( "" );

                         if ( !textNode )
                         {
                                 if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
                                 return 0;
                         }

                         if ( TiXmlBase::IsWhiteSpaceCondensed() )
                         {
                                 p = textNode->Parse( p, data, encoding );
                         }
                         else
                         {
                                 // Special case: we want to keep the white space
                                 // so that leading spaces aren't removed.
                                 p = textNode->Parse( pWithWhiteSpace, data, encoding );
                         }

                         // Blank text is not kept in the tree.
                         if ( !textNode->Blank() )
                                 LinkEndChild( textNode );
                         else
                                 delete textNode;
                 }
                 else
                 {
                         // We hit a '<'
                         // Have we hit a new element or an end tag?
                         if ( StringEqual( p, "</", false, encoding ) )
                         {
                                 return p;
                         }
                         else
                         {
                                 TiXmlNode* node = Identify( p, encoding );
                                 if ( node )
                                 {
                                         p = node->Parse( p, data, encoding );
                                         LinkEndChild( node );
                                 }
                                 else
                                 {
                                         return 0;
                                 }
                         }
                 }
                 // Remember where the next item's leading white space starts.
                 // Without this, every later text node would be re-parsed from
                 // the start of the element content when condensing is off.
                 pWithWhiteSpace = p;
                 p = SkipWhiteSpace( p, encoding );
         }

         if ( !p )
         {
                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
         }
         return p;
 }


 #ifdef TIXML_USE_STL

 /**
  * Streams characters from `in` into `*tag` up to and including the next '>'.
  *
  * A character value <= 0 is reported as TIXML_ERROR_EMBEDDED_NULL on the
  * owning document (if any) and ends the streaming.
  */
 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
 {
         for ( ;; )
         {
                 if ( !in->good() )
                         break;

                 const int ch = in->get();
                 if ( ch <= 0 )
                 {
                         TiXmlDocument* owner = GetDocument();
                         if ( owner )
                                 owner->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                         break;
                 }

                 *tag += static_cast<char>( ch );

                 // The closing '>' ends the node.
                 if ( ch == '>' )
                         break;
         }
 }

 #endif


 /**
  * Parses an unrecognised "<...>" construct, storing everything between the
  * '<' and the next '>' in value.
  *
  * @param p         Text to parse; leading white space is skipped.
  * @param data      Optional parsing data used to stamp the cursor location.
  * @param encoding  Encoding passed through to the helpers.
  * @return Pointer just past the closing '>'; a pointer to the terminating
  *         NUL if the input ends before a '>' is found; 0 (with
  *         TIXML_ERROR_PARSING_UNKNOWN reported) if the text does not start
  *         with '<'.
  */
 const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
 {
         TiXmlDocument* document = GetDocument();
         p = SkipWhiteSpace( p, encoding );

         if ( data )
         {
                 data->Stamp( p, encoding );
                 location = data->Cursor();
         }
         if ( !p || *p != '<' )
         {
                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
                 return 0;
         }
         ++p;
         value = "";

         // Collect everything up to (not including) the closing '>'.
         while ( p && *p && *p != '>' )
         {
                 value += *p;
                 ++p;
         }

         if ( !p )
         {
                 if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
                 // Return here: falling through would dereference the null
                 // pointer that was just detected.
                 return 0;
         }
         if ( *p == '>' )
                 return p+1;
         return p;
 }


 #ifdef TIXML_USE_STL

 /**
  * Streams characters from `in` into `*tag` until `*tag` ends in "-->".
  *
  * A character value <= 0 is reported as TIXML_ERROR_EMBEDDED_NULL on the
  * owning document (if any) and ends the streaming.
  */
 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
 {
         for ( ; in->good(); )
         {
                 const int ch = in->get();
                 if ( ch <= 0 )
                 {
                         TiXmlDocument* owner = GetDocument();
                         if ( owner )
                                 owner->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                         return;
                 }

                 *tag += static_cast<char>( ch );

                 // A '>' only ends the comment when the two characters
                 // before it are both '-'.
                 if ( ch != '>' )
                         continue;
                 if ( tag->at( tag->length() - 2 ) != '-' )
                         continue;
                 if ( tag->at( tag->length() - 3 ) != '-' )
                         continue;

                 // All is well.
                 return;
         }
 }

 #endif


 /**
  * Parses a comment that starts with "<!--", reading its text into value via
  * ReadText with "-->" as the end tag.
  *
  * @param p         Text to parse; leading white space is skipped.
  * @param data      Optional parsing data used to stamp the cursor location.
  * @param encoding  Encoding passed through to the helpers.
  * @return The pointer returned by ReadText, or 0 (with
  *         TIXML_ERROR_PARSING_COMMENT reported on the owning document, if
  *         any) when the text does not start with "<!--".
  */
 const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
 {
         TiXmlDocument* document = GetDocument();
         value = "";

         p = SkipWhiteSpace( p, encoding );

         if ( data )
         {
                 data->Stamp( p, encoding );
                 location = data->Cursor();
         }
         const char* startTag = "<!--";
         const char* endTag   = "-->";

         if ( !StringEqual( p, startTag, false, encoding ) )
         {
                 // Guard the document pointer as every other parser in this file
                 // does, rather than dereferencing it unconditionally.
                 if ( document ) document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
                 return 0;
         }
         p += strlen( startTag );
         p = ReadText( p, &value, false, endTag, false, encoding );
         return p;
 }


 const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )

 {

         p = SkipWhiteSpace( p, encoding );

         if ( !p || !*p ) return 0;


 //fg  this code has no effect and causes: -Wunused-but-set-variable

 //fg    int tabsize = 4;

 //fg    if ( document )

 //fg            tabsize = document->TabSize();


 //      TiXmlParsingData data( p, prevData );

         if ( data )

         {

                 data->Stamp( p, encoding );

                 location = data->Cursor();

         }

         // Read the name, the '=' and the value.

         const char* pErr = p;

         p = ReadName( p, &name, encoding );

         if ( !p || !*p )

         {

                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );

                 return 0;

         }

         p = SkipWhiteSpace( p, encoding );

         if ( !p || !*p || *p != '=' )

         {

                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );

                 return 0;

         }


         ++p;    // skip '='

         p = SkipWhiteSpace( p, encoding );

         if ( !p || !*p )

         {

                 if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );

                 return 0;

         }


         const char* end;


         if ( *p == '\'' )

         {

                 ++p;

                 end = "\'";

                 p = ReadText( p, &value, false, end, false, encoding );

         }

         else if ( *p == '"' )

         {

                 ++p;

                 end = "\"";

                 p = ReadText( p, &value, false, end, false, encoding );

         }

         else

         {

                 // All attribute values should be in single or double quotes.

                 // But this is such a common error that the parser will try

                 // its best, even without them.

                 value = "";

                 while (    p && *p                                                                              // existence

                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace

                                 && *p != '/' && *p != '>' )                                             // tag end

                 {

                         value += *p;

                         ++p;

                 }

         }

         return p;

 }


 #ifdef TIXML_USE_STL

 /**
  * Streams text from `in` into `*tag` up to, but not including, the next '<',
  * which is left in the stream.
  *
  * A character value <= 0 is reported as TIXML_ERROR_EMBEDDED_NULL on the
  * owning document (if any) and ends the streaming.
  */
 void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
 {
         for ( ;; )
         {
                 if ( !in->good() )
                         return;

                 const int upcoming = in->peek();

                 // The start of the next tag ends the text.
                 if ( upcoming == '<' )
                         return;

                 if ( upcoming <= 0 )
                 {
                         TiXmlDocument* owner = GetDocument();
                         if ( owner )
                                 owner->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                         return;
                 }

                 // Keep the character, then consume it from the stream.
                 *tag += static_cast<char>( upcoming );
                 in->get();
         }
 }

 #endif


 /**
  * Reads text into value via ReadText, using "<" as the end tag.
  *
  * @param p         Start of the text.
  * @param data      Optional parsing data used to stamp the cursor location.
  * @param encoding  Encoding passed through to ReadText.
  * @return One character before the pointer returned by ReadText (so the '<'
  *         is not swallowed), or 0 if ReadText returned 0.
  */
 const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
 {
         value = "";

         if ( data )
         {
                 data->Stamp( p, encoding );
                 location = data->Cursor();
         }

         const char* afterText = ReadText( p, &value, true, "<", false, encoding );

         // Step back one character so the caller still sees the '<'.
         return afterText ? afterText - 1 : 0;
 }


 #ifdef TIXML_USE_STL

 /**
  * Streams characters from `in` into `*tag` up to and including the next '>'.
  *
  * A character value <= 0 is reported as TIXML_ERROR_EMBEDDED_NULL on the
  * owning document (if any) and ends the streaming.
  */
 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
 {
         while ( in->good() )
         {
                 const int next = in->get();
                 if ( next > 0 )
                 {
                         *tag += static_cast<char>( next );

                         // The '>' completes the declaration.
                         if ( next == '>' )
                                 return;
                         continue;
                 }

                 // Zero or negative value: report it and give up.
                 TiXmlDocument* owner = GetDocument();
                 if ( owner )
                         owner->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
                 return;
         }
 }

 #endif


 /**
  * Parses an XML declaration ("<?xml ... >") and fills in the version,
  * encoding and standalone members from the attributes of those names.
  * Anything else inside the declaration is skipped.
  *
  * @param p          Text to parse; leading white space is skipped.
  * @param data       Optional parsing data used to stamp the cursor location.
  * @param _encoding  Encoding passed through to the helpers.
  * @return Pointer just past the closing '>', or 0 if the text does not start
  *         with "<?xml" (reported as TIXML_ERROR_PARSING_DECLARATION) or no
  *         closing '>' is reached.
  */
 const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
 {
         p = SkipWhiteSpace( p, _encoding );
         TiXmlDocument* document = GetDocument();

         const bool startsDeclaration = p && *p && StringEqual( p, "<?xml", true, _encoding );
         if ( !startsDeclaration )
         {
                 if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
                 return 0;
         }

         if ( data )
         {
                 data->Stamp( p, _encoding );
                 location = data->Cursor();
         }

         // Step over "<?xml".
         p += 5;

         version = "";
         encoding = "";
         standalone = "";

         while ( p && *p )
         {
                 if ( *p == '>' )
                         return p + 1;

                 p = SkipWhiteSpace( p, _encoding );

                 // Work out which of the three known attributes comes next, if any.
                 const bool isVersion    = StringEqual( p, "version", true, _encoding );
                 const bool isEncoding   = !isVersion && StringEqual( p, "encoding", true, _encoding );
                 const bool isStandalone = !isVersion && !isEncoding && StringEqual( p, "standalone", true, _encoding );

                 if ( isVersion || isEncoding || isStandalone )
                 {
                         TiXmlAttribute attrib;
                         p = attrib.Parse( p, data, _encoding );

                         if ( isVersion )
                                 version = attrib.Value();
                         else if ( isEncoding )
                                 encoding = attrib.Value();
                         else
                                 standalone = attrib.Value();
                 }
                 else
                 {
                         // Read over whatever it is.
                         while ( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
                                 ++p;
                 }
         }
         return 0;
 }


 /**
  * Tells whether the text consists of white space only.
  *
  * @return true if every character of value is white space (which includes
  *         the empty string), false otherwise.
  */
 bool TiXmlText::Blank() const
 {
         // Walk past leading white space; the text is blank when that walk
         // reaches the end.
         unsigned idx = 0;
         while ( idx < value.length() && IsWhiteSpace( value[idx] ) )
                 ++idx;
         return !( idx < value.length() );
 }


 } //fg: end namespace gear

gear::TiXmlDocument::Parse
virtual const char * Parse(const char *p, TiXmlParsingData *data=0, TiXmlEncoding encoding=TIXML_DEFAULT_ENCODING)
Parse the given null terminated block of xml data.
Definition: tinyxmlparser.cc:689

gear::TiXmlAttribute::SetValue
void SetValue(const char *_value)
Set the value.
Definition: tinyxml.h:728

gear::TiXmlNode::LinkEndChild
TiXmlNode * LinkEndChild(TiXmlNode *addThis)
Add a new node related to this.
Definition: tinyxml.cc:201

gear::TiXmlDeclaration
In correct XML the declaration is the first entry in the file.
Definition: tinyxml.h:1061

gear::TiXmlBase::IsWhiteSpaceCondensed
static bool IsWhiteSpaceCondensed()
Return the current white space setting.
Definition: tinyxml.h:169

gear::TiXmlDeclaration::Encoding
const char * Encoding() const
Encoding. Will return an empty string if none was found.
Definition: tinyxml.h:1087

gear::TiXmlDocument
Always the top level node.
Definition: tinyxml.h:1154

gear::TiXmlNode::ToDeclaration
const TiXmlDeclaration * ToDeclaration() const
Cast to a more defined type. Will return null if not of the requested type.
Definition: tinyxml.h:621

gear::TiXmlNode
The parent class for everything in the Document Object Model.
Definition: tinyxml.h:370

gear::TiXmlDocument::ClearError
void ClearError()
If you have handled the error, it can be reset with this call.
Definition: tinyxml.h:1262

gear::TiXmlCursor
Definition: tinyxml.h:92

gear::TiXmlNode::GetDocument
const TiXmlDocument * GetDocument() const
Return a pointer to the Document this node lives in.
Definition: tinyxml.cc:595

gear::TiXmlParsingData
Definition: tinyxmlparser.cc:170