LLC2_API
|
00001 /** 00002 **************************************************************************** 00003 * <P> XML.c - implementation file for basic XML parser written in ANSI C++ 00004 * for portability. It works by using recursion and a node tree for breaking 00005 * down the elements of an XML document. </P> 00006 * 00007 * @version V2.43 00008 * @author Frank Vanden Berghen 00009 * 00010 * NOTE: 00011 * 00012 * If you add "#define STRICT_PARSING", on the first line of this file 00013 * the parser will see the following XML-stream: 00014 * <a><b>some text</b><b>other text </a> 00015 * as an error. Otherwise, this tring will be equivalent to: 00016 * <a><b>some text</b><b>other text</b></a> 00017 * 00018 * NOTE: 00019 * 00020 * If you add "#define APPROXIMATE_PARSING" on the first line of this file 00021 * the parser will see the following XML-stream: 00022 * <data name="n1"> 00023 * <data name="n2"> 00024 * <data name="n3" /> 00025 * as equivalent to the following XML-stream: 00026 * <data name="n1" /> 00027 * <data name="n2" /> 00028 * <data name="n3" /> 00029 * This can be useful for badly-formed XML-streams but prevent the use 00030 * of the following XML-stream (problem is: tags at contiguous levels 00031 * have the same names): 00032 * <data name="n1"> 00033 * <data name="n2"> 00034 * <data name="n3" /> 00035 * </data> 00036 * </data> 00037 * 00038 * NOTE: 00039 * 00040 * If you add "#define _XMLPARSER_NO_MESSAGEBOX_" on the first line of this file 00041 * the "openFileHelper" function will always display error messages inside the 00042 * console instead of inside a message-box-window. Message-box-windows are 00043 * available on windows 9x/NT/2000/XP/Vista only. 00044 * 00045 * Copyright (c) 2002, Business-Insight 00046 * <a href="http://www.Business-Insight.com">Business-Insight</a> 00047 * All rights reserved. 
00048 * See the file "AFPL-license.txt" about the licensing terms 00049 * 00050 **************************************************************************** 00051 */ 00052 #ifndef _CRT_SECURE_NO_DEPRECATE 00053 #define _CRT_SECURE_NO_DEPRECATE 00054 #endif 00055 #include "xmlParser.h" 00056 #ifdef _XMLWINDOWS 00057 //#ifdef _DEBUG 00058 //#define _CRTDBG_MAP_ALLOC 00059 //#include <crtdbg.h> 00060 //#endif 00061 #define WIN32_LEAN_AND_MEAN 00062 #include <Windows.h> // to have IsTextUnicode, MultiByteToWideChar, WideCharToMultiByte to handle unicode files 00063 // to have "MessageBoxA" to display error messages for openFilHelper 00064 #endif 00065 00066 #include <memory.h> 00067 #include <assert.h> 00068 #include <stdio.h> 00069 #include <string.h> 00070 #include <stdlib.h> 00071 00072 XMLCSTR XMLNode::getVersion() { return _CXML("v2.43"); } 00073 void freeXMLString(XMLSTR t){if(t)free(t);} 00074 00075 static XMLNode::XMLCharEncoding characterEncoding=XMLNode::char_encoding_UTF8; 00076 static char guessWideCharChars=1, dropWhiteSpace=1, removeCommentsInMiddleOfText=1; 00077 00078 inline int mmin( const int t1, const int t2 ) { return t1 < t2 ? t1 : t2; } 00079 00080 // You can modify the initialization of the variable "XMLClearTags" below 00081 // to change the clearTags that are currently recognized by the library. 00082 // The number on the second columns is the length of the string inside the 00083 // first column. 00084 // The "<!DOCTYPE" declaration must be the second in the list. 00085 // The "<!--" declaration must be the third in the list. 00086 // All ClearTag Strings must start with the '<' character. 
00087 typedef struct { XMLCSTR lpszOpen; int openTagLen; XMLCSTR lpszClose;} ALLXMLClearTag; 00088 static ALLXMLClearTag XMLClearTags[] = 00089 { 00090 { _CXML("<![CDATA["),9, _CXML("]]>") }, 00091 { _CXML("<!DOCTYPE"),9, _CXML(">") }, 00092 { _CXML("<!--") ,4, _CXML("-->") }, 00093 { _CXML("<PRE>") ,5, _CXML("</PRE>") }, 00094 // { _CXML("<Script>") ,8, _CXML("</Script>")}, 00095 { NULL ,0, NULL } 00096 }; 00097 00098 // You can modify the initialization of the variable "XMLEntities" below 00099 // to change the character entities that are currently recognized by the library. 00100 // The number on the second columns is the length of the string inside the 00101 // first column. Additionally, the syntaxes " " and " " are recognized. 00102 typedef struct { XMLCSTR s; int l; XMLCHAR c;} XMLCharacterEntity; 00103 static XMLCharacterEntity XMLEntities[] = 00104 { 00105 { _CXML("&" ), 5, _CXML('&' )}, 00106 { _CXML("<" ), 4, _CXML('<' )}, 00107 { _CXML(">" ), 4, _CXML('>' )}, 00108 { _CXML("""), 6, _CXML('\"')}, 00109 { _CXML("'"), 6, _CXML('\'')}, 00110 { NULL , 0, '\0' } 00111 }; 00112 00113 // When rendering the XMLNode to a string (using the "createXMLString" function), 00114 // you can ask for a beautiful formatting. This formatting is using the 00115 // following indentation character: 00116 #define INDENTCHAR _CXML('\t') 00117 00118 // The following function parses the XML errors into a user friendly string. 00119 // You can edit this to change the output language of the library to something else. 
00120 XMLCSTR XMLNode::getError(XMLError xerror) 00121 { 00122 switch (xerror) 00123 { 00124 case eXMLErrorNone: return _CXML("No error"); 00125 case eXMLErrorMissingEndTag: return _CXML("Warning: Unmatched end tag"); 00126 case eXMLErrorNoXMLTagFound: return _CXML("Warning: No XML tag found"); 00127 case eXMLErrorEmpty: return _CXML("Error: No XML data"); 00128 case eXMLErrorMissingTagName: return _CXML("Error: Missing start tag name"); 00129 case eXMLErrorMissingEndTagName: return _CXML("Error: Missing end tag name"); 00130 case eXMLErrorUnmatchedEndTag: return _CXML("Error: Unmatched end tag"); 00131 case eXMLErrorUnmatchedEndClearTag: return _CXML("Error: Unmatched clear tag end"); 00132 case eXMLErrorUnexpectedToken: return _CXML("Error: Unexpected token found"); 00133 case eXMLErrorNoElements: return _CXML("Error: No elements found"); 00134 case eXMLErrorFileNotFound: return _CXML("Error: File not found"); 00135 case eXMLErrorFirstTagNotFound: return _CXML("Error: First Tag not found"); 00136 case eXMLErrorUnknownCharacterEntity:return _CXML("Error: Unknown character entity"); 00137 case eXMLErrorCharacterCodeAbove255: return _CXML("Error: Character code above 255 is forbidden in MultiByte char mode."); 00138 case eXMLErrorCharConversionError: return _CXML("Error: unable to convert between WideChar and MultiByte chars"); 00139 case eXMLErrorCannotOpenWriteFile: return _CXML("Error: unable to open file for writing"); 00140 case eXMLErrorCannotWriteFile: return _CXML("Error: cannot write into file"); 00141 00142 case eXMLErrorBase64DataSizeIsNotMultipleOf4: return _CXML("Warning: Base64-string length is not a multiple of 4"); 00143 case eXMLErrorBase64DecodeTruncatedData: return _CXML("Warning: Base64-string is truncated"); 00144 case eXMLErrorBase64DecodeIllegalCharacter: return _CXML("Error: Base64-string contains an illegal character"); 00145 case eXMLErrorBase64DecodeBufferTooSmall: return _CXML("Error: Base64 decode output buffer is too small"); 00146 }; 
00147 return _CXML("Unknown"); 00148 } 00149 00150 ///////////////////////////////////////////////////////////////////////// 00151 // Here start the abstraction layer to be OS-independent // 00152 ///////////////////////////////////////////////////////////////////////// 00153 00154 // Here is an abstraction layer to access some common string manipulation functions. 00155 // The abstraction layer is currently working for gcc, Microsoft Visual Studio 6.0, 00156 // Microsoft Visual Studio .NET, CC (sun compiler) and Borland C++. 00157 // If you plan to "port" the library to a new system/compiler, all you have to do is 00158 // to edit the following lines. 00159 #ifdef XML_NO_WIDE_CHAR 00160 char myIsTextWideChar(const void *b, int len) { return FALSE; } 00161 #else 00162 #if defined (UNDER_CE) || !defined(_XMLWINDOWS) 00163 char myIsTextWideChar(const void *b, int len) // inspired by the Wine API: RtlIsTextUnicode 00164 { 00165 #ifdef sun 00166 // for SPARC processors: wchar_t* buffers must always be alligned, otherwise it's a char* buffer. 
00167 if ((((unsigned long)b)%sizeof(wchar_t))!=0) return FALSE; 00168 #endif 00169 const wchar_t *s=(const wchar_t*)b; 00170 00171 // buffer too small: 00172 if (len<(int)sizeof(wchar_t)) return FALSE; 00173 00174 // odd length test 00175 if (len&1) return FALSE; 00176 00177 /* only checks the first 256 characters */ 00178 len=mmin(256,len/sizeof(wchar_t)); 00179 00180 // Check for the special byte order: 00181 if (*((unsigned short*)s) == 0xFFFE) return TRUE; // IS_TEXT_UNICODE_REVERSE_SIGNATURE; 00182 if (*((unsigned short*)s) == 0xFEFF) return TRUE; // IS_TEXT_UNICODE_SIGNATURE 00183 00184 // checks for ASCII characters in the UNICODE stream 00185 int i,stats=0; 00186 for (i=0; i<len; i++) if (s[i]<=(unsigned short)255) stats++; 00187 if (stats>len/2) return TRUE; 00188 00189 // Check for UNICODE NULL chars 00190 for (i=0; i<len; i++) if (!s[i]) return TRUE; 00191 00192 return FALSE; 00193 } 00194 #else 00195 char myIsTextWideChar(const void *b,int l) { return (char)IsTextUnicode((CONST LPVOID)b,l,NULL); } 00196 #endif 00197 #endif 00198 00199 #ifdef _XMLWINDOWS 00200 // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 00201 #ifdef _XMLWIDECHAR 00202 wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) 00203 { 00204 int i; 00205 if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,NULL,0); 00206 else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,NULL,0); 00207 if (i<0) return NULL; 00208 wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(XMLCHAR)); 00209 if (ce==XMLNode::char_encoding_UTF8) i=(int)MultiByteToWideChar(CP_UTF8,0 ,s,-1,d,i); 00210 else i=(int)MultiByteToWideChar(CP_ACP ,MB_PRECOMPOSED,s,-1,d,i); 00211 d[i]=0; 00212 return d; 00213 } 00214 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return _wfopen(filename,mode); } 00215 static inline int xstrlen(XMLCSTR c) { return (int)wcslen(c); } 00216 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, 
int l) { return _wcsnicmp(c1,c2,l);} 00217 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} 00218 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _wcsicmp(c1,c2); } 00219 static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } 00220 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } 00221 #else 00222 char *myWideCharToMultiByte(const wchar_t *s) 00223 { 00224 UINT codePage=CP_ACP; if (characterEncoding==XMLNode::char_encoding_UTF8) codePage=CP_UTF8; 00225 int i=(int)WideCharToMultiByte(codePage, // code page 00226 0, // performance and mapping flags 00227 s, // wide-character string 00228 -1, // number of chars in string 00229 NULL, // buffer for new string 00230 0, // size of buffer 00231 NULL, // default for unmappable chars 00232 NULL // set when default char used 00233 ); 00234 if (i<0) return NULL; 00235 char *d=(char*)malloc(i+1); 00236 WideCharToMultiByte(codePage, // code page 00237 0, // performance and mapping flags 00238 s, // wide-character string 00239 -1, // number of chars in string 00240 d, // buffer for new string 00241 i, // size of buffer 00242 NULL, // default for unmappable chars 00243 NULL // set when default char used 00244 ); 00245 d[i]=0; 00246 return d; 00247 } 00248 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } 00249 static inline int xstrlen(XMLCSTR c) { return (int)strlen(c); } 00250 #ifdef __BORLANDC__ 00251 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strnicmp(c1,c2,l);} 00252 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return stricmp(c1,c2); } 00253 #else 00254 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return _strnicmp(c1,c2,l);} 00255 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return _stricmp(c1,c2); } 00256 #endif 00257 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} 00258 
static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } 00259 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } 00260 #endif 00261 #else 00262 // for gcc and CC 00263 #ifdef XML_NO_WIDE_CHAR 00264 char *myWideCharToMultiByte(const wchar_t *s) { return NULL; } 00265 #else 00266 char *myWideCharToMultiByte(const wchar_t *s) 00267 { 00268 const wchar_t *ss=s; 00269 int i=(int)wcsrtombs(NULL,&ss,0,NULL); 00270 if (i<0) return NULL; 00271 char *d=(char *)malloc(i+1); 00272 wcsrtombs(d,&s,i,NULL); 00273 d[i]=0; 00274 return d; 00275 } 00276 #endif 00277 #ifdef _XMLWIDECHAR 00278 wchar_t *myMultiByteToWideChar(const char *s, XMLNode::XMLCharEncoding ce) 00279 { 00280 const char *ss=s; 00281 int i=(int)mbsrtowcs(NULL,&ss,0,NULL); 00282 if (i<0) return NULL; 00283 wchar_t *d=(wchar_t *)malloc((i+1)*sizeof(wchar_t)); 00284 mbsrtowcs(d,&s,i,NULL); 00285 d[i]=0; 00286 return d; 00287 } 00288 int xstrlen(XMLCSTR c) { return wcslen(c); } 00289 #ifdef sun 00290 // for CC 00291 #include <widec.h> 00292 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncasecmp(c1,c2,l);} 00293 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wsncmp(c1,c2,l);} 00294 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wscasecmp(c1,c2); } 00295 #else 00296 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncmp(c1,c2,l);} 00297 #ifdef __linux__ 00298 // for gcc/linux 00299 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return wcsncasecmp(c1,c2,l);} 00300 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return wcscasecmp(c1,c2); } 00301 #else 00302 #include <wctype.h> 00303 // for gcc/non-linux (MacOS X 10.3, FreeBSD 6.0, NetBSD 3.0, OpenBSD 3.8, AIX 4.3.2, HP-UX 11, IRIX 6.5, OSF/1 5.1, Cygwin, mingw) 00304 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) 00305 { 00306 wchar_t left,right; 00307 do 00308 { 00309 left=towlower(*c1++); 
right=towlower(*c2++); 00310 } while (left&&(left==right)); 00311 return (int)left-(int)right; 00312 } 00313 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) 00314 { 00315 wchar_t left,right; 00316 while(l--) 00317 { 00318 left=towlower(*c1++); right=towlower(*c2++); 00319 if ((!left)||(left!=right)) return (int)left-(int)right; 00320 } 00321 return 0; 00322 } 00323 #endif 00324 #endif 00325 static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)wcsstr(c1,c2); } 00326 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)wcscpy(c1,c2); } 00327 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) 00328 { 00329 char *filenameAscii=myWideCharToMultiByte(filename); 00330 FILE *f; 00331 if (mode[0]==_CXML('r')) f=fopen(filenameAscii,"rb"); 00332 else f=fopen(filenameAscii,"wb"); 00333 free(filenameAscii); 00334 return f; 00335 } 00336 #else 00337 static inline FILE *xfopen(XMLCSTR filename,XMLCSTR mode) { return fopen(filename,mode); } 00338 static inline int xstrlen(XMLCSTR c) { return strlen(c); } 00339 static inline int xstrnicmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncasecmp(c1,c2,l);} 00340 static inline int xstrncmp(XMLCSTR c1, XMLCSTR c2, int l) { return strncmp(c1,c2,l);} 00341 static inline int xstricmp(XMLCSTR c1, XMLCSTR c2) { return strcasecmp(c1,c2); } 00342 static inline XMLSTR xstrstr(XMLCSTR c1, XMLCSTR c2) { return (XMLSTR)strstr(c1,c2); } 00343 static inline XMLSTR xstrcpy(XMLSTR c1, XMLCSTR c2) { return (XMLSTR)strcpy(c1,c2); } 00344 #endif 00345 static inline int _strnicmp(const char *c1,const char *c2, int l) { return strncasecmp(c1,c2,l);} 00346 #endif 00347 00348 00349 /////////////////////////////////////////////////////////////////////////////// 00350 // the "xmltoc,xmltob,xmltoi,xmltol,xmltof,xmltoa" functions // 00351 /////////////////////////////////////////////////////////////////////////////// 00352 // These 6 functions are not used inside the XMLparser. 
00353 // There are only here as "convenience" functions for the user. 00354 // If you don't need them, you can delete them without any trouble. 00355 #ifdef _XMLWIDECHAR 00356 #ifdef _XMLWINDOWS 00357 // for Microsoft Visual Studio 6.0 and Microsoft Visual Studio .NET and Borland C++ Builder 6.0 00358 char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)_wtoi(t); return v; } 00359 int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return _wtoi(t); return v; } 00360 long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return _wtol(t); return v; } 00361 double xmltof(XMLCSTR t,double v){ if (t&&(*t)) swscanf(t, L"%lf", &v); /*v=_wtof(t);*/ return v; } 00362 #else 00363 #ifdef sun 00364 // for CC 00365 #include <widec.h> 00366 char xmltob(XMLCSTR t,char v){ if (t) return (char)wstol(t,NULL,10); return v; } 00367 int xmltoi(XMLCSTR t,int v){ if (t) return (int)wstol(t,NULL,10); return v; } 00368 long xmltol(XMLCSTR t,long v){ if (t) return wstol(t,NULL,10); return v; } 00369 #else 00370 // for gcc 00371 char xmltob(XMLCSTR t,char v){ if (t) return (char)wcstol(t,NULL,10); return v; } 00372 int xmltoi(XMLCSTR t,int v){ if (t) return (int)wcstol(t,NULL,10); return v; } 00373 long xmltol(XMLCSTR t,long v){ if (t) return wcstol(t,NULL,10); return v; } 00374 #endif 00375 double xmltof(XMLCSTR t,double v){ if (t&&(*t)) swscanf(t, L"%lf", &v); /*v=_wtof(t);*/ return v; } 00376 #endif 00377 #else 00378 char xmltob(XMLCSTR t,char v){ if (t&&(*t)) return (char)atoi(t); return v; } 00379 int xmltoi(XMLCSTR t,int v){ if (t&&(*t)) return atoi(t); return v; } 00380 long xmltol(XMLCSTR t,long v){ if (t&&(*t)) return atol(t); return v; } 00381 double xmltof(XMLCSTR t,double v){ if (t&&(*t)) return atof(t); return v; } 00382 #endif 00383 XMLCSTR xmltoa(XMLCSTR t, XMLCSTR v){ if (t) return t; return v; } 00384 XMLCHAR xmltoc(XMLCSTR t,const XMLCHAR v){ if (t&&(*t)) return *t; return v; } 00385 00386 ///////////////////////////////////////////////////////////////////////// 00387 // the 
"openFileHelper" function // 00388 ///////////////////////////////////////////////////////////////////////// 00389 00390 // Since each application has its own way to report and deal with errors, you should modify & rewrite 00391 // the following "openFileHelper" function to get an "error reporting mechanism" tailored to your needs. 00392 XMLNode XMLNode::openFileHelper(XMLCSTR filename, XMLCSTR tag) 00393 { 00394 // guess the value of the global parameter "characterEncoding" 00395 // (the guess is based on the first 200 bytes of the file). 00396 FILE *f=xfopen(filename,_CXML("rb")); 00397 if (f) 00398 { 00399 char bb[205]; 00400 int l=(int)fread(bb,1,200,f); 00401 setGlobalOptions(guessCharEncoding(bb,l),guessWideCharChars,dropWhiteSpace,removeCommentsInMiddleOfText); 00402 fclose(f); 00403 } 00404 00405 // parse the file 00406 XMLResults pResults; 00407 XMLNode xnode=XMLNode::parseFile(filename,tag,&pResults); 00408 00409 // display error message (if any) 00410 if (pResults.error != eXMLErrorNone) 00411 { 00412 // create message 00413 char message[2000],*s1=(char*)"",*s3=(char*)""; XMLCSTR s2=_CXML(""); 00414 if (pResults.error==eXMLErrorFirstTagNotFound) { s1=(char*)"First Tag should be '"; s2=tag; s3=(char*)"'.\n"; } 00415 sprintf(message, 00416 #ifdef _XMLWIDECHAR 00417 "XML Parsing error inside file '%S'.\n%S\nAt line %i, column %i.\n%s%S%s" 00418 #else 00419 "XML Parsing error inside file '%s'.\n%s\nAt line %i, column %i.\n%s%s%s" 00420 #endif 00421 ,filename,XMLNode::getError(pResults.error),pResults.nLine,pResults.nColumn,s1,s2,s3); 00422 00423 // display message 00424 #if defined(_XMLWINDOWS) && !defined(UNDER_CE) && !defined(_XMLPARSER_NO_MESSAGEBOX_) 00425 MessageBoxA(NULL,message,"XML Parsing error",MB_OK|MB_ICONERROR|MB_TOPMOST); 00426 #else 00427 printf("%s",message); 00428 #endif 00429 exit(255); 00430 } 00431 return xnode; 00432 } 00433 00434 ///////////////////////////////////////////////////////////////////////// 00435 // Here start the core 
implementation of the XMLParser library // 00436 ///////////////////////////////////////////////////////////////////////// 00437 00438 // You should normally not change anything below this point. 00439 00440 #ifndef _XMLWIDECHAR 00441 // If "characterEncoding=ascii" then we assume that all characters have the same length of 1 byte. 00442 // If "characterEncoding=UTF8" then the characters have different lengths (from 1 byte to 4 bytes). 00443 // If "characterEncoding=ShiftJIS" then the characters have different lengths (from 1 byte to 2 bytes). 00444 // This table is used as lookup-table to know the length of a character (in byte) based on the 00445 // content of the first byte of the character. 00446 // (note: if you modify this, you must always have XML_utf8ByteTable[0]=0 ). 00447 static const char XML_utf8ByteTable[256] = 00448 { 00449 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 00450 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 00451 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 00452 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 00453 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 00454 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 00455 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 00456 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 00457 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 End of ASCII range 00458 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 0x80 to 0xc1 invalid 00459 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 00460 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 00461 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 00462 1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 0xc2 to 0xdf 2 byte 00463 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 00464 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,// 0xe0 0xe0 to 0xef 3 byte 00465 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid 00466 }; 00467 static const char XML_legacyByteTable[256] = 00468 { 00469 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 00470 
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 00471 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 00472 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 00473 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 00474 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 00475 }; 00476 static const char XML_sjisByteTable[256] = 00477 { 00478 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 00479 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 00480 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 00481 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 00482 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 00483 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 00484 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 00485 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 00486 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 00487 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0x9F 2 bytes 00488 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 00489 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xa0 00490 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xb0 00491 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xc0 00492 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0xd0 00493 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 0xe0 to 0xef 2 bytes 00494 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 // 0xf0 00495 }; 00496 static const char XML_gb2312ByteTable[256] = 00497 { 00498 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 00499 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 00500 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 00501 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 00502 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 00503 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 00504 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 00505 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 00506 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 00507 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x80 00508 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x90 00509 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 0xa1 to 0xf7 2 bytes 00510 
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 00511 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 00512 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 00513 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 00514 2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1 // 0xf0 00515 }; 00516 static const char XML_gbk_big5_ByteTable[256] = 00517 { 00518 // 0 1 2 3 4 5 6 7 8 9 a b c d e f 00519 0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x00 00520 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x10 00521 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x20 00522 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x30 00523 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x40 00524 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x50 00525 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x60 00526 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,// 0x70 00527 1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x80 0x81 to 0xfe 2 bytes 00528 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0x90 00529 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xa0 00530 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xb0 00531 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xc0 00532 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xd0 00533 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,// 0xe0 00534 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1 // 0xf0 00535 }; 00536 static const char *XML_ByteTable=(const char *)XML_utf8ByteTable; // the default is "characterEncoding=XMLNode::encoding_UTF8" 00537 #endif 00538 00539 00540 XMLNode XMLNode::emptyXMLNode; 00541 XMLClear XMLNode::emptyXMLClear={ NULL, NULL, NULL}; 00542 XMLAttribute XMLNode::emptyXMLAttribute={ NULL, NULL}; 00543 00544 // Enumeration used to decipher what type a token is 00545 typedef enum XMLTokenTypeTag 00546 { 00547 eTokenText = 0, 00548 eTokenQuotedText, 00549 eTokenTagStart, /* "<" */ 00550 eTokenTagEnd, /* "</" */ 00551 eTokenCloseTag, /* ">" */ 00552 eTokenEquals, /* "=" */ 00553 eTokenDeclaration, /* "<?" 
*/ 00554 eTokenShortHandClose, /* "/>" */ 00555 eTokenClear, 00556 eTokenError 00557 } XMLTokenType; 00558 00559 // Main structure used for parsing XML 00560 typedef struct XML 00561 { 00562 XMLCSTR lpXML; 00563 XMLCSTR lpszText; 00564 int nIndex,nIndexMissigEndTag; 00565 enum XMLError error; 00566 XMLCSTR lpEndTag; 00567 int cbEndTag; 00568 XMLCSTR lpNewElement; 00569 int cbNewElement; 00570 int nFirst; 00571 } XML; 00572 00573 typedef struct 00574 { 00575 ALLXMLClearTag *pClr; 00576 XMLCSTR pStr; 00577 } NextToken; 00578 00579 // Enumeration used when parsing attributes 00580 typedef enum Attrib 00581 { 00582 eAttribName = 0, 00583 eAttribEquals, 00584 eAttribValue 00585 } Attrib; 00586 00587 // Enumeration used when parsing elements to dictate whether we are currently 00588 // inside a tag 00589 typedef enum XMLStatus 00590 { 00591 eInsideTag = 0, 00592 eOutsideTag 00593 } XMLStatus; 00594 00595 XMLError XMLNode::writeToFile(XMLCSTR filename, const char *encoding, char nFormat) const 00596 { 00597 if (!d) return eXMLErrorNone; 00598 FILE *f=xfopen(filename,_CXML("wb")); 00599 if (!f) return eXMLErrorCannotOpenWriteFile; 00600 #ifdef _XMLWIDECHAR 00601 unsigned char h[2]={ 0xFF, 0xFE }; 00602 if (!fwrite(h,2,1,f)) 00603 { 00604 fclose(f); 00605 return eXMLErrorCannotWriteFile; 00606 } 00607 if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) 00608 { 00609 if (!fwrite(L"<?xml version=\"1.0\" encoding=\"utf-16\"?>\n",sizeof(wchar_t)*40,1,f)) 00610 { 00611 fclose(f); 00612 return eXMLErrorCannotWriteFile; 00613 } 00614 } 00615 #else 00616 if ((!isDeclaration())&&((d->lpszName)||(!getChildNode().isDeclaration()))) 00617 { 00618 if (characterEncoding==char_encoding_UTF8) 00619 { 00620 // header so that windows recognize the file as UTF-8: 00621 unsigned char h[3]={0xEF,0xBB,0xBF}; 00622 if (!fwrite(h,3,1,f)) 00623 { 00624 fclose(f); 00625 return eXMLErrorCannotWriteFile; 00626 } 00627 encoding="utf-8"; 00628 } else if 
(characterEncoding==char_encoding_ShiftJIS) encoding="SHIFT-JIS"; 00629 00630 if (!encoding) encoding="ISO-8859-1"; 00631 if (fprintf(f,"<?xml version=\"1.0\" encoding=\"%s\"?>\n",encoding)<0) 00632 { 00633 fclose(f); 00634 return eXMLErrorCannotWriteFile; 00635 } 00636 } else 00637 { 00638 if (characterEncoding==char_encoding_UTF8) 00639 { 00640 unsigned char h[3]={0xEF,0xBB,0xBF}; 00641 if (!fwrite(h,3,1,f)) 00642 { 00643 fclose(f); 00644 return eXMLErrorCannotWriteFile; 00645 } 00646 } 00647 } 00648 #endif 00649 int i; 00650 XMLSTR t=createXMLString(nFormat,&i); 00651 if (!fwrite(t,sizeof(XMLCHAR)*i,1,f)) 00652 { 00653 free(t); 00654 fclose(f); 00655 return eXMLErrorCannotWriteFile; 00656 } 00657 if (fclose(f)!=0) 00658 { 00659 free(t); 00660 return eXMLErrorCannotWriteFile; 00661 } 00662 free(t); 00663 return eXMLErrorNone; 00664 } 00665 00666 // Duplicate a given string. 00667 XMLSTR stringDup(XMLCSTR lpszData, int cbData) 00668 { 00669 if (lpszData==NULL) return NULL; 00670 00671 XMLSTR lpszNew; 00672 if (cbData==-1) cbData=(int)xstrlen(lpszData); 00673 lpszNew = (XMLSTR)malloc((cbData+1) * sizeof(XMLCHAR)); 00674 if (lpszNew) 00675 { 00676 memcpy(lpszNew, lpszData, (cbData) * sizeof(XMLCHAR)); 00677 lpszNew[cbData] = (XMLCHAR)NULL; 00678 } 00679 return lpszNew; 00680 } 00681 00682 XMLSTR ToXMLStringTool::toXMLUnSafe(XMLSTR dest,XMLCSTR source) 00683 { 00684 XMLSTR dd=dest; 00685 XMLCHAR ch; 00686 XMLCharacterEntity *entity; 00687 while ((ch=*source)) 00688 { 00689 entity=XMLEntities; 00690 do 00691 { 00692 if (ch==entity->c) {xstrcpy(dest,entity->s); dest+=entity->l; source++; goto out_of_loop1; } 00693 entity++; 00694 } while(entity->s); 00695 #ifdef _XMLWIDECHAR 00696 *(dest++)=*(source++); 00697 #else 00698 switch(XML_ByteTable[(unsigned char)ch]) 00699 { 00700 case 4: *(dest++)=*(source++); 00701 case 3: *(dest++)=*(source++); 00702 case 2: *(dest++)=*(source++); 00703 case 1: *(dest++)=*(source++); 00704 } 00705 #endif 00706 out_of_loop1: 00707 ; 00708 
} 00709 *dest=0; 00710 return dd; 00711 } 00712 00713 // private (used while rendering): 00714 int ToXMLStringTool::lengthXMLString(XMLCSTR source) 00715 { 00716 int r=0; 00717 XMLCharacterEntity *entity; 00718 XMLCHAR ch; 00719 while ((ch=*source)) 00720 { 00721 entity=XMLEntities; 00722 do 00723 { 00724 if (ch==entity->c) { r+=entity->l; source++; goto out_of_loop1; } 00725 entity++; 00726 } while(entity->s); 00727 #ifdef _XMLWIDECHAR 00728 r++; source++; 00729 #else 00730 ch=XML_ByteTable[(unsigned char)ch]; r+=ch; source+=ch; 00731 #endif 00732 out_of_loop1: 00733 ; 00734 } 00735 return r; 00736 } 00737 00738 ToXMLStringTool::~ToXMLStringTool(){ freeBuffer(); } 00739 void ToXMLStringTool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } 00740 XMLSTR ToXMLStringTool::toXML(XMLCSTR source) 00741 { 00742 if (!source) 00743 { 00744 if (buflen<1) { buflen=1; buf=(XMLSTR)malloc(sizeof(XMLCHAR)); } 00745 *buf=0; 00746 return buf; 00747 } 00748 int l=lengthXMLString(source)+1; 00749 if (l>buflen) { freeBuffer(); buflen=l; buf=(XMLSTR)malloc(l*sizeof(XMLCHAR)); } 00750 return toXMLUnSafe(buf,source); 00751 } 00752 00753 // private: 00754 XMLSTR fromXMLString(XMLCSTR s, int lo, XML *pXML) 00755 { 00756 // This function is the opposite of the function "toXMLString". It decodes the escape 00757 // sequences &, ", ', <, > and replace them by the characters 00758 // &,",',<,>. This function is used internally by the XML Parser. All the calls to 00759 // the XML library will always gives you back "decoded" strings. 
00760 // 00761 // in: string (s) and length (lo) of string 00762 // out: new allocated string converted from xml 00763 if (!s) return NULL; 00764 00765 int ll=0,j; 00766 XMLSTR d; 00767 XMLCSTR ss=s; 00768 XMLCharacterEntity *entity; 00769 while ((lo>0)&&(*s)) 00770 { 00771 if (*s==_CXML('&')) 00772 { 00773 if ((lo>2)&&(s[1]==_CXML('#'))) 00774 { 00775 s+=2; lo-=2; 00776 if ((*s==_CXML('X'))||(*s==_CXML('x'))) { s++; lo--; } 00777 while ((*s)&&(*s!=_CXML(';'))&&((lo--)>0)) s++; 00778 if (*s!=_CXML(';')) 00779 { 00780 pXML->error=eXMLErrorUnknownCharacterEntity; 00781 return NULL; 00782 } 00783 s++; lo--; 00784 } else 00785 { 00786 entity=XMLEntities; 00787 do 00788 { 00789 if ((lo>=entity->l)&&(xstrnicmp(s,entity->s,entity->l)==0)) { s+=entity->l; lo-=entity->l; break; } 00790 entity++; 00791 } while(entity->s); 00792 if (!entity->s) 00793 { 00794 pXML->error=eXMLErrorUnknownCharacterEntity; 00795 return NULL; 00796 } 00797 } 00798 } else 00799 { 00800 #ifdef _XMLWIDECHAR 00801 s++; lo--; 00802 #else 00803 j=XML_ByteTable[(unsigned char)*s]; s+=j; lo-=j; ll+=j-1; 00804 #endif 00805 } 00806 ll++; 00807 } 00808 00809 d=(XMLSTR)malloc((ll+1)*sizeof(XMLCHAR)); 00810 s=d; 00811 while (ll-->0) 00812 { 00813 if (*ss==_CXML('&')) 00814 { 00815 if (ss[1]==_CXML('#')) 00816 { 00817 ss+=2; j=0; 00818 if ((*ss==_CXML('X'))||(*ss==_CXML('x'))) 00819 { 00820 ss++; 00821 while (*ss!=_CXML(';')) 00822 { 00823 if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j<<4)+*ss-_CXML('0'); 00824 else if ((*ss>=_CXML('A'))&&(*ss<=_CXML('F'))) j=(j<<4)+*ss-_CXML('A')+10; 00825 else if ((*ss>=_CXML('a'))&&(*ss<=_CXML('f'))) j=(j<<4)+*ss-_CXML('a')+10; 00826 else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} 00827 ss++; 00828 } 00829 } else 00830 { 00831 while (*ss!=_CXML(';')) 00832 { 00833 if ((*ss>=_CXML('0'))&&(*ss<=_CXML('9'))) j=(j*10)+*ss-_CXML('0'); 00834 else { free((void*)s); pXML->error=eXMLErrorUnknownCharacterEntity;return NULL;} 00835 ss++; 00836 } 00837 } 
00838 #ifndef _XMLWIDECHAR 00839 if (j>255) { free((void*)s); pXML->error=eXMLErrorCharacterCodeAbove255;return NULL;} 00840 #endif 00841 (*d++)=(XMLCHAR)j; ss++; 00842 } else 00843 { 00844 entity=XMLEntities; 00845 do 00846 { 00847 if (xstrnicmp(ss,entity->s,entity->l)==0) { *(d++)=entity->c; ss+=entity->l; break; } 00848 entity++; 00849 } while(entity->s); 00850 } 00851 } else 00852 { 00853 #ifdef _XMLWIDECHAR 00854 *(d++)=*(ss++); 00855 #else 00856 switch(XML_ByteTable[(unsigned char)*ss]) 00857 { 00858 case 4: *(d++)=*(ss++); ll--; 00859 case 3: *(d++)=*(ss++); ll--; 00860 case 2: *(d++)=*(ss++); ll--; 00861 case 1: *(d++)=*(ss++); 00862 } 00863 #endif 00864 } 00865 } 00866 *d=0; 00867 return (XMLSTR)s; 00868 } 00869 00870 #define XML_isSPACECHAR(ch) ((ch==_CXML('\n'))||(ch==_CXML(' '))||(ch== _CXML('\t'))||(ch==_CXML('\r'))) 00871 00872 // private: 00873 char myTagCompare(XMLCSTR cclose, XMLCSTR copen) 00874 // !!!! WARNING strange convention&: 00875 // return 0 if equals 00876 // return 1 if different 00877 { 00878 if (!cclose) return 1; 00879 int l=(int)xstrlen(cclose); 00880 if (xstrnicmp(cclose, copen, l)!=0) return 1; 00881 const XMLCHAR c=copen[l]; 00882 if (XML_isSPACECHAR(c)|| 00883 (c==_CXML('/' ))|| 00884 (c==_CXML('<' ))|| 00885 (c==_CXML('>' ))|| 00886 (c==_CXML('=' ))) return 0; 00887 return 1; 00888 } 00889 00890 // Obtain the next character from the string. 00891 static inline XMLCHAR getNextChar(XML *pXML) 00892 { 00893 XMLCHAR ch = pXML->lpXML[pXML->nIndex]; 00894 #ifdef _XMLWIDECHAR 00895 if (ch!=0) pXML->nIndex++; 00896 #else 00897 pXML->nIndex+=XML_ByteTable[(unsigned char)ch]; 00898 #endif 00899 return ch; 00900 } 00901 00902 // Find the next token in a string. 00903 // pcbToken contains the number of characters that have been read. 
00904 static NextToken GetNextToken(XML *pXML, int *pcbToken, enum XMLTokenTypeTag *pType) 00905 { 00906 NextToken result; 00907 XMLCHAR ch; 00908 XMLCHAR chTemp; 00909 int indexStart,nFoundMatch,nIsText=FALSE; 00910 result.pClr=NULL; // prevent warning 00911 00912 // Find next non-white space character 00913 do { indexStart=pXML->nIndex; ch=getNextChar(pXML); } while XML_isSPACECHAR(ch); 00914 00915 if (ch) 00916 { 00917 // Cache the current string pointer 00918 result.pStr = &pXML->lpXML[indexStart]; 00919 00920 // check for standard tokens 00921 switch(ch) 00922 { 00923 // Check for quotes 00924 case _CXML('\''): 00925 case _CXML('\"'): 00926 // Type of token 00927 *pType = eTokenQuotedText; 00928 chTemp = ch; 00929 00930 // Set the size 00931 nFoundMatch = FALSE; 00932 00933 // Search through the string to find a matching quote 00934 while((ch = getNextChar(pXML))) 00935 { 00936 if (ch==chTemp) { nFoundMatch = TRUE; break; } 00937 if (ch==_CXML('<')) break; 00938 } 00939 00940 // If we failed to find a matching quote 00941 if (nFoundMatch == FALSE) 00942 { 00943 pXML->nIndex=indexStart+1; 00944 nIsText=TRUE; 00945 break; 00946 } 00947 00948 // 4.02.2002 00949 // if (FindNonWhiteSpace(pXML)) pXML->nIndex--; 00950 00951 break; 00952 00953 // Equals (used with attribute values) 00954 case _CXML('='): 00955 *pType = eTokenEquals; 00956 break; 00957 00958 // Close tag 00959 case _CXML('>'): 00960 *pType = eTokenCloseTag; 00961 break; 00962 00963 // Check for tag start and tag end 00964 case _CXML('<'): 00965 00966 { 00967 // First check whether the token is in the clear tag list (meaning it 00968 // does not need formatting). 
00969 ALLXMLClearTag *ctag=XMLClearTags; 00970 do 00971 { 00972 if (!xstrncmp(ctag->lpszOpen, result.pStr, ctag->openTagLen)) 00973 { 00974 result.pClr=ctag; 00975 pXML->nIndex+=ctag->openTagLen-1; 00976 *pType=eTokenClear; 00977 return result; 00978 } 00979 ctag++; 00980 } while(ctag->lpszOpen); 00981 00982 // Peek at the next character to see if we have an end tag '</', 00983 // or an xml declaration '<?' 00984 chTemp = pXML->lpXML[pXML->nIndex]; 00985 00986 // If we have a tag end... 00987 if (chTemp == _CXML('/')) 00988 { 00989 // Set the type and ensure we point at the next character 00990 getNextChar(pXML); 00991 *pType = eTokenTagEnd; 00992 } 00993 00994 // If we have an XML declaration tag 00995 else if (chTemp == _CXML('?')) 00996 { 00997 00998 // Set the type and ensure we point at the next character 00999 getNextChar(pXML); 01000 *pType = eTokenDeclaration; 01001 } 01002 01003 // Otherwise we must have a start tag 01004 else 01005 { 01006 *pType = eTokenTagStart; 01007 } 01008 break; 01009 } 01010 01011 // Check to see if we have a short hand type end tag ('/>'). 01012 case _CXML('/'): 01013 01014 // Peek at the next character to see if we have a short end tag '/>' 01015 chTemp = pXML->lpXML[pXML->nIndex]; 01016 01017 // If we have a short hand end tag... 
01018 if (chTemp == _CXML('>')) 01019 { 01020 // Set the type and ensure we point at the next character 01021 getNextChar(pXML); 01022 *pType = eTokenShortHandClose; 01023 break; 01024 } 01025 01026 // If we haven't found a short hand closing tag then drop into the 01027 // text process 01028 01029 // Other characters 01030 default: 01031 nIsText = TRUE; 01032 } 01033 01034 // If this is a TEXT node 01035 if (nIsText) 01036 { 01037 // Indicate we are dealing with text 01038 *pType = eTokenText; 01039 while((ch = getNextChar(pXML))) 01040 { 01041 if XML_isSPACECHAR(ch) 01042 { 01043 indexStart++; break; 01044 01045 } else if (ch==_CXML('/')) 01046 { 01047 // If we find a slash then this maybe text or a short hand end tag 01048 // Peek at the next character to see it we have short hand end tag 01049 ch=pXML->lpXML[pXML->nIndex]; 01050 // If we found a short hand end tag then we need to exit the loop 01051 if (ch==_CXML('>')) { pXML->nIndex--; break; } 01052 01053 } else if ((ch==_CXML('<'))||(ch==_CXML('>'))||(ch==_CXML('='))) 01054 { 01055 pXML->nIndex--; break; 01056 } 01057 } 01058 } 01059 *pcbToken = pXML->nIndex-indexStart; 01060 } else 01061 { 01062 // If we failed to obtain a valid character 01063 *pcbToken = 0; 01064 *pType = eTokenError; 01065 result.pStr=NULL; 01066 } 01067 01068 return result; 01069 } 01070 01071 XMLCSTR XMLNode::updateName_WOSD(XMLSTR lpszName) 01072 { 01073 if (!d) { free(lpszName); return NULL; } 01074 if (d->lpszName&&(lpszName!=d->lpszName)) free((void*)d->lpszName); 01075 d->lpszName=lpszName; 01076 return lpszName; 01077 } 01078 01079 // private: 01080 XMLNode::XMLNode(struct XMLNodeDataTag *p){ d=p; (p->ref_count)++; } 01081 XMLNode::XMLNode(XMLNodeData *pParent, XMLSTR lpszName, char isDeclaration) 01082 { 01083 d=(XMLNodeData*)malloc(sizeof(XMLNodeData)); 01084 d->ref_count=1; 01085 01086 d->lpszName=NULL; 01087 d->nChild= 0; 01088 d->nText = 0; 01089 d->nClear = 0; 01090 d->nAttribute = 0; 01091 01092 d->isDeclaration = 
isDeclaration; 01093 01094 d->pParent = pParent; 01095 d->pChild= NULL; 01096 d->pText= NULL; 01097 d->pClear= NULL; 01098 d->pAttribute= NULL; 01099 d->pOrder= NULL; 01100 01101 updateName_WOSD(lpszName); 01102 } 01103 01104 XMLNode XMLNode::createXMLTopNode_WOSD(XMLSTR lpszName, char isDeclaration) { return XMLNode(NULL,lpszName,isDeclaration); } 01105 XMLNode XMLNode::createXMLTopNode(XMLCSTR lpszName, char isDeclaration) { return XMLNode(NULL,stringDup(lpszName),isDeclaration); } 01106 01107 #define MEMORYINCREASE 50 01108 01109 static inline void myFree(void *p) { if (p) free(p); } 01110 static inline void *myRealloc(void *p, int newsize, int memInc, int sizeofElem) 01111 { 01112 if (p==NULL) { if (memInc) return malloc(memInc*sizeofElem); return malloc(sizeofElem); } 01113 if ((memInc==0)||((newsize%memInc)==0)) p=realloc(p,(newsize+memInc)*sizeofElem); 01114 // if (!p) 01115 // { 01116 // printf("XMLParser Error: Not enough memory! Aborting...\n"); exit(220); 01117 // } 01118 return p; 01119 } 01120 01121 // private: 01122 XMLElementPosition XMLNode::findPosition(XMLNodeData *d, int index, XMLElementType xxtype) 01123 { 01124 if (index<0) return -1; 01125 int i=0,j=(int)((index<<2)+xxtype),*o=d->pOrder; while (o[i]!=j) i++; return i; 01126 } 01127 01128 // private: 01129 // update "order" information when deleting a content of a XMLNode 01130 int XMLNode::removeOrderElement(XMLNodeData *d, XMLElementType t, int index) 01131 { 01132 int n=d->nChild+d->nText+d->nClear, *o=d->pOrder,i=findPosition(d,index,t); 01133 memmove(o+i, o+i+1, (n-i)*sizeof(int)); 01134 for (;i<n;i++) 01135 if ((o[i]&3)==(int)t) o[i]-=4; 01136 // We should normally do: 01137 // d->pOrder=(int)realloc(d->pOrder,n*sizeof(int)); 01138 // but we skip reallocation because it's too time consuming. 01139 // Anyway, at the end, it will be free'd completely at once. 
01140 return i; 01141 } 01142 01143 void *XMLNode::addToOrder(int memoryIncrease,int *_pos, int nc, void *p, int size, XMLElementType xtype) 01144 { 01145 // in: *_pos is the position inside d->pOrder ("-1" means "EndOf") 01146 // out: *_pos is the index inside p 01147 p=myRealloc(p,(nc+1),memoryIncrease,size); 01148 int n=d->nChild+d->nText+d->nClear; 01149 d->pOrder=(int*)myRealloc(d->pOrder,n+1,memoryIncrease*3,sizeof(int)); 01150 int pos=*_pos,*o=d->pOrder; 01151 01152 if ((pos<0)||(pos>=n)) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; } 01153 01154 int i=pos; 01155 memmove(o+i+1, o+i, (n-i)*sizeof(int)); 01156 01157 while ((pos<n)&&((o[pos]&3)!=(int)xtype)) pos++; 01158 if (pos==n) { *_pos=nc; o[n]=(int)((nc<<2)+xtype); return p; } 01159 01160 o[i]=o[pos]; 01161 for (i=pos+1;i<=n;i++) if ((o[i]&3)==(int)xtype) o[i]+=4; 01162 01163 *_pos=pos=o[pos]>>2; 01164 memmove(((char*)p)+(pos+1)*size,((char*)p)+pos*size,(nc-pos)*size); 01165 01166 return p; 01167 } 01168 01169 // Add a child node to the given element. 01170 XMLNode XMLNode::addChild_priv(int memoryIncrease, XMLSTR lpszName, char isDeclaration, int pos) 01171 { 01172 if (!lpszName) return emptyXMLNode; 01173 d->pChild=(XMLNode*)addToOrder(memoryIncrease,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); 01174 d->pChild[pos].d=NULL; 01175 d->pChild[pos]=XMLNode(d,lpszName,isDeclaration); 01176 d->nChild++; 01177 return d->pChild[pos]; 01178 } 01179 01180 // Add an attribute to an element. 
01181 XMLAttribute *XMLNode::addAttribute_priv(int memoryIncrease,XMLSTR lpszName, XMLSTR lpszValuev) 01182 { 01183 if (!lpszName) return &emptyXMLAttribute; 01184 if (!d) { myFree(lpszName); myFree(lpszValuev); return &emptyXMLAttribute; } 01185 int nc=d->nAttribute; 01186 d->pAttribute=(XMLAttribute*)myRealloc(d->pAttribute,(nc+1),memoryIncrease,sizeof(XMLAttribute)); 01187 XMLAttribute *pAttr=d->pAttribute+nc; 01188 pAttr->lpszName = lpszName; 01189 pAttr->lpszValue = lpszValuev; 01190 d->nAttribute++; 01191 return pAttr; 01192 } 01193 01194 // Add text to the element. 01195 XMLCSTR XMLNode::addText_priv(int memoryIncrease, XMLSTR lpszValue, int pos) 01196 { 01197 if (!lpszValue) return NULL; 01198 if (!d) { myFree(lpszValue); return NULL; } 01199 d->pText=(XMLCSTR*)addToOrder(memoryIncrease,&pos,d->nText,d->pText,sizeof(XMLSTR),eNodeText); 01200 d->pText[pos]=lpszValue; 01201 d->nText++; 01202 return lpszValue; 01203 } 01204 01205 // Add clear (unformatted) text to the element. 01206 XMLClear *XMLNode::addClear_priv(int memoryIncrease, XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, int pos) 01207 { 01208 if (!lpszValue) return &emptyXMLClear; 01209 if (!d) { myFree(lpszValue); return &emptyXMLClear; } 01210 d->pClear=(XMLClear *)addToOrder(memoryIncrease,&pos,d->nClear,d->pClear,sizeof(XMLClear),eNodeClear); 01211 XMLClear *pNewClear=d->pClear+pos; 01212 pNewClear->lpszValue = lpszValue; 01213 if (!lpszOpen) lpszOpen=XMLClearTags->lpszOpen; 01214 if (!lpszClose) lpszClose=XMLClearTags->lpszClose; 01215 pNewClear->lpszOpenTag = lpszOpen; 01216 pNewClear->lpszCloseTag = lpszClose; 01217 d->nClear++; 01218 return pNewClear; 01219 } 01220 01221 // private: 01222 // Parse a clear (unformatted) type node. 
01223 char XMLNode::parseClearTag(void *px, void *_pClear) 01224 { 01225 XML *pXML=(XML *)px; 01226 ALLXMLClearTag pClear=*((ALLXMLClearTag*)_pClear); 01227 int cbTemp=0; 01228 XMLCSTR lpszTemp=NULL; 01229 XMLCSTR lpXML=&pXML->lpXML[pXML->nIndex]; 01230 static XMLCSTR docTypeEnd=_CXML("]>"); 01231 01232 // Find the closing tag 01233 // Seems the <!DOCTYPE need a better treatment so lets handle it 01234 if (pClear.lpszOpen==XMLClearTags[1].lpszOpen) 01235 { 01236 XMLCSTR pCh=lpXML; 01237 while (*pCh) 01238 { 01239 if (*pCh==_CXML('<')) { pClear.lpszClose=docTypeEnd; lpszTemp=xstrstr(lpXML,docTypeEnd); break; } 01240 else if (*pCh==_CXML('>')) { lpszTemp=pCh; break; } 01241 #ifdef _XMLWIDECHAR 01242 pCh++; 01243 #else 01244 pCh+=XML_ByteTable[(unsigned char)(*pCh)]; 01245 #endif 01246 } 01247 } else lpszTemp=xstrstr(lpXML, pClear.lpszClose); 01248 01249 if (lpszTemp) 01250 { 01251 // Cache the size and increment the index 01252 cbTemp = (int)(lpszTemp - lpXML); 01253 01254 pXML->nIndex += cbTemp+(int)xstrlen(pClear.lpszClose); 01255 01256 // Add the clear node to the current element 01257 addClear_priv(MEMORYINCREASE,cbTemp?stringDup(lpXML,cbTemp):NULL, pClear.lpszOpen, pClear.lpszClose,-1); 01258 return 0; 01259 } 01260 01261 // If we failed to find the end tag 01262 pXML->error = eXMLErrorUnmatchedEndClearTag; 01263 return 1; 01264 } 01265 01266 void XMLNode::exactMemory(XMLNodeData *d) 01267 { 01268 if (d->pOrder) d->pOrder=(int*)realloc(d->pOrder,(d->nChild+d->nText+d->nClear)*sizeof(int)); 01269 if (d->pChild) d->pChild=(XMLNode*)realloc(d->pChild,d->nChild*sizeof(XMLNode)); 01270 if (d->pAttribute) d->pAttribute=(XMLAttribute*)realloc(d->pAttribute,d->nAttribute*sizeof(XMLAttribute)); 01271 if (d->pText) d->pText=(XMLCSTR*)realloc(d->pText,d->nText*sizeof(XMLSTR)); 01272 if (d->pClear) d->pClear=(XMLClear *)realloc(d->pClear,d->nClear*sizeof(XMLClear)); 01273 } 01274 01275 char XMLNode::maybeAddTxT(void *pa, XMLCSTR tokenPStr) 01276 { 01277 XML *pXML=(XML *)pa; 
01278 XMLCSTR lpszText=pXML->lpszText; 01279 if (!lpszText) return 0; 01280 if (dropWhiteSpace) while (XML_isSPACECHAR(*lpszText)&&(lpszText!=tokenPStr)) lpszText++; 01281 int cbText = (int)(tokenPStr - lpszText); 01282 if (!cbText) { pXML->lpszText=NULL; return 0; } 01283 if (dropWhiteSpace) { cbText--; while ((cbText)&&XML_isSPACECHAR(lpszText[cbText])) cbText--; cbText++; } 01284 if (!cbText) { pXML->lpszText=NULL; return 0; } 01285 XMLSTR lpt=fromXMLString(lpszText,cbText,pXML); 01286 if (!lpt) return 1; 01287 pXML->lpszText=NULL; 01288 if (removeCommentsInMiddleOfText && d->nText && d->nClear) 01289 { 01290 // if the previous insertion was a comment (<!-- -->) AND 01291 // if the previous previous insertion was a text then, delete the comment and append the text 01292 int n=d->nChild+d->nText+d->nClear-1,*o=d->pOrder; 01293 if (((o[n]&3)==eNodeClear)&&((o[n-1]&3)==eNodeText)) 01294 { 01295 int i=o[n]>>2; 01296 if (d->pClear[i].lpszOpenTag==XMLClearTags[2].lpszOpen) 01297 { 01298 deleteClear(i); 01299 i=o[n-1]>>2; 01300 n=xstrlen(d->pText[i]); 01301 int n2=xstrlen(lpt)+1; 01302 d->pText[i]=(XMLSTR)realloc((void*)d->pText[i],(n+n2)*sizeof(XMLCHAR)); 01303 if (!d->pText[i]) return 1; 01304 memcpy((void*)(d->pText[i]+n),lpt,n2*sizeof(XMLCHAR)); 01305 free(lpt); 01306 return 0; 01307 } 01308 } 01309 } 01310 addText_priv(MEMORYINCREASE,lpt,-1); 01311 return 0; 01312 } 01313 // private: 01314 // Recursively parse an XML element. 
//
//  in: pa  the XML parser state (struct XML) shared by all recursion levels
// out: FALSE when parsing has to stop: either an error was stored in
//      pXML->error or the end of the input was reached;
//      TRUE when this element is complete (end tag, '/>' or an unwinding
//      request stored in pXML->cbEndTag / pXML->cbNewElement) and the caller
//      must carry on.
//
// The very first call (pXML->nFirst set) starts outside of any tag; every
// recursive call starts inside the start tag of the element it was created
// for, i.e. it begins by reading the attributes.
int XMLNode::ParseXMLElement(void *pa)
{
    XML *pXML=(XML *)pa;
    int cbToken;
    enum XMLTokenTypeTag xtype;
    NextToken token;
    XMLCSTR lpszTemp=NULL;   // pending attribute name, or end tag name
    int cbTemp=0;            // length of lpszTemp
    char nDeclaration;
    XMLNode pNew;
    enum XMLStatus status; // inside or outside a tag
    enum Attrib attrib = eAttribName;

    assert(pXML);

    // If this is the first call to the function
    if (pXML->nFirst)
    {
        // Assume we are outside of a tag definition
        pXML->nFirst = FALSE;
        status = eOutsideTag;
    } else
    {
        // If this is not the first call then we should only be called when inside a tag.
        status = eInsideTag;
    }

    // Iterate through the tokens in the document
    for(;;)
    {
        // Obtain the next token
        token = GetNextToken(pXML, &cbToken, &xtype);

        if (xtype != eTokenError)
        {
            // Check the current status
            switch(status)
            {

            // If we are outside of a tag definition
            case eOutsideTag:

                // Check what type of token we obtained
                switch(xtype)
                {
                // If we have found text or quoted text: nothing to do now, the
                // text is collected later by maybeAddTxT from pXML->lpszText
                case eTokenText:
                case eTokenCloseTag:          /* '>'         */
                case eTokenShortHandClose:    /* '/>'        */
                case eTokenQuotedText:
                case eTokenEquals:
                    break;

                // If we found a start tag '<' and declarations '<?'
                case eTokenTagStart:
                case eTokenDeclaration:

                    // Cache whether this new element is a declaration or not
                    nDeclaration = (xtype == eTokenDeclaration);

                    // If we have node text then add this to the element
                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;

                    // Find the name of the tag
                    token = GetNextToken(pXML, &cbToken, &xtype);

                    // Return an error if we couldn't obtain the next token or
                    // it wasn't text
                    if (xtype != eTokenText)
                    {
                        pXML->error = eXMLErrorMissingTagName;
                        return FALSE;
                    }

                    // If we found a new element which is the same as this
                    // element then we need to pass this back to the caller..

#ifdef APPROXIMATE_PARSING
                    if (d->lpszName &&
                        myTagCompare(d->lpszName, token.pStr) == 0)
                    {
                        // Indicate to the caller that it needs to create a
                        // new element.
                        pXML->lpNewElement = token.pStr;
                        pXML->cbNewElement = cbToken;
                        return TRUE;
                    } else
#endif
                    {
                        // If the name of the new element differs from the name of
                        // the current element we need to add the new element to
                        // the current one and recurse
                        pNew = addChild_priv(MEMORYINCREASE,stringDup(token.pStr,cbToken), nDeclaration,-1);

                        while (!pNew.isEmpty())
                        {
                            // Call ourselves to process the new node.  If we return
                            // FALSE this means we don't have any more
                            // processing to do...

                            if (!pNew.ParseXMLElement(pXML)) return FALSE;
                            else
                            {
                                // If the recursive call ended on an end tag
                                // that did not match the child, then
                                // we need to unwind the calls to this
                                // function until we find the appropriate node
                                // (the element name and end tag name must
                                // match)
                                if (pXML->cbEndTag)
                                {
                                    // If we are back at the root node then we
                                    // have an unmatched end tag
                                    if (!d->lpszName)
                                    {
                                        pXML->error=eXMLErrorUnmatchedEndTag;
                                        return FALSE;
                                    }

                                    // If the end tag matches the name of this
                                    // element then we only need to unwind
                                    // once more...

                                    if (myTagCompare(d->lpszName, pXML->lpEndTag)==0)
                                    {
                                        pXML->cbEndTag = 0;
                                    }

                                    return TRUE;
                                } else
                                if (pXML->cbNewElement)
                                {
                                    // If the call indicated a new element is to
                                    // be created on THIS element.

                                    // If the name of this element matches the
                                    // name of the element we need to create
                                    // then we need to return to the caller
                                    // and let it process the element.

                                    if (myTagCompare(d->lpszName, pXML->lpNewElement)==0)
                                    {
                                        return TRUE;
                                    }

                                    // Add the new element and recurse
                                    pNew = addChild_priv(MEMORYINCREASE,stringDup(pXML->lpNewElement,pXML->cbNewElement),0,-1);
                                    pXML->cbNewElement = 0;
                                }
                                else
                                {
                                    // If we didn't have a new element to create:
                                    // an empty node terminates the while loop
                                    pNew = emptyXMLNode;

                                }
                            }
                        }
                    }
                    break;

                // If we found an end tag
                case eTokenTagEnd:

                    // If we have node text then add this to the element
                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;

                    // Find the name of the end tag
                    token = GetNextToken(pXML, &cbTemp, &xtype);

                    // The end tag should be text
                    if (xtype != eTokenText)
                    {
                        pXML->error = eXMLErrorMissingEndTagName;
                        return FALSE;
                    }
                    lpszTemp = token.pStr;

                    // After the end tag we should find a closing tag
                    token = GetNextToken(pXML, &cbToken, &xtype);
                    if (xtype != eTokenCloseTag)
                    {
                        pXML->error = eXMLErrorMissingEndTagName;
                        return FALSE;
                    }
                    // Any text of the parent starts right after the '>'
                    pXML->lpszText=pXML->lpXML+pXML->nIndex;

                    // We need to return to the previous caller.  If the name
                    // of the tag cannot be found we need to keep returning to
                    // caller until we find a match
                    if (myTagCompare(d->lpszName, lpszTemp) != 0)
#ifdef STRICT_PARSING
                    {
                        pXML->error=eXMLErrorUnmatchedEndTag;
                        pXML->nIndexMissigEndTag=pXML->nIndex;
                        return FALSE;
                    }
#else
                    {
                        // Remember the end tag: the callers compare it with
                        // their own name while unwinding (see cbEndTag above)
                        pXML->error=eXMLErrorMissingEndTag;
                        pXML->nIndexMissigEndTag=pXML->nIndex;
                        pXML->lpEndTag = lpszTemp;
                        pXML->cbEndTag = cbTemp;
                    }
#endif

                    // Return to the caller
                    exactMemory(d);
                    return TRUE;

                // If we found a clear (unformatted) token
                case eTokenClear:
                    // If we have node text then add this to the element
                    if (maybeAddTxT(pXML,token.pStr)) return FALSE;
                    if (parseClearTag(pXML, token.pClr)) return FALSE;
                    pXML->lpszText=pXML->lpXML+pXML->nIndex;
                    break;

                default:
                    break;
                }
                break;

            // If we are inside a tag definition we need to search for attributes
            case eInsideTag:

                // Check what part of the attribute (name, equals, value) we
                // are looking for.
                switch(attrib)
                {
                // If we are looking for a new attribute
                case eAttribName:

                    // Check what the current token type is
                    switch(xtype)
                    {
                    // If the current type is text...
                    // Eg.  'attribute'
                    case eTokenText:
                        // Cache the token then indicate that we are next to
                        // look for the equals
                        lpszTemp = token.pStr;
                        cbTemp = cbToken;
                        attrib = eAttribEquals;
                        break;

                    // If we found a closing tag...
                    // Eg.  '>'
                    case eTokenCloseTag:
                        // We are now outside the tag
                        status = eOutsideTag;
                        pXML->lpszText=pXML->lpXML+pXML->nIndex;
                        break;

                    // If we found a short hand '/>' closing tag then we can
                    // return to the caller
                    case eTokenShortHandClose:
                        exactMemory(d);
                        pXML->lpszText=pXML->lpXML+pXML->nIndex;
                        return TRUE;

                    // Errors...
                    case eTokenQuotedText:    /* '"SomeText"'   */
                    case eTokenTagStart:      /* '<'            */
                    case eTokenTagEnd:        /* '</'           */
                    case eTokenEquals:        /* '='            */
                    case eTokenDeclaration:   /* '<?'           */
                    case eTokenClear:
                        pXML->error = eXMLErrorUnexpectedToken;
                        return FALSE;
                    default: break;
                    }
                    break;

                // If we are looking for an equals
                case eAttribEquals:
                    // Check what the current token type is
                    switch(xtype)
                    {
                    // If the current type is text...
                    // Eg.  'Attribute AnotherAttribute'
                    case eTokenText:
                        // Add the unvalued attribute to the list
                        addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
                        // Cache the new token: we are still looking for the
                        // equals, now for this new attribute name
                        lpszTemp = token.pStr;
                        cbTemp = cbToken;
                        break;

                    // If we found a closing tag 'Attribute >' or a short hand
                    // closing tag 'Attribute />'
                    case eTokenShortHandClose:
                    case eTokenCloseTag:
                        // If we are a declaration element '<?' then we need
                        // to remove extra closing '?' if it exists
                        pXML->lpszText=pXML->lpXML+pXML->nIndex;

                        if (d->isDeclaration &&
                            (lpszTemp[cbTemp-1]) == _CXML('?'))
                        {
                            cbTemp--;
                            // A declaration whose parent itself has a parent is
                            // closed here, exactly as if it ended with '/>'
                            if (d->pParent && d->pParent->pParent) xtype = eTokenShortHandClose;
                        }

                        if (cbTemp)
                        {
                            // Add the unvalued attribute to the list
                            addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp), NULL);
                        }

                        // If this is the end of the tag then return to the caller
                        if (xtype == eTokenShortHandClose)
                        {
                            exactMemory(d);
                            return TRUE;
                        }

                        // We are now outside the tag
                        status = eOutsideTag;
                        break;

                    // If we found the equals token...
                    // Eg.  'Attribute ='
                    case eTokenEquals:
                        // Indicate that we next need to search for the value
                        // for the attribute
                        attrib = eAttribValue;
                        break;

                    // Errors...
                    case eTokenQuotedText:    /* 'Attribute "InvalidAttr"'*/
                    case eTokenTagStart:      /* 'Attribute <'            */
                    case eTokenTagEnd:        /* 'Attribute </'           */
                    case eTokenDeclaration:   /* 'Attribute <?'           */
                    case eTokenClear:
                        pXML->error = eXMLErrorUnexpectedToken;
                        return FALSE;
                    default: break;
                    }
                    break;

                // If we are looking for an attribute value
                case eAttribValue:
                    // Check what the current token type is
                    switch(xtype)
                    {
                    // If the current type is text or quoted text...
                    // Eg.  'Attribute = "Value"' or 'Attribute = Value' or
                    // 'Attribute = 'Value''.
                    case eTokenText:
                    case eTokenQuotedText:
                        // If we are a declaration element '<?' then we need
                        // to remove extra closing '?' if it exists
                        if (d->isDeclaration &&
                            (token.pStr[cbToken-1]) == _CXML('?'))
                        {
                            cbToken--;
                        }

                        if (cbTemp)
                        {
                            // Add the valued attribute to the list
                            // (a quoted token includes its two quotes: drop them)
                            if (xtype==eTokenQuotedText) { token.pStr++; cbToken-=2; }
                            XMLSTR attrVal=(XMLSTR)token.pStr;
                            if (attrVal)
                            {
                                // Decode the entities; the error code is set by
                                // fromXMLString when it fails
                                attrVal=fromXMLString(attrVal,cbToken,pXML);
                                if (!attrVal) return FALSE;
                            }
                            addAttribute_priv(MEMORYINCREASE,stringDup(lpszTemp,cbTemp),attrVal);
                        }

                        // Indicate we are searching for a new attribute
                        attrib = eAttribName;
                        break;

                    // Errors...
                    case eTokenTagStart:        /* 'Attr = <'          */
                    case eTokenTagEnd:          /* 'Attr = </'         */
                    case eTokenCloseTag:        /* 'Attr = >'          */
                    case eTokenShortHandClose:  /* "Attr = />"         */
                    case eTokenEquals:          /* 'Attr = ='          */
                    case eTokenDeclaration:     /* 'Attr = <?'         */
                    case eTokenClear:
                        pXML->error = eXMLErrorUnexpectedToken;
                        return FALSE;
                        break;
                    default: break;
                    }
                }
            }
        }
        // If we failed to obtain the next token (end of the input)
        else
        {
            // Reaching the end of the input inside a regular element that
            // has a parent means its end tag is missing
            if ((!d->isDeclaration)&&(d->pParent))
            {
#ifdef STRICT_PARSING
                pXML->error=eXMLErrorUnmatchedEndTag;
#else
                pXML->error=eXMLErrorMissingEndTag;
#endif
                pXML->nIndexMissigEndTag=pXML->nIndex;
            }
            // Keep the trailing text, if any (the result is ignored on purpose:
            // we return FALSE in any case)
            maybeAddTxT(pXML,pXML->lpXML+pXML->nIndex);
            return FALSE;
        }
    }
}

// Count the number of lines and columns in an XML string.
01726 static void CountLinesAndColumns(XMLCSTR lpXML, int nUpto, XMLResults *pResults) 01727 { 01728 XMLCHAR ch; 01729 assert(lpXML); 01730 assert(pResults); 01731 01732 struct XML xml={ lpXML,lpXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; 01733 01734 pResults->nLine = 1; 01735 pResults->nColumn = 1; 01736 while (xml.nIndex<nUpto) 01737 { 01738 ch = getNextChar(&xml); 01739 if (ch != _CXML('\n')) pResults->nColumn++; 01740 else 01741 { 01742 pResults->nLine++; 01743 pResults->nColumn=1; 01744 } 01745 } 01746 } 01747 01748 // Parse XML and return the root element. 01749 XMLNode XMLNode::parseString(XMLCSTR lpszXML, XMLCSTR tag, XMLResults *pResults) 01750 { 01751 if (!lpszXML) 01752 { 01753 if (pResults) 01754 { 01755 pResults->error=eXMLErrorNoElements; 01756 pResults->nLine=0; 01757 pResults->nColumn=0; 01758 } 01759 return emptyXMLNode; 01760 } 01761 01762 XMLNode xnode(NULL,NULL,FALSE); 01763 struct XML xml={ lpszXML, lpszXML, 0, 0, eXMLErrorNone, NULL, 0, NULL, 0, TRUE }; 01764 01765 // Create header element 01766 xnode.ParseXMLElement(&xml); 01767 enum XMLError error = xml.error; 01768 if (!xnode.nChildNode()) error=eXMLErrorNoXMLTagFound; 01769 if ((xnode.nChildNode()==1)&&(xnode.nElement()==1)) xnode=xnode.getChildNode(); // skip the empty node 01770 01771 // If no error occurred 01772 if ((error==eXMLErrorNone)||(error==eXMLErrorMissingEndTag)||(error==eXMLErrorNoXMLTagFound)) 01773 { 01774 XMLCSTR name=xnode.getName(); 01775 if (tag&&(*tag)&&((!name)||(xstricmp(name,tag)))) 01776 { 01777 xnode=xnode.getChildNode(tag); 01778 if (xnode.isEmpty()) 01779 { 01780 if (pResults) 01781 { 01782 pResults->error=eXMLErrorFirstTagNotFound; 01783 pResults->nLine=0; 01784 pResults->nColumn=0; 01785 } 01786 return emptyXMLNode; 01787 } 01788 } 01789 } else 01790 { 01791 // Cleanup: this will destroy all the nodes 01792 xnode = emptyXMLNode; 01793 } 01794 01795 01796 // If we have been given somewhere to place results 01797 if (pResults) 01798 { 01799 
pResults->error = error; 01800 01801 // If we have an error 01802 if (error!=eXMLErrorNone) 01803 { 01804 if (error==eXMLErrorMissingEndTag) xml.nIndex=xml.nIndexMissigEndTag; 01805 // Find which line and column it starts on. 01806 CountLinesAndColumns(xml.lpXML, xml.nIndex, pResults); 01807 } 01808 } 01809 return xnode; 01810 } 01811 01812 XMLNode XMLNode::parseFile(XMLCSTR filename, XMLCSTR tag, XMLResults *pResults) 01813 { 01814 if (pResults) { pResults->nLine=0; pResults->nColumn=0; } 01815 FILE *f=xfopen(filename,_CXML("rb")); 01816 if (f==NULL) { if (pResults) pResults->error=eXMLErrorFileNotFound; return emptyXMLNode; } 01817 fseek(f,0,SEEK_END); 01818 int l=(int)ftell(f),headerSz=0; 01819 if (!l) { if (pResults) pResults->error=eXMLErrorEmpty; fclose(f); return emptyXMLNode; } 01820 fseek(f,0,SEEK_SET); 01821 unsigned char *buf=(unsigned char*)malloc(l+4); 01822 l=(int)fread(buf,1,l,f); 01823 fclose(f); 01824 buf[l]=0;buf[l+1]=0;buf[l+2]=0;buf[l+3]=0; 01825 #ifdef _XMLWIDECHAR 01826 if (guessWideCharChars) 01827 { 01828 if (!myIsTextWideChar(buf,l)) 01829 { 01830 XMLNode::XMLCharEncoding ce=XMLNode::char_encoding_legacy; 01831 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) { headerSz=3; ce=XMLNode::char_encoding_UTF8; } 01832 XMLSTR b2=myMultiByteToWideChar((const char*)(buf+headerSz),ce); 01833 if (!b2) 01834 { 01835 // todo: unable to convert 01836 } 01837 free(buf); buf=(unsigned char*)b2; headerSz=0; 01838 } else 01839 { 01840 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01841 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01842 } 01843 } else 01844 { 01845 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01846 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01847 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; 01848 } 01849 #else 01850 if (guessWideCharChars) 01851 { 01852 if (myIsTextWideChar(buf,l)) 01853 { 01854 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01855 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01856 char 
*b2=myWideCharToMultiByte((const wchar_t*)(buf+headerSz)); 01857 free(buf); buf=(unsigned char*)b2; headerSz=0; 01858 } else 01859 { 01860 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; 01861 } 01862 } else 01863 { 01864 if ((buf[0]==0xef)&&(buf[1]==0xff)) headerSz=2; 01865 if ((buf[0]==0xff)&&(buf[1]==0xfe)) headerSz=2; 01866 if ((buf[0]==0xef)&&(buf[1]==0xbb)&&(buf[2]==0xbf)) headerSz=3; 01867 } 01868 #endif 01869 01870 if (!buf) { if (pResults) pResults->error=eXMLErrorCharConversionError; return emptyXMLNode; } 01871 XMLNode x=parseString((XMLSTR)(buf+headerSz),tag,pResults); 01872 free(buf); 01873 return x; 01874 } 01875 01876 static inline void charmemset(XMLSTR dest,XMLCHAR c,int l) { while (l--) *(dest++)=c; } 01877 // private: 01878 // Creates an user friendly XML string from a given element with 01879 // appropriate white space and carriage returns. 01880 // 01881 // This recurses through all subnodes then adds contents of the nodes to the 01882 // string. 01883 int XMLNode::CreateXMLStringR(XMLNodeData *pEntry, XMLSTR lpszMarker, int nFormat) 01884 { 01885 int nResult = 0; 01886 int cb=nFormat<0?0:nFormat; 01887 int cbElement; 01888 int nChildFormat=-1; 01889 int nElementI=pEntry->nChild+pEntry->nText+pEntry->nClear; 01890 int i,j; 01891 if ((nFormat>=0)&&(nElementI==1)&&(pEntry->nText==1)&&(!pEntry->isDeclaration)) nFormat=-2; 01892 01893 assert(pEntry); 01894 01895 #define LENSTR(lpsz) (lpsz ? xstrlen(lpsz) : 0) 01896 01897 // If the element has no name then assume this is the head node. 
01898 cbElement = (int)LENSTR(pEntry->lpszName); 01899 01900 if (cbElement) 01901 { 01902 // "<elementname " 01903 if (lpszMarker) 01904 { 01905 if (cb) charmemset(lpszMarker, INDENTCHAR, cb); 01906 nResult = cb; 01907 lpszMarker[nResult++]=_CXML('<'); 01908 if (pEntry->isDeclaration) lpszMarker[nResult++]=_CXML('?'); 01909 xstrcpy(&lpszMarker[nResult], pEntry->lpszName); 01910 nResult+=cbElement; 01911 lpszMarker[nResult++]=_CXML(' '); 01912 01913 } else 01914 { 01915 nResult+=cbElement+2+cb; 01916 if (pEntry->isDeclaration) nResult++; 01917 } 01918 01919 // Enumerate attributes and add them to the string 01920 XMLAttribute *pAttr=pEntry->pAttribute; 01921 for (i=0; i<pEntry->nAttribute; i++) 01922 { 01923 // "Attrib 01924 cb = (int)LENSTR(pAttr->lpszName); 01925 if (cb) 01926 { 01927 if (lpszMarker) xstrcpy(&lpszMarker[nResult], pAttr->lpszName); 01928 nResult += cb; 01929 // "Attrib=Value " 01930 if (pAttr->lpszValue) 01931 { 01932 cb=(int)ToXMLStringTool::lengthXMLString(pAttr->lpszValue); 01933 if (lpszMarker) 01934 { 01935 lpszMarker[nResult]=_CXML('='); 01936 lpszMarker[nResult+1]=_CXML('"'); 01937 if (cb) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+2],pAttr->lpszValue); 01938 lpszMarker[nResult+cb+2]=_CXML('"'); 01939 } 01940 nResult+=cb+3; 01941 } 01942 if (lpszMarker) lpszMarker[nResult] = _CXML(' '); 01943 nResult++; 01944 } 01945 pAttr++; 01946 } 01947 01948 if (pEntry->isDeclaration) 01949 { 01950 if (lpszMarker) 01951 { 01952 lpszMarker[nResult-1]=_CXML('?'); 01953 lpszMarker[nResult]=_CXML('>'); 01954 } 01955 nResult++; 01956 if (nFormat!=-1) 01957 { 01958 if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); 01959 nResult++; 01960 } 01961 } else 01962 // If there are child nodes we need to terminate the start tag 01963 if (nElementI) 01964 { 01965 if (lpszMarker) lpszMarker[nResult-1]=_CXML('>'); 01966 if (nFormat>=0) 01967 { 01968 if (lpszMarker) lpszMarker[nResult]=_CXML('\n'); 01969 nResult++; 01970 } 01971 } else nResult--; 01972 } 01973 01974 
// Calculate the child format for when we recurse. This is used to 01975 // determine the number of spaces used for prefixes. 01976 if (nFormat!=-1) 01977 { 01978 if (cbElement&&(!pEntry->isDeclaration)) nChildFormat=nFormat+1; 01979 else nChildFormat=nFormat; 01980 } 01981 01982 // Enumerate through remaining children 01983 for (i=0; i<nElementI; i++) 01984 { 01985 j=pEntry->pOrder[i]; 01986 switch((XMLElementType)(j&3)) 01987 { 01988 // Text nodes 01989 case eNodeText: 01990 { 01991 // "Text" 01992 XMLCSTR pChild=pEntry->pText[j>>2]; 01993 cb = (int)ToXMLStringTool::lengthXMLString(pChild); 01994 if (cb) 01995 { 01996 if (nFormat>=0) 01997 { 01998 if (lpszMarker) 01999 { 02000 charmemset(&lpszMarker[nResult],INDENTCHAR,nFormat+1); 02001 ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult+nFormat+1],pChild); 02002 lpszMarker[nResult+nFormat+1+cb]=_CXML('\n'); 02003 } 02004 nResult+=cb+nFormat+2; 02005 } else 02006 { 02007 if (lpszMarker) ToXMLStringTool::toXMLUnSafe(&lpszMarker[nResult], pChild); 02008 nResult += cb; 02009 } 02010 } 02011 break; 02012 } 02013 02014 // Clear type nodes 02015 case eNodeClear: 02016 { 02017 XMLClear *pChild=pEntry->pClear+(j>>2); 02018 // "OpenTag" 02019 cb = (int)LENSTR(pChild->lpszOpenTag); 02020 if (cb) 02021 { 02022 if (nFormat!=-1) 02023 { 02024 if (lpszMarker) 02025 { 02026 charmemset(&lpszMarker[nResult], INDENTCHAR, nFormat+1); 02027 xstrcpy(&lpszMarker[nResult+nFormat+1], pChild->lpszOpenTag); 02028 } 02029 nResult+=cb+nFormat+1; 02030 } 02031 else 02032 { 02033 if (lpszMarker)xstrcpy(&lpszMarker[nResult], pChild->lpszOpenTag); 02034 nResult += cb; 02035 } 02036 } 02037 02038 // "OpenTag Value" 02039 cb = (int)LENSTR(pChild->lpszValue); 02040 if (cb) 02041 { 02042 if (lpszMarker) xstrcpy(&lpszMarker[nResult], pChild->lpszValue); 02043 nResult += cb; 02044 } 02045 02046 // "OpenTag Value CloseTag" 02047 cb = (int)LENSTR(pChild->lpszCloseTag); 02048 if (cb) 02049 { 02050 if (lpszMarker) xstrcpy(&lpszMarker[nResult], 
pChild->lpszCloseTag); 02051 nResult += cb; 02052 } 02053 02054 if (nFormat!=-1) 02055 { 02056 if (lpszMarker) lpszMarker[nResult] = _CXML('\n'); 02057 nResult++; 02058 } 02059 break; 02060 } 02061 02062 // Element nodes 02063 case eNodeChild: 02064 { 02065 // Recursively add child nodes 02066 nResult += CreateXMLStringR(pEntry->pChild[j>>2].d, lpszMarker ? lpszMarker + nResult : 0, nChildFormat); 02067 break; 02068 } 02069 default: break; 02070 } 02071 } 02072 02073 if ((cbElement)&&(!pEntry->isDeclaration)) 02074 { 02075 // If we have child entries we need to use long XML notation for 02076 // closing the element - "<elementname>blah blah blah</elementname>" 02077 if (nElementI) 02078 { 02079 // "</elementname>\0" 02080 if (lpszMarker) 02081 { 02082 if (nFormat >=0) 02083 { 02084 charmemset(&lpszMarker[nResult], INDENTCHAR,nFormat); 02085 nResult+=nFormat; 02086 } 02087 02088 lpszMarker[nResult]=_CXML('<'); lpszMarker[nResult+1]=_CXML('/'); 02089 nResult += 2; 02090 xstrcpy(&lpszMarker[nResult], pEntry->lpszName); 02091 nResult += cbElement; 02092 02093 lpszMarker[nResult]=_CXML('>'); 02094 if (nFormat == -1) nResult++; 02095 else 02096 { 02097 lpszMarker[nResult+1]=_CXML('\n'); 02098 nResult+=2; 02099 } 02100 } else 02101 { 02102 if (nFormat>=0) nResult+=cbElement+4+nFormat; 02103 else if (nFormat==-1) nResult+=cbElement+3; 02104 else nResult+=cbElement+4; 02105 } 02106 } else 02107 { 02108 // If there are no children we can use shorthand XML notation - 02109 // "<elementname/>" 02110 // "/>\0" 02111 if (lpszMarker) 02112 { 02113 lpszMarker[nResult]=_CXML('/'); lpszMarker[nResult+1]=_CXML('>'); 02114 if (nFormat != -1) lpszMarker[nResult+2]=_CXML('\n'); 02115 } 02116 nResult += nFormat == -1 ? 
2 : 3; 02117 } 02118 } 02119 02120 return nResult; 02121 } 02122 02123 #undef LENSTR 02124 02125 // Create an XML string 02126 // @param int nFormat - 0 if no formatting is required 02127 // otherwise nonzero for formatted text 02128 // with carriage returns and indentation. 02129 // @param int *pnSize - [out] pointer to the size of the 02130 // returned string not including the 02131 // NULL terminator. 02132 // @return XMLSTR - Allocated XML string, you must free 02133 // this with free(). 02134 XMLSTR XMLNode::createXMLString(int nFormat, int *pnSize) const 02135 { 02136 if (!d) { if (pnSize) *pnSize=0; return NULL; } 02137 02138 XMLSTR lpszResult = NULL; 02139 int cbStr; 02140 02141 // Recursively Calculate the size of the XML string 02142 if (!dropWhiteSpace) nFormat=0; 02143 nFormat = nFormat ? 0 : -1; 02144 cbStr = CreateXMLStringR(d, 0, nFormat); 02145 // Alllocate memory for the XML string + the NULL terminator and 02146 // create the recursively XML string. 02147 lpszResult=(XMLSTR)malloc((cbStr+1)*sizeof(XMLCHAR)); 02148 CreateXMLStringR(d, lpszResult, nFormat); 02149 lpszResult[cbStr]=_CXML('\0'); 02150 if (pnSize) *pnSize = cbStr; 02151 return lpszResult; 02152 } 02153 02154 int XMLNode::detachFromParent(XMLNodeData *d) 02155 { 02156 XMLNode *pa=d->pParent->pChild; 02157 int i=0; 02158 while (((void*)(pa[i].d))!=((void*)d)) i++; 02159 d->pParent->nChild--; 02160 if (d->pParent->nChild) memmove(pa+i,pa+i+1,(d->pParent->nChild-i)*sizeof(XMLNode)); 02161 else { free(pa); d->pParent->pChild=NULL; } 02162 return removeOrderElement(d->pParent,eNodeChild,i); 02163 } 02164 02165 XMLNode::~XMLNode() 02166 { 02167 if (!d) return; 02168 d->ref_count--; 02169 emptyTheNode(0); 02170 } 02171 void XMLNode::deleteNodeContent() 02172 { 02173 if (!d) return; 02174 if (d->pParent) { detachFromParent(d); d->pParent=NULL; d->ref_count--; } 02175 emptyTheNode(1); 02176 } 02177 void XMLNode::emptyTheNode(char force) 02178 { 02179 XMLNodeData *dd=d; // warning: must stay this 
way! 02180 if ((dd->ref_count==0)||force) 02181 { 02182 if (d->pParent) detachFromParent(d); 02183 int i; 02184 XMLNode *pc; 02185 for(i=0; i<dd->nChild; i++) 02186 { 02187 pc=dd->pChild+i; 02188 pc->d->pParent=NULL; 02189 pc->d->ref_count--; 02190 pc->emptyTheNode(force); 02191 } 02192 myFree(dd->pChild); 02193 for(i=0; i<dd->nText; i++) free((void*)dd->pText[i]); 02194 myFree(dd->pText); 02195 for(i=0; i<dd->nClear; i++) free((void*)dd->pClear[i].lpszValue); 02196 myFree(dd->pClear); 02197 for(i=0; i<dd->nAttribute; i++) 02198 { 02199 free((void*)dd->pAttribute[i].lpszName); 02200 if (dd->pAttribute[i].lpszValue) free((void*)dd->pAttribute[i].lpszValue); 02201 } 02202 myFree(dd->pAttribute); 02203 myFree(dd->pOrder); 02204 myFree((void*)dd->lpszName); 02205 dd->nChild=0; dd->nText=0; dd->nClear=0; dd->nAttribute=0; 02206 dd->pChild=NULL; dd->pText=NULL; dd->pClear=NULL; dd->pAttribute=NULL; 02207 dd->pOrder=NULL; dd->lpszName=NULL; dd->pParent=NULL; 02208 } 02209 if (dd->ref_count==0) 02210 { 02211 free(dd); 02212 d=NULL; 02213 } 02214 } 02215 02216 XMLNode& XMLNode::operator=( const XMLNode& A ) 02217 { 02218 // shallow copy 02219 if (this != &A) 02220 { 02221 if (d) { d->ref_count--; emptyTheNode(0); } 02222 d=A.d; 02223 if (d) (d->ref_count) ++ ; 02224 } 02225 return *this; 02226 } 02227 02228 XMLNode::XMLNode(const XMLNode &A) 02229 { 02230 // shallow copy 02231 d=A.d; 02232 if (d) (d->ref_count)++ ; 02233 } 02234 02235 XMLNode XMLNode::deepCopy() const 02236 { 02237 if (!d) return XMLNode::emptyXMLNode; 02238 XMLNode x(NULL,stringDup(d->lpszName),d->isDeclaration); 02239 XMLNodeData *p=x.d; 02240 int n=d->nAttribute; 02241 if (n) 02242 { 02243 p->nAttribute=n; p->pAttribute=(XMLAttribute*)malloc(n*sizeof(XMLAttribute)); 02244 while (n--) 02245 { 02246 p->pAttribute[n].lpszName=stringDup(d->pAttribute[n].lpszName); 02247 p->pAttribute[n].lpszValue=stringDup(d->pAttribute[n].lpszValue); 02248 } 02249 } 02250 if (d->pOrder) 02251 { 02252 
n=(d->nChild+d->nText+d->nClear)*sizeof(int); p->pOrder=(int*)malloc(n); memcpy(p->pOrder,d->pOrder,n); 02253 } 02254 n=d->nText; 02255 if (n) 02256 { 02257 p->nText=n; p->pText=(XMLCSTR*)malloc(n*sizeof(XMLCSTR)); 02258 while(n--) p->pText[n]=stringDup(d->pText[n]); 02259 } 02260 n=d->nClear; 02261 if (n) 02262 { 02263 p->nClear=n; p->pClear=(XMLClear*)malloc(n*sizeof(XMLClear)); 02264 while (n--) 02265 { 02266 p->pClear[n].lpszCloseTag=d->pClear[n].lpszCloseTag; 02267 p->pClear[n].lpszOpenTag=d->pClear[n].lpszOpenTag; 02268 p->pClear[n].lpszValue=stringDup(d->pClear[n].lpszValue); 02269 } 02270 } 02271 n=d->nChild; 02272 if (n) 02273 { 02274 p->nChild=n; p->pChild=(XMLNode*)malloc(n*sizeof(XMLNode)); 02275 while (n--) 02276 { 02277 p->pChild[n].d=NULL; 02278 p->pChild[n]=d->pChild[n].deepCopy(); 02279 p->pChild[n].d->pParent=p; 02280 } 02281 } 02282 return x; 02283 } 02284 02285 XMLNode XMLNode::addChild(XMLNode childNode, int pos) 02286 { 02287 XMLNodeData *dc=childNode.d; 02288 if ((!dc)||(!d)) return childNode; 02289 if (!dc->lpszName) 02290 { 02291 // this is a root node: todo: correct fix 02292 int j=pos; 02293 while (dc->nChild) 02294 { 02295 addChild(dc->pChild[0],j); 02296 if (pos>=0) j++; 02297 } 02298 return childNode; 02299 } 02300 if (dc->pParent) { if ((detachFromParent(dc)<=pos)&&(dc->pParent==d)) pos--; } else dc->ref_count++; 02301 dc->pParent=d; 02302 // int nc=d->nChild; 02303 // d->pChild=(XMLNode*)myRealloc(d->pChild,(nc+1),memoryIncrease,sizeof(XMLNode)); 02304 d->pChild=(XMLNode*)addToOrder(0,&pos,d->nChild,d->pChild,sizeof(XMLNode),eNodeChild); 02305 d->pChild[pos].d=dc; 02306 d->nChild++; 02307 return childNode; 02308 } 02309 02310 void XMLNode::deleteAttribute(int i) 02311 { 02312 if ((!d)||(i<0)||(i>=d->nAttribute)) return; 02313 d->nAttribute--; 02314 XMLAttribute *p=d->pAttribute+i; 02315 free((void*)p->lpszName); 02316 if (p->lpszValue) free((void*)p->lpszValue); 02317 if (d->nAttribute) 
memmove(p,p+1,(d->nAttribute-i)*sizeof(XMLAttribute)); else { free(p); d->pAttribute=NULL; } 02318 } 02319 02320 void XMLNode::deleteAttribute(XMLAttribute *a){ if (a) deleteAttribute(a->lpszName); } 02321 void XMLNode::deleteAttribute(XMLCSTR lpszName) 02322 { 02323 int j=0; 02324 getAttribute(lpszName,&j); 02325 if (j) deleteAttribute(j-1); 02326 } 02327 02328 XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,int i) 02329 { 02330 if (!d) { if (lpszNewValue) free(lpszNewValue); if (lpszNewName) free(lpszNewName); return NULL; } 02331 if (i>=d->nAttribute) 02332 { 02333 if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); 02334 return NULL; 02335 } 02336 XMLAttribute *p=d->pAttribute+i; 02337 if (p->lpszValue&&p->lpszValue!=lpszNewValue) free((void*)p->lpszValue); 02338 p->lpszValue=lpszNewValue; 02339 if (lpszNewName&&p->lpszName!=lpszNewName) { free((void*)p->lpszName); p->lpszName=lpszNewName; }; 02340 return p; 02341 } 02342 02343 XMLAttribute *XMLNode::updateAttribute_WOSD(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) 02344 { 02345 if (oldAttribute) return updateAttribute_WOSD((XMLSTR)newAttribute->lpszValue,(XMLSTR)newAttribute->lpszName,oldAttribute->lpszName); 02346 return addAttribute_WOSD((XMLSTR)newAttribute->lpszName,(XMLSTR)newAttribute->lpszValue); 02347 } 02348 02349 XMLAttribute *XMLNode::updateAttribute_WOSD(XMLSTR lpszNewValue, XMLSTR lpszNewName,XMLCSTR lpszOldName) 02350 { 02351 int j=0; 02352 getAttribute(lpszOldName,&j); 02353 if (j) return updateAttribute_WOSD(lpszNewValue,lpszNewName,j-1); 02354 else 02355 { 02356 if (lpszNewName) return addAttribute_WOSD(lpszNewName,lpszNewValue); 02357 else return addAttribute_WOSD(stringDup(lpszOldName),lpszNewValue); 02358 } 02359 } 02360 02361 int XMLNode::indexText(XMLCSTR lpszValue) const 02362 { 02363 if (!d) return -1; 02364 int i,l=d->nText; 02365 if (!lpszValue) { if (l) return 0; return -1; } 02366 XMLCSTR *p=d->pText; 02367 for (i=0; 
i<l; i++) if (lpszValue==p[i]) return i; 02368 return -1; 02369 } 02370 02371 void XMLNode::deleteText(int i) 02372 { 02373 if ((!d)||(i<0)||(i>=d->nText)) return; 02374 d->nText--; 02375 XMLCSTR *p=d->pText+i; 02376 free((void*)*p); 02377 if (d->nText) memmove(p,p+1,(d->nText-i)*sizeof(XMLCSTR)); else { free(p); d->pText=NULL; } 02378 removeOrderElement(d,eNodeText,i); 02379 } 02380 02381 void XMLNode::deleteText(XMLCSTR lpszValue) { deleteText(indexText(lpszValue)); } 02382 02383 XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, int i) 02384 { 02385 if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } 02386 if (i>=d->nText) return addText_WOSD(lpszNewValue); 02387 XMLCSTR *p=d->pText+i; 02388 if (*p!=lpszNewValue) { free((void*)*p); *p=lpszNewValue; } 02389 return lpszNewValue; 02390 } 02391 02392 XMLCSTR XMLNode::updateText_WOSD(XMLSTR lpszNewValue, XMLCSTR lpszOldValue) 02393 { 02394 if (!d) { if (lpszNewValue) free(lpszNewValue); return NULL; } 02395 int i=indexText(lpszOldValue); 02396 if (i>=0) return updateText_WOSD(lpszNewValue,i); 02397 return addText_WOSD(lpszNewValue); 02398 } 02399 02400 void XMLNode::deleteClear(int i) 02401 { 02402 if ((!d)||(i<0)||(i>=d->nClear)) return; 02403 d->nClear--; 02404 XMLClear *p=d->pClear+i; 02405 free((void*)p->lpszValue); 02406 if (d->nClear) memmove(p,p+1,(d->nClear-i)*sizeof(XMLClear)); else { free(p); d->pClear=NULL; } 02407 removeOrderElement(d,eNodeClear,i); 02408 } 02409 02410 int XMLNode::indexClear(XMLCSTR lpszValue) const 02411 { 02412 if (!d) return -1; 02413 int i,l=d->nClear; 02414 if (!lpszValue) { if (l) return 0; return -1; } 02415 XMLClear *p=d->pClear; 02416 for (i=0; i<l; i++) if (lpszValue==p[i].lpszValue) return i; 02417 return -1; 02418 } 02419 02420 void XMLNode::deleteClear(XMLCSTR lpszValue) { deleteClear(indexClear(lpszValue)); } 02421 void XMLNode::deleteClear(XMLClear *a) { if (a) deleteClear(a->lpszValue); } 02422 02423 XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, 
int i) 02424 { 02425 if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } 02426 if (i>=d->nClear) return addClear_WOSD(lpszNewContent); 02427 XMLClear *p=d->pClear+i; 02428 if (lpszNewContent!=p->lpszValue) { free((void*)p->lpszValue); p->lpszValue=lpszNewContent; } 02429 return p; 02430 } 02431 02432 XMLClear *XMLNode::updateClear_WOSD(XMLSTR lpszNewContent, XMLCSTR lpszOldValue) 02433 { 02434 if (!d) { if (lpszNewContent) free(lpszNewContent); return NULL; } 02435 int i=indexClear(lpszOldValue); 02436 if (i>=0) return updateClear_WOSD(lpszNewContent,i); 02437 return addClear_WOSD(lpszNewContent); 02438 } 02439 02440 XMLClear *XMLNode::updateClear_WOSD(XMLClear *newP,XMLClear *oldP) 02441 { 02442 if (oldP) return updateClear_WOSD((XMLSTR)newP->lpszValue,(XMLSTR)oldP->lpszValue); 02443 return NULL; 02444 } 02445 02446 int XMLNode::nChildNode(XMLCSTR name) const 02447 { 02448 if (!d) return 0; 02449 int i,j=0,n=d->nChild; 02450 XMLNode *pc=d->pChild; 02451 for (i=0; i<n; i++) 02452 { 02453 if (xstricmp(pc->d->lpszName, name)==0) j++; 02454 pc++; 02455 } 02456 return j; 02457 } 02458 02459 XMLNode XMLNode::getChildNode(XMLCSTR name, int *j) const 02460 { 02461 if (!d) return emptyXMLNode; 02462 int i=0,n=d->nChild; 02463 if (j) i=*j; 02464 XMLNode *pc=d->pChild+i; 02465 for (; i<n; i++) 02466 { 02467 if (!xstricmp(pc->d->lpszName, name)) 02468 { 02469 if (j) *j=i+1; 02470 return *pc; 02471 } 02472 pc++; 02473 } 02474 return emptyXMLNode; 02475 } 02476 02477 XMLNode XMLNode::getChildNode(XMLCSTR name, int j) const 02478 { 02479 if (!d) return emptyXMLNode; 02480 if (j>=0) 02481 { 02482 int i=0; 02483 while (j-->0) getChildNode(name,&i); 02484 return getChildNode(name,&i); 02485 } 02486 int i=d->nChild; 02487 while (i--) if (!xstricmp(name,d->pChild[i].d->lpszName)) break; 02488 if (i<0) return emptyXMLNode; 02489 return getChildNode(i); 02490 } 02491 02492 XMLNode XMLNode::getChildNodeByPath(XMLCSTR _path, char createMissing, XMLCHAR sep) 02493 { 02494 
XMLSTR path=stringDup(_path); 02495 XMLNode x=getChildNodeByPathNonConst(path,createMissing,sep); 02496 if (path) free(path); 02497 return x; 02498 } 02499 02500 XMLNode XMLNode::getChildNodeByPathNonConst(XMLSTR path, char createIfMissing, XMLCHAR sep) 02501 { 02502 if ((!path)||(!(*path))) return *this; 02503 XMLNode xn,xbase=*this; 02504 XMLCHAR *tend1,sepString[2]; sepString[0]=sep; sepString[1]=0; 02505 tend1=xstrstr(path,sepString); 02506 while(tend1) 02507 { 02508 *tend1=0; 02509 xn=xbase.getChildNode(path); 02510 if (xn.isEmpty()) 02511 { 02512 if (createIfMissing) xn=xbase.addChild(path); 02513 else { *tend1=sep; return XMLNode::emptyXMLNode; } 02514 } 02515 *tend1=sep; 02516 xbase=xn; 02517 path=tend1+1; 02518 tend1=xstrstr(path,sepString); 02519 } 02520 xn=xbase.getChildNode(path); 02521 if (xn.isEmpty()&&createIfMissing) xn=xbase.addChild(path); 02522 return xn; 02523 } 02524 02525 XMLElementPosition XMLNode::positionOfText (int i) const { if (i>=d->nText ) i=d->nText-1; return findPosition(d,i,eNodeText ); } 02526 XMLElementPosition XMLNode::positionOfClear (int i) const { if (i>=d->nClear) i=d->nClear-1; return findPosition(d,i,eNodeClear); } 02527 XMLElementPosition XMLNode::positionOfChildNode(int i) const { if (i>=d->nChild) i=d->nChild-1; return findPosition(d,i,eNodeChild); } 02528 XMLElementPosition XMLNode::positionOfText (XMLCSTR lpszValue) const { return positionOfText (indexText (lpszValue)); } 02529 XMLElementPosition XMLNode::positionOfClear(XMLCSTR lpszValue) const { return positionOfClear(indexClear(lpszValue)); } 02530 XMLElementPosition XMLNode::positionOfClear(XMLClear *a) const { if (a) return positionOfClear(a->lpszValue); return positionOfClear(); } 02531 XMLElementPosition XMLNode::positionOfChildNode(XMLNode x) const 02532 { 02533 if ((!d)||(!x.d)) return -1; 02534 XMLNodeData *dd=x.d; 02535 XMLNode *pc=d->pChild; 02536 int i=d->nChild; 02537 while (i--) if (pc[i].d==dd) return findPosition(d,i,eNodeChild); 02538 return -1; 02539 
} 02540 XMLElementPosition XMLNode::positionOfChildNode(XMLCSTR name, int count) const 02541 { 02542 if (!name) return positionOfChildNode(count); 02543 int j=0; 02544 do { getChildNode(name,&j); if (j<0) return -1; } while (count--); 02545 return findPosition(d,j-1,eNodeChild); 02546 } 02547 02548 XMLNode XMLNode::getChildNodeWithAttribute(XMLCSTR name,XMLCSTR attributeName,XMLCSTR attributeValue, int *k) const 02549 { 02550 int i=0,j; 02551 if (k) i=*k; 02552 XMLNode x; 02553 XMLCSTR t; 02554 do 02555 { 02556 x=getChildNode(name,&i); 02557 if (!x.isEmpty()) 02558 { 02559 if (attributeValue) 02560 { 02561 j=0; 02562 do 02563 { 02564 t=x.getAttribute(attributeName,&j); 02565 if (t&&(xstricmp(attributeValue,t)==0)) { if (k) *k=i; return x; } 02566 } while (t); 02567 } else 02568 { 02569 if (x.isAttributeSet(attributeName)) { if (k) *k=i; return x; } 02570 } 02571 } 02572 } while (!x.isEmpty()); 02573 return emptyXMLNode; 02574 } 02575 02576 // Find an attribute on an node. 02577 XMLCSTR XMLNode::getAttribute(XMLCSTR lpszAttrib, int *j) const 02578 { 02579 if (!d) return NULL; 02580 int i=0,n=d->nAttribute; 02581 if (j) i=*j; 02582 XMLAttribute *pAttr=d->pAttribute+i; 02583 for (; i<n; i++) 02584 { 02585 if (xstricmp(pAttr->lpszName, lpszAttrib)==0) 02586 { 02587 if (j) *j=i+1; 02588 return pAttr->lpszValue; 02589 } 02590 pAttr++; 02591 } 02592 return NULL; 02593 } 02594 02595 char XMLNode::isAttributeSet(XMLCSTR lpszAttrib) const 02596 { 02597 if (!d) return FALSE; 02598 int i,n=d->nAttribute; 02599 XMLAttribute *pAttr=d->pAttribute; 02600 for (i=0; i<n; i++) 02601 { 02602 if (xstricmp(pAttr->lpszName, lpszAttrib)==0) 02603 { 02604 return TRUE; 02605 } 02606 pAttr++; 02607 } 02608 return FALSE; 02609 } 02610 02611 XMLCSTR XMLNode::getAttribute(XMLCSTR name, int j) const 02612 { 02613 if (!d) return NULL; 02614 int i=0; 02615 while (j-->0) getAttribute(name,&i); 02616 return getAttribute(name,&i); 02617 } 02618 02619 XMLNodeContents XMLNode::enumContents(int i) const 
02620 { 02621 XMLNodeContents c; 02622 if (!d) { c.etype=eNodeNULL; return c; } 02623 if (i<d->nAttribute) 02624 { 02625 c.etype=eNodeAttribute; 02626 c.attrib=d->pAttribute[i]; 02627 return c; 02628 } 02629 i-=d->nAttribute; 02630 c.etype=(XMLElementType)(d->pOrder[i]&3); 02631 i=(d->pOrder[i])>>2; 02632 switch (c.etype) 02633 { 02634 case eNodeChild: c.child = d->pChild[i]; break; 02635 case eNodeText: c.text = d->pText[i]; break; 02636 case eNodeClear: c.clear = d->pClear[i]; break; 02637 default: break; 02638 } 02639 return c; 02640 } 02641 02642 XMLCSTR XMLNode::getName() const { if (!d) return NULL; return d->lpszName; } 02643 int XMLNode::nText() const { if (!d) return 0; return d->nText; } 02644 int XMLNode::nChildNode() const { if (!d) return 0; return d->nChild; } 02645 int XMLNode::nAttribute() const { if (!d) return 0; return d->nAttribute; } 02646 int XMLNode::nClear() const { if (!d) return 0; return d->nClear; } 02647 int XMLNode::nElement() const { if (!d) return 0; return d->nAttribute+d->nChild+d->nText+d->nClear; } 02648 XMLClear XMLNode::getClear (int i) const { if ((!d)||(i>=d->nClear )) return emptyXMLClear; return d->pClear[i]; } 02649 XMLAttribute XMLNode::getAttribute (int i) const { if ((!d)||(i>=d->nAttribute)) return emptyXMLAttribute; return d->pAttribute[i]; } 02650 XMLCSTR XMLNode::getAttributeName (int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszName; } 02651 XMLCSTR XMLNode::getAttributeValue(int i) const { if ((!d)||(i>=d->nAttribute)) return NULL; return d->pAttribute[i].lpszValue; } 02652 XMLCSTR XMLNode::getText (int i) const { if ((!d)||(i>=d->nText )) return NULL; return d->pText[i]; } 02653 XMLNode XMLNode::getChildNode (int i) const { if ((!d)||(i>=d->nChild )) return emptyXMLNode; return d->pChild[i]; } 02654 XMLNode XMLNode::getParentNode ( ) const { if ((!d)||(!d->pParent )) return emptyXMLNode; return XMLNode(d->pParent); } 02655 char XMLNode::isDeclaration ( ) const { if (!d) return 
0; return d->isDeclaration; } 02656 char XMLNode::isEmpty ( ) const { return (d==NULL); } 02657 XMLNode XMLNode::emptyNode ( ) { return XMLNode::emptyXMLNode; } 02658 02659 XMLNode XMLNode::addChild(XMLCSTR lpszName, char isDeclaration, XMLElementPosition pos) 02660 { return addChild_priv(0,stringDup(lpszName),isDeclaration,pos); } 02661 XMLNode XMLNode::addChild_WOSD(XMLSTR lpszName, char isDeclaration, XMLElementPosition pos) 02662 { return addChild_priv(0,lpszName,isDeclaration,pos); } 02663 XMLAttribute *XMLNode::addAttribute(XMLCSTR lpszName, XMLCSTR lpszValue) 02664 { return addAttribute_priv(0,stringDup(lpszName),stringDup(lpszValue)); } 02665 XMLAttribute *XMLNode::addAttribute_WOSD(XMLSTR lpszName, XMLSTR lpszValuev) 02666 { return addAttribute_priv(0,lpszName,lpszValuev); } 02667 XMLCSTR XMLNode::addText(XMLCSTR lpszValue, XMLElementPosition pos) 02668 { return addText_priv(0,stringDup(lpszValue),pos); } 02669 XMLCSTR XMLNode::addText_WOSD(XMLSTR lpszValue, XMLElementPosition pos) 02670 { return addText_priv(0,lpszValue,pos); } 02671 XMLClear *XMLNode::addClear(XMLCSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) 02672 { return addClear_priv(0,stringDup(lpszValue),lpszOpen,lpszClose,pos); } 02673 XMLClear *XMLNode::addClear_WOSD(XMLSTR lpszValue, XMLCSTR lpszOpen, XMLCSTR lpszClose, XMLElementPosition pos) 02674 { return addClear_priv(0,lpszValue,lpszOpen,lpszClose,pos); } 02675 XMLCSTR XMLNode::updateName(XMLCSTR lpszName) 02676 { return updateName_WOSD(stringDup(lpszName)); } 02677 XMLAttribute *XMLNode::updateAttribute(XMLAttribute *newAttribute, XMLAttribute *oldAttribute) 02678 { return updateAttribute_WOSD(stringDup(newAttribute->lpszValue),stringDup(newAttribute->lpszName),oldAttribute->lpszName); } 02679 XMLAttribute *XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,int i) 02680 { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),i); } 02681 XMLAttribute 
*XMLNode::updateAttribute(XMLCSTR lpszNewValue, XMLCSTR lpszNewName,XMLCSTR lpszOldName) 02682 { return updateAttribute_WOSD(stringDup(lpszNewValue),stringDup(lpszNewName),lpszOldName); } 02683 XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, int i) 02684 { return updateText_WOSD(stringDup(lpszNewValue),i); } 02685 XMLCSTR XMLNode::updateText(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) 02686 { return updateText_WOSD(stringDup(lpszNewValue),lpszOldValue); } 02687 XMLClear *XMLNode::updateClear(XMLCSTR lpszNewContent, int i) 02688 { return updateClear_WOSD(stringDup(lpszNewContent),i); } 02689 XMLClear *XMLNode::updateClear(XMLCSTR lpszNewValue, XMLCSTR lpszOldValue) 02690 { return updateClear_WOSD(stringDup(lpszNewValue),lpszOldValue); } 02691 XMLClear *XMLNode::updateClear(XMLClear *newP,XMLClear *oldP) 02692 { return updateClear_WOSD(stringDup(newP->lpszValue),oldP->lpszValue); } 02693 02694 char XMLNode::setGlobalOptions(XMLCharEncoding _characterEncoding, char _guessWideCharChars, 02695 char _dropWhiteSpace, char _removeCommentsInMiddleOfText) 02696 { 02697 guessWideCharChars=_guessWideCharChars; dropWhiteSpace=_dropWhiteSpace; removeCommentsInMiddleOfText=_removeCommentsInMiddleOfText; 02698 #ifdef _XMLWIDECHAR 02699 if (_characterEncoding) characterEncoding=_characterEncoding; 02700 #else 02701 switch(_characterEncoding) 02702 { 02703 case char_encoding_UTF8: characterEncoding=_characterEncoding; XML_ByteTable=XML_utf8ByteTable; break; 02704 case char_encoding_legacy: characterEncoding=_characterEncoding; XML_ByteTable=XML_legacyByteTable; break; 02705 case char_encoding_ShiftJIS: characterEncoding=_characterEncoding; XML_ByteTable=XML_sjisByteTable; break; 02706 case char_encoding_GB2312: characterEncoding=_characterEncoding; XML_ByteTable=XML_gb2312ByteTable; break; 02707 case char_encoding_Big5: 02708 case char_encoding_GBK: characterEncoding=_characterEncoding; XML_ByteTable=XML_gbk_big5_ByteTable; break; 02709 default: return 1; 02710 } 02711 #endif 
02712 return 0; 02713 } 02714 02715 XMLNode::XMLCharEncoding XMLNode::guessCharEncoding(void *buf,int l, char useXMLEncodingAttribute) 02716 { 02717 #ifdef _XMLWIDECHAR 02718 return (XMLCharEncoding)0; 02719 #else 02720 if (l<25) return (XMLCharEncoding)0; 02721 if (guessWideCharChars&&(myIsTextWideChar(buf,l))) return (XMLCharEncoding)0; 02722 unsigned char *b=(unsigned char*)buf; 02723 if ((b[0]==0xef)&&(b[1]==0xbb)&&(b[2]==0xbf)) return char_encoding_UTF8; 02724 02725 // Match utf-8 model ? 02726 XMLCharEncoding bestGuess=char_encoding_UTF8; 02727 int i=0; 02728 while (i<l) 02729 switch (XML_utf8ByteTable[b[i]]) 02730 { 02731 case 4: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ? 02732 case 3: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ? 02733 case 2: i++; if ((i<l)&&(b[i]& 0xC0)!=0x80) { bestGuess=char_encoding_legacy; i=l; } // 10bbbbbb ? 02734 case 1: i++; break; 02735 case 0: i=l; 02736 } 02737 if (!useXMLEncodingAttribute) return bestGuess; 02738 // if encoding is specified and different from utf-8 than it's non-utf8 02739 // otherwise it's utf-8 02740 char bb[201]; 02741 l=mmin(l,200); 02742 memcpy(bb,buf,l); // copy buf into bb to be able to do "bb[l]=0" 02743 bb[l]=0; 02744 b=(unsigned char*)strstr(bb,"encoding"); 02745 if (!b) return bestGuess; 02746 b+=8; while XML_isSPACECHAR(*b) b++; if (*b!='=') return bestGuess; 02747 b++; while XML_isSPACECHAR(*b) b++; if ((*b!='\'')&&(*b!='"')) return bestGuess; 02748 b++; while XML_isSPACECHAR(*b) b++; 02749 02750 if ((xstrnicmp((char*)b,"utf-8",5)==0)|| 02751 (xstrnicmp((char*)b,"utf8",4)==0)) 02752 { 02753 if (bestGuess==char_encoding_legacy) return char_encoding_error; 02754 return char_encoding_UTF8; 02755 } 02756 02757 if ((xstrnicmp((char*)b,"shiftjis",8)==0)|| 02758 (xstrnicmp((char*)b,"shift-jis",9)==0)|| 02759 (xstrnicmp((char*)b,"sjis",4)==0)) return char_encoding_ShiftJIS; 02760 02761 if 
(xstrnicmp((char*)b,"GB2312",6)==0) return char_encoding_GB2312; 02762 if (xstrnicmp((char*)b,"Big5",4)==0) return char_encoding_Big5; 02763 if (xstrnicmp((char*)b,"GBK",3)==0) return char_encoding_GBK; 02764 02765 return char_encoding_legacy; 02766 #endif 02767 } 02768 #undef XML_isSPACECHAR 02769 02770 ////////////////////////////////////////////////////////// 02771 // Here starts the base64 conversion functions. // 02772 ////////////////////////////////////////////////////////// 02773 02774 static const char base64Fillchar = _CXML('='); // used to mark partial words at the end 02775 02776 // this lookup table defines the base64 encoding 02777 XMLCSTR base64EncodeTable=_CXML("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); 02778 02779 // Decode Table gives the index of any valid base64 character in the Base64 table] 02780 // 96: '=' - 97: space char - 98: illegal char - 99: end of string 02781 const unsigned char base64DecodeTable[] = { 02782 99,98,98,98,98,98,98,98,98,97, 97,98,98,97,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //00 -29 02783 98,98,97,98,98,98,98,98,98,98, 98,98,98,62,98,98,98,63,52,53, 54,55,56,57,58,59,60,61,98,98, //30 -59 02784 98,96,98,98,98, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, 15,16,17,18,19,20,21,22,23,24, //60 -89 02785 25,98,98,98,98,98,98,26,27,28, 29,30,31,32,33,34,35,36,37,38, 39,40,41,42,43,44,45,46,47,48, //90 -119 02786 49,50,51,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //120 -149 02787 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //150 -179 02788 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //180 -209 02789 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98,98,98,98,98, //210 -239 02790 98,98,98,98,98,98,98,98,98,98, 98,98,98,98,98,98 //240 -255 02791 }; 02792 02793 XMLParserBase64Tool::~XMLParserBase64Tool(){ freeBuffer(); } 02794 
02795 void XMLParserBase64Tool::freeBuffer(){ if (buf) free(buf); buf=NULL; buflen=0; } 02796 02797 int XMLParserBase64Tool::encodeLength(int inlen, char formatted) 02798 { 02799 unsigned int i=((inlen-1)/3*4+4+1); 02800 if (formatted) i+=inlen/54; 02801 return i; 02802 } 02803 02804 XMLSTR XMLParserBase64Tool::encode(unsigned char *inbuf, unsigned int inlen, char formatted) 02805 { 02806 int i=encodeLength(inlen,formatted),k=17,eLen=inlen/3,j; 02807 alloc(i*sizeof(XMLCHAR)); 02808 XMLSTR curr=(XMLSTR)buf; 02809 for(i=0;i<eLen;i++) 02810 { 02811 // Copy next three bytes into lower 24 bits of int, paying attention to sign. 02812 j=(inbuf[0]<<16)|(inbuf[1]<<8)|inbuf[2]; inbuf+=3; 02813 // Encode the int into four chars 02814 *(curr++)=base64EncodeTable[ j>>18 ]; 02815 *(curr++)=base64EncodeTable[(j>>12)&0x3f]; 02816 *(curr++)=base64EncodeTable[(j>> 6)&0x3f]; 02817 *(curr++)=base64EncodeTable[(j )&0x3f]; 02818 if (formatted) { if (!k) { *(curr++)=_CXML('\n'); k=18; } k--; } 02819 } 02820 eLen=inlen-eLen*3; // 0 - 2. 02821 if (eLen==1) 02822 { 02823 *(curr++)=base64EncodeTable[ inbuf[0]>>2 ]; 02824 *(curr++)=base64EncodeTable[(inbuf[0]<<4)&0x3F]; 02825 *(curr++)=base64Fillchar; 02826 *(curr++)=base64Fillchar; 02827 } else if (eLen==2) 02828 { 02829 j=(inbuf[0]<<8)|inbuf[1]; 02830 *(curr++)=base64EncodeTable[ j>>10 ]; 02831 *(curr++)=base64EncodeTable[(j>> 4)&0x3f]; 02832 *(curr++)=base64EncodeTable[(j<< 2)&0x3f]; 02833 *(curr++)=base64Fillchar; 02834 } 02835 *(curr++)=0; 02836 return (XMLSTR)buf; 02837 } 02838 02839 unsigned int XMLParserBase64Tool::decodeSize(XMLCSTR data,XMLError *xe) 02840 { 02841 if (!data) return 0; 02842 if (xe) *xe=eXMLErrorNone; 02843 int size=0; 02844 unsigned char c; 02845 //skip any extra characters (e.g. 
newlines or spaces) 02846 while (*data) 02847 { 02848 #ifdef _XMLWIDECHAR 02849 if (*data>255) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02850 #endif 02851 c=base64DecodeTable[(unsigned char)(*data)]; 02852 if (c<97) size++; 02853 else if (c==98) { if (xe) *xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02854 data++; 02855 } 02856 if (xe&&(size%4!=0)) *xe=eXMLErrorBase64DataSizeIsNotMultipleOf4; 02857 if (size==0) return 0; 02858 do { data--; size--; } while(*data==base64Fillchar); size++; 02859 return (unsigned int)((size*3)/4); 02860 } 02861 02862 unsigned char XMLParserBase64Tool::decode(XMLCSTR data, unsigned char *buf, int len, XMLError *xe) 02863 { 02864 if (!data) return 0; 02865 if (xe) *xe=eXMLErrorNone; 02866 int i=0,p=0; 02867 unsigned char d,c; 02868 for(;;) 02869 { 02870 02871 #ifdef _XMLWIDECHAR 02872 #define BASE64DECODE_READ_NEXT_CHAR(c) \ 02873 do { \ 02874 if (data[i]>255){ c=98; break; } \ 02875 c=base64DecodeTable[(unsigned char)data[i++]]; \ 02876 }while (c==97); \ 02877 if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02878 #else 02879 #define BASE64DECODE_READ_NEXT_CHAR(c) \ 02880 do { c=base64DecodeTable[(unsigned char)data[i++]]; }while (c==97); \ 02881 if(c==98){ if(xe)*xe=eXMLErrorBase64DecodeIllegalCharacter; return 0; } 02882 #endif 02883 02884 BASE64DECODE_READ_NEXT_CHAR(c) 02885 if (c==99) { return 2; } 02886 if (c==96) 02887 { 02888 if (p==(int)len) return 2; 02889 if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; 02890 return 1; 02891 } 02892 02893 BASE64DECODE_READ_NEXT_CHAR(d) 02894 if ((d==99)||(d==96)) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02895 if (p==(int)len) { if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; return 0; } 02896 buf[p++]=(unsigned char)((c<<2)|((d>>4)&0x3)); 02897 02898 BASE64DECODE_READ_NEXT_CHAR(c) 02899 if (c==99) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02900 if (p==(int)len) 02901 { 02902 if (c==96) return 2; 
02903 if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; 02904 return 0; 02905 } 02906 if (c==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02907 buf[p++]=(unsigned char)(((d<<4)&0xf0)|((c>>2)&0xf)); 02908 02909 BASE64DECODE_READ_NEXT_CHAR(d) 02910 if (d==99 ) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02911 if (p==(int)len) 02912 { 02913 if (d==96) return 2; 02914 if (xe) *xe=eXMLErrorBase64DecodeBufferTooSmall; 02915 return 0; 02916 } 02917 if (d==96) { if (xe) *xe=eXMLErrorBase64DecodeTruncatedData; return 1; } 02918 buf[p++]=(unsigned char)(((c<<6)&0xc0)|d); 02919 } 02920 } 02921 #undef BASE64DECODE_READ_NEXT_CHAR 02922 02923 void XMLParserBase64Tool::alloc(int newsize) 02924 { 02925 if ((!buf)&&(newsize)) { buf=malloc(newsize); buflen=newsize; return; } 02926 if (newsize>buflen) { buf=realloc(buf,newsize); buflen=newsize; } 02927 } 02928 02929 unsigned char *XMLParserBase64Tool::decode(XMLCSTR data, int *outlen, XMLError *xe) 02930 { 02931 if (xe) *xe=eXMLErrorNone; 02932 if (!data) { *outlen=0; return (unsigned char*)""; } 02933 unsigned int len=decodeSize(data,xe); 02934 if (outlen) *outlen=len; 02935 if (!len) return NULL; 02936 alloc(len+1); 02937 if(!decode(data,(unsigned char*)buf,len,xe)){ return NULL; } 02938 return (unsigned char*)buf; 02939 } 02940