llsdserialize_xml.cpp

Go to the documentation of this file.
00001 
00032 #include "linden_common.h"
00033 #include "llsdserialize_xml.h"
00034 
00035 #include <iostream>
00036 #include <deque>
00037 
00038 #include "apr-1/apr_base64.h"
00039 
00040 extern "C"
00041 {
00042 #ifdef LL_STANDALONE
00043 # include <expat.h>
00044 #else
00045 # include "expat/expat.h"
00046 #endif
00047 }
00048 
00052 LLSDXMLFormatter::LLSDXMLFormatter()
00053 {
00054 }
00055 
00056 // virtual
00057 LLSDXMLFormatter::~LLSDXMLFormatter()
00058 {
00059 }
00060 
00061 // virtual
00062 S32 LLSDXMLFormatter::format(const LLSD& data, std::ostream& ostr, U32 options) const
00063 {
00064         std::streamsize old_precision = ostr.precision(25);
00065 
00066         LLString post = "";
00067         if (options & LLSDFormatter::OPTIONS_PRETTY)
00068         {
00069                 post = "\n";
00070         }
00071         ostr << "<llsd>" << post;
00072         S32 rv = format_impl(data, ostr, options, 1);
00073         ostr << "</llsd>\n";
00074 
00075         ostr.precision(old_precision);
00076         return rv;
00077 }
00078 
00079 S32 LLSDXMLFormatter::format_impl(const LLSD& data, std::ostream& ostr, U32 options, U32 level) const
00080 {
00081         S32 format_count = 1;
00082         LLString pre = "";
00083         LLString post = "";
00084 
00085         if (options & LLSDFormatter::OPTIONS_PRETTY)
00086         {
00087                 for (U32 i = 0; i < level; i++)
00088                 {
00089                         pre += "    ";
00090                 }
00091                 post = "\n";
00092         }
00093 
00094         switch(data.type())
00095         {
00096         case LLSD::TypeMap:
00097                 if(0 == data.size())
00098                 {
00099                         ostr << pre << "<map />" << post;
00100                 }
00101                 else
00102                 {
00103                         ostr << pre << "<map>" << post;
00104                         LLSD::map_const_iterator iter = data.beginMap();
00105                         LLSD::map_const_iterator end = data.endMap();
00106                         for(; iter != end; ++iter)
00107                         {
00108                                 ostr << pre << "<key>" << escapeString((*iter).first) << "</key>" << post;
00109                                 format_count += format_impl((*iter).second, ostr, options, level + 1);
00110                         }
00111                         ostr << pre <<  "</map>" << post;
00112                 }
00113                 break;
00114 
00115         case LLSD::TypeArray:
00116                 if(0 == data.size())
00117                 {
00118                         ostr << pre << "<array />" << post;
00119                 }
00120                 else
00121                 {
00122                         ostr << pre << "<array>" << post;
00123                         LLSD::array_const_iterator iter = data.beginArray();
00124                         LLSD::array_const_iterator end = data.endArray();
00125                         for(; iter != end; ++iter)
00126                         {
00127                                 format_count += format_impl(*iter, ostr, options, level + 1);
00128                         }
00129                         ostr << pre << "</array>" << post;
00130                 }
00131                 break;
00132 
00133         case LLSD::TypeUndefined:
00134                 ostr << pre << "<undef />" << post;
00135                 break;
00136 
00137         case LLSD::TypeBoolean:
00138                 ostr << pre << "<boolean>";
00139                 if(mBoolAlpha ||
00140 #if( LL_WINDOWS || __GNUC__ > 2)
00141                    (ostr.flags() & std::ios::boolalpha)
00142 #else
00143                    (ostr.flags() & 0x0100)
00144 #endif
00145                         )
00146                 {
00147                         ostr << (data.asBoolean() ? "true" : "false");
00148                 }
00149                 else
00150                 {
00151                         ostr << (data.asBoolean() ? 1 : 0);
00152                 }
00153                 ostr << "</boolean>" << post;
00154                 break;
00155 
00156         case LLSD::TypeInteger:
00157                 ostr << pre << "<integer>" << data.asInteger() << "</integer>" << post;
00158                 break;
00159 
00160         case LLSD::TypeReal:
00161                 ostr << pre << "<real>";
00162                 if(mRealFormat.empty())
00163                 {
00164                         ostr << data.asReal();
00165                 }
00166                 else
00167                 {
00168                         formatReal(data.asReal(), ostr);
00169                 }
00170                 ostr << "</real>" << post;
00171                 break;
00172 
00173         case LLSD::TypeUUID:
00174                 if(data.asUUID().isNull()) ostr << pre << "<uuid />" << post;
00175                 else ostr << pre << "<uuid>" << data.asUUID() << "</uuid>" << post;
00176                 break;
00177 
00178         case LLSD::TypeString:
00179                 if(data.asString().empty()) ostr << pre << "<string />" << post;
00180                 else ostr << pre << "<string>" << escapeString(data.asString()) <<"</string>" << post;
00181                 break;
00182 
00183         case LLSD::TypeDate:
00184                 ostr << pre << "<date>" << data.asDate() << "</date>" << post;
00185                 break;
00186 
00187         case LLSD::TypeURI:
00188                 ostr << pre << "<uri>" << escapeString(data.asString()) << "</uri>" << post;
00189                 break;
00190 
00191         case LLSD::TypeBinary:
00192         {
00193                 LLSD::Binary buffer = data.asBinary();
00194                 if(buffer.empty())
00195                 {
00196                         ostr << pre << "<binary />" << post;
00197                 }
00198                 else
00199                 {
00200                         // *FIX: memory inefficient.
00201                         // *TODO: convert to use LLBase64
00202                         ostr << pre << "<binary encoding=\"base64\">";
00203                         int b64_buffer_length = apr_base64_encode_len(buffer.size());
00204                         char* b64_buffer = new char[b64_buffer_length];
00205                         b64_buffer_length = apr_base64_encode_binary(
00206                                 b64_buffer,
00207                                 &buffer[0],
00208                                 buffer.size());
00209                         ostr.write(b64_buffer, b64_buffer_length - 1);
00210                         delete[] b64_buffer;
00211                         ostr << "</binary>" << post;
00212                 }
00213                 break;
00214         }
00215         default:
00216                 // *NOTE: This should never happen.
00217                 ostr << pre << "<undef />" << post;
00218                 break;
00219         }
00220         return format_count;
00221 }
00222 
00223 // static
00224 std::string LLSDXMLFormatter::escapeString(const std::string& in)
00225 {
00226         std::ostringstream out;
00227         std::string::const_iterator it = in.begin();
00228         std::string::const_iterator end = in.end();
00229         for(; it != end; ++it)
00230         {
00231                 switch((*it))
00232                 {
00233                 case '<':
00234                         out << "&lt;";
00235                         break;
00236                 case '>':
00237                         out << "&gt;";
00238                         break;
00239                 case '&':
00240                         out << "&amp;";
00241                         break;
00242                 case '\'':
00243                         out << "&apos;";
00244                         break;
00245                 case '"':
00246                         out << "&quot;";
00247                         break;
00248                 default:
00249                         out << (*it);
00250                         break;
00251                 }
00252         }
00253         return out.str();
00254 }
00255 
00256 
00257 
00258 class LLSDXMLParser::Impl
00259 {
00260 public:
00261         Impl();
00262         ~Impl();
00263         
00264         S32 parse(std::istream& input, LLSD& data);
00265 
00266         void parsePart(const char *buf, int len);
00267         
00268 private:
00269         void reset();
00270         
00271         void startElementHandler(const XML_Char* name, const XML_Char** attributes);
00272         void endElementHandler(const XML_Char* name);
00273         void characterDataHandler(const XML_Char* data, int length);
00274         
00275         static void sStartElementHandler(
00276                 void* userData, const XML_Char* name, const XML_Char** attributes);
00277         static void sEndElementHandler(
00278                 void* userData, const XML_Char* name);
00279         static void sCharacterDataHandler(
00280                 void* userData, const XML_Char* data, int length);
00281 
00282         void startSkipping();
00283         
00284         enum Element {
00285                 ELEMENT_LLSD,
00286                 ELEMENT_UNDEF,
00287                 ELEMENT_BOOL,
00288                 ELEMENT_INTEGER,
00289                 ELEMENT_REAL,
00290                 ELEMENT_STRING,
00291                 ELEMENT_UUID,
00292                 ELEMENT_DATE,
00293                 ELEMENT_URI,
00294                 ELEMENT_BINARY,
00295                 ELEMENT_MAP,
00296                 ELEMENT_ARRAY,
00297                 ELEMENT_KEY,
00298                 ELEMENT_UNKNOWN
00299         };
00300         static Element readElement(const XML_Char* name);
00301         
00302         static const XML_Char* findAttribute(const XML_Char* name, const XML_Char** pairs);
00303         
00304 
00305         XML_Parser      mParser;
00306 
00307         LLSD mResult;
00308         S32 mParseCount;
00309         
00310         bool mInLLSDElement;
00311         bool mGracefullStop;
00312         
00313         typedef std::deque<LLSD*> LLSDRefStack;
00314         LLSDRefStack mStack;
00315         
00316         int mDepth;
00317         bool mSkipping;
00318         int mSkipThrough;
00319         
00320         std::string mCurrentKey;
00321         std::ostringstream mCurrentContent;
00322 
00323         bool mPreStaged;
00324 };
00325 
00326 
00327 LLSDXMLParser::Impl::Impl()
00328 {
00329         mParser = XML_ParserCreate(NULL);
00330         mPreStaged = false;
00331         reset();
00332 }
00333 
00334 LLSDXMLParser::Impl::~Impl()
00335 {
00336         XML_ParserFree(mParser);
00337 }
00338 
/**
 * Report whether `c` is a line terminator: line feed or carriage return.
 */
bool is_eol(char c)
{
    switch (c)
    {
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
00343 
00344 void clear_eol(std::istream& input)
00345 {
00346         char c = input.peek();
00347         while (input.good() && is_eol(c))
00348         {
00349                 input.get(c);
00350                 c = input.peek();
00351         }
00352 }
00353 
/**
 * Read characters from `input` into `buf` up to and including the first
 * line terminator ('\n' or '\r', the same set is_eol() accepts), or until
 * `bufsize` characters have been stored, or until the stream runs out.
 *
 * Only characters that were actually extracted are counted. The previous
 * version incremented the count even when the read failed at end of
 * stream, which handed one unread byte to the caller.
 *
 * @param input   stream to read from
 * @param buf     destination; must have room for `bufsize` characters
 * @param bufsize maximum number of characters to store
 * @return number of characters stored in `buf` (0 if nothing could be read)
 */
static unsigned get_till_eol(std::istream& input, char *buf, unsigned bufsize)
{
    unsigned count = 0;
    char c;
    while (count < bufsize && input.good() && input.get(c))
    {
        buf[count++] = c;
        if (c == '\n' || c == '\r')
        {
            break;
        }
    }
    return count;
}
00366 
00367 S32 LLSDXMLParser::Impl::parse(std::istream& input, LLSD& data)
00368 {
00369         reset();
00370         XML_Status status;
00371         
00372         static const int BUFFER_SIZE = 1024;
00373         void* buffer = NULL;    
00374         int count = 0;
00375         while (input.good() && !input.eof())
00376         {
00377                 buffer = XML_GetBuffer(mParser, BUFFER_SIZE);
00378 
00379                 /*
00380                  * If we happened to end our last buffer right at the end of the llsd, but the
00381                  * stream is still going we will get a null buffer here.  Check for mGracefullStop.
00382                  */
00383                 if (!buffer)
00384                 {
00385                         break;
00386                 }
00387                 count = get_till_eol(input, (char *)buffer, BUFFER_SIZE);
00388                 if (!count)
00389                 {
00390                         break;
00391                 }
00392                 status = XML_ParseBuffer(mParser, count, false);
00393 
00394                 if (status == XML_STATUS_ERROR)
00395                 {
00396                         break;
00397                 }
00398         }
00399         
00400         // *FIX.: This code is buggy - if the stream was empty or not
00401         // good, there is not buffer to parse, both the call to
00402         // XML_ParseBuffer and the buffer manipulations are illegal
00403         // futhermore, it isn't clear that the expat buffer semantics are
00404         // preserved
00405 
00406         status = XML_ParseBuffer(mParser, 0, true);
00407         if (status == XML_STATUS_ERROR && !mGracefullStop)
00408         {
00409                 if (buffer)
00410                 {
00411                         ((char*) buffer)[count ? count - 1 : 0] = '\0';
00412                 }
00413                 llinfos << "LLSDXMLParser::Impl::parse: XML_STATUS_ERROR parsing:" << (char*) buffer << llendl;
00414                 data = LLSD();
00415                 return LLSDParser::PARSE_FAILURE;
00416         }
00417 
00418         clear_eol(input);
00419         data = mResult;
00420         return mParseCount;
00421 }
00422 
00423 void LLSDXMLParser::Impl::reset()
00424 {
00425         if (mPreStaged)
00426         {
00427                 mPreStaged = false;
00428                 return;
00429         }
00430 
00431         mResult.clear();
00432         mParseCount = 0;
00433 
00434         mInLLSDElement = false;
00435         mDepth = 0;
00436 
00437         mGracefullStop = false;
00438 
00439         mStack.clear();
00440         
00441         mSkipping = false;
00442         
00443 #if( LL_WINDOWS || __GNUC__ > 2)
00444         mCurrentKey.clear();
00445 #else
00446         mCurrentKey = std::string();
00447 #endif
00448 
00449         
00450         XML_ParserReset(mParser, "utf-8");
00451         XML_SetUserData(mParser, this);
00452         XML_SetElementHandler(mParser, sStartElementHandler, sEndElementHandler);
00453         XML_SetCharacterDataHandler(mParser, sCharacterDataHandler);
00454 }
00455 
00456 
00457 void LLSDXMLParser::Impl::startSkipping()
00458 {
00459         mSkipping = true;
00460         mSkipThrough = mDepth;
00461 }
00462 
00463 const XML_Char*
00464 LLSDXMLParser::Impl::findAttribute(const XML_Char* name, const XML_Char** pairs)
00465 {
00466         while (NULL != pairs && NULL != *pairs)
00467         {
00468                 if(0 == strcmp(name, *pairs))
00469                 {
00470                         return *(pairs + 1);
00471                 }
00472                 pairs += 2;
00473         }
00474         return NULL;
00475 }
00476 
00477 void LLSDXMLParser::Impl::parsePart(const char* buf, int len)
00478 {
00479         void * buffer = XML_GetBuffer(mParser, len);
00480         if (buffer != NULL && buf != NULL)
00481         {
00482                 memcpy(buffer, buf, len);
00483         }
00484         XML_ParseBuffer(mParser, len, false);
00485 
00486         mPreStaged = true;
00487 }
00488 
00489 void LLSDXMLParser::Impl::startElementHandler(const XML_Char* name, const XML_Char** attributes)
00490 {
00491         ++mDepth;
00492         if (mSkipping)
00493         {
00494                 return;
00495         }
00496         
00497         Element element = readElement(name);
00498         mCurrentContent.str("");
00499 
00500         switch (element)
00501         {
00502                 case ELEMENT_LLSD:
00503                         if (mInLLSDElement) { return startSkipping(); }
00504                         mInLLSDElement = true;
00505                         return;
00506         
00507                 case ELEMENT_KEY:
00508                         if (mStack.empty()  ||  !(mStack.back()->isMap()))
00509                         {
00510                                 return startSkipping();
00511                         }
00512                         return;
00513 
00514                 case ELEMENT_BINARY:
00515                 {
00516                         const XML_Char* encoding = findAttribute("encoding", attributes);
00517                         if(encoding && strcmp("base64", encoding) != 0) { return startSkipping(); }
00518                         break;
00519                 }
00520                 
00521                 default:
00522                         // all rest are values, fall through
00523                         ;
00524         }
00525         
00526 
00527         if (!mInLLSDElement) { return startSkipping(); }
00528         
00529         if (mStack.empty())
00530         {
00531                 mStack.push_back(&mResult);
00532         }
00533         else if (mStack.back()->isMap())
00534         {
00535                 if (mCurrentKey.empty()) { return startSkipping(); }
00536                 
00537                 LLSD& map = *mStack.back();
00538                 LLSD& newElement = map[mCurrentKey];
00539                 mStack.push_back(&newElement);          
00540 
00541 #if( LL_WINDOWS || __GNUC__ > 2)
00542                 mCurrentKey.clear();
00543 #else
00544                 mCurrentKey = std::string();
00545 #endif
00546         }
00547         else if (mStack.back()->isArray())
00548         {
00549                 LLSD& array = *mStack.back();
00550                 array.append(LLSD());
00551                 LLSD& newElement = array[array.size()-1];
00552                 mStack.push_back(&newElement);
00553         }
00554         else {
00555                 // improperly nested value in a non-structure
00556                 return startSkipping();
00557         }
00558 
00559         ++mParseCount;
00560         switch (element)
00561         {
00562                 case ELEMENT_MAP:
00563                         *mStack.back() = LLSD::emptyMap();
00564                         break;
00565                 
00566                 case ELEMENT_ARRAY:
00567                         *mStack.back() = LLSD::emptyArray();
00568                         break;
00569                         
00570                 default:
00571                         // all the other values will be set in the end element handler
00572                         ;
00573         }
00574 }
00575 
00576 void LLSDXMLParser::Impl::endElementHandler(const XML_Char* name)
00577 {
00578         --mDepth;
00579         if (mSkipping)
00580         {
00581                 if (mDepth < mSkipThrough)
00582                 {
00583                         mSkipping = false;
00584                 }
00585                 return;
00586         }
00587         
00588         Element element = readElement(name);
00589         
00590         switch (element)
00591         {
00592                 case ELEMENT_LLSD:
00593                         if (mInLLSDElement)
00594                         {
00595                                 mInLLSDElement = false;
00596                                 mGracefullStop = true;
00597                                 XML_StopParser(mParser, false);
00598                         }
00599                         return;
00600         
00601                 case ELEMENT_KEY:
00602                         mCurrentKey = mCurrentContent.str();
00603                         return;
00604                         
00605                 default:
00606                         // all rest are values, fall through
00607                         ;
00608         }
00609         
00610         if (!mInLLSDElement) { return; }
00611 
00612         LLSD& value = *mStack.back();
00613         mStack.pop_back();
00614         
00615         std::string content = mCurrentContent.str();
00616         mCurrentContent.str("");
00617 
00618         switch (element)
00619         {
00620                 case ELEMENT_UNDEF:
00621                         value.clear();
00622                         break;
00623                 
00624                 case ELEMENT_BOOL:
00625                         value = content == "true" || content == "1";
00626                         break;
00627                 
00628                 case ELEMENT_INTEGER:
00629                         value = LLSD(content).asInteger();
00630                         break;
00631                 
00632                 case ELEMENT_REAL:
00633                         value = LLSD(content).asReal();
00634                         break;
00635                 
00636                 case ELEMENT_STRING:
00637                         value = content;
00638                         break;
00639                 
00640                 case ELEMENT_UUID:
00641                         value = LLSD(content).asUUID();
00642                         break;
00643                 
00644                 case ELEMENT_DATE:
00645                         value = LLSD(content).asDate();
00646                         break;
00647                 
00648                 case ELEMENT_URI:
00649                         value = LLSD(content).asURI();
00650                         break;
00651                 
00652                 case ELEMENT_BINARY:
00653                 {
00654                         S32 len = apr_base64_decode_len(content.c_str());
00655                         std::vector<U8> data;
00656                         data.resize(len);
00657                         len = apr_base64_decode_binary(&data[0], content.c_str());
00658                         data.resize(len);
00659                         value = data;
00660                         break;
00661                 }
00662                 
00663                 case ELEMENT_UNKNOWN:
00664                         value.clear();
00665                         break;
00666                         
00667                 default:
00668                         // other values, map and array, have already been set
00669                         break;
00670         }
00671 }
00672 
00673 void LLSDXMLParser::Impl::characterDataHandler(const XML_Char* data, int length)
00674 {
00675         mCurrentContent.write(data, length);
00676 }
00677 
00678 
00679 void LLSDXMLParser::Impl::sStartElementHandler(
00680         void* userData, const XML_Char* name, const XML_Char** attributes)
00681 {
00682         ((LLSDXMLParser::Impl*)userData)->startElementHandler(name, attributes);
00683 }
00684 
00685 void LLSDXMLParser::Impl::sEndElementHandler(
00686         void* userData, const XML_Char* name)
00687 {
00688         ((LLSDXMLParser::Impl*)userData)->endElementHandler(name);
00689 }
00690 
00691 void LLSDXMLParser::Impl::sCharacterDataHandler(
00692         void* userData, const XML_Char* data, int length)
00693 {
00694         ((LLSDXMLParser::Impl*)userData)->characterDataHandler(data, length);
00695 }
00696 
00697 
00698 LLSDXMLParser::Impl::Element LLSDXMLParser::Impl::readElement(const XML_Char* name)
00699 {
00700         if (strcmp(name, "llsd") == 0) { return ELEMENT_LLSD; }
00701         if (strcmp(name, "undef") == 0) { return ELEMENT_UNDEF; }
00702         if (strcmp(name, "boolean") == 0) { return ELEMENT_BOOL; }
00703         if (strcmp(name, "integer") == 0) { return ELEMENT_INTEGER; }
00704         if (strcmp(name, "real") == 0) { return ELEMENT_REAL; }
00705         if (strcmp(name, "string") == 0) { return ELEMENT_STRING; }
00706         if (strcmp(name, "uuid") == 0) { return ELEMENT_UUID; }
00707         if (strcmp(name, "date") == 0) { return ELEMENT_DATE; }
00708         if (strcmp(name, "uri") == 0) { return ELEMENT_URI; }
00709         if (strcmp(name, "binary") == 0) { return ELEMENT_BINARY; }
00710         if (strcmp(name, "map") == 0) { return ELEMENT_MAP; }
00711         if (strcmp(name, "array") == 0) { return ELEMENT_ARRAY; }
00712         if (strcmp(name, "key") == 0) { return ELEMENT_KEY; }
00713         
00714         return ELEMENT_UNKNOWN;
00715 }
00716 
00717 
00718 
00719 
00720 
00724 LLSDXMLParser::LLSDXMLParser() : impl(* new Impl)
00725 {
00726 }
00727 
00728 LLSDXMLParser::~LLSDXMLParser()
00729 {
00730         delete &impl;
00731 }
00732 
00733 void LLSDXMLParser::parsePart(const char *buf, int len)
00734 {
00735         impl.parsePart(buf, len);
00736 }
00737 
00738 // virtual
00739 S32 LLSDXMLParser::doParse(std::istream& input, LLSD& data) const
00740 {
00741         return impl.parse(input, data); 
00742 }

Generated on Fri May 16 08:32:07 2008 for SecondLife by  doxygen 1.5.5