llmime.cpp

Go to the documentation of this file.
00001 
00034 #include "linden_common.h"
00035 #include "llmime.h"
00036 
00037 #include <vector>
00038 
00039 #include "llmemorystream.h"
00040 
00044 // Headers specified in rfc-2045 will be canonicalized below.
00045 static const std::string CONTENT_LENGTH("Content-Length");
00046 static const std::string CONTENT_TYPE("Content-Type");
00047 static const S32 KNOWN_HEADER_COUNT = 6;
00048 static const std::string KNOWN_HEADER[KNOWN_HEADER_COUNT] =
00049 {
00050         CONTENT_LENGTH,
00051         CONTENT_TYPE,
00052         std::string("MIME-Version"),
00053         std::string("Content-Transfer-Encoding"),
00054         std::string("Content-ID"),
00055         std::string("Content-Description"),
00056 };
00057 
// parser helpers

// Prefix of the content type value which marks a multipart entity.
static const std::string MULTIPART = "multipart";
// Name of the content type parameter which carries the separator.
static const std::string BOUNDARY = "boundary";
// Any of these characters terminates an unquoted parameter value.
static const std::string END_OF_CONTENT_PARAMETER = "\r\n ;\t";
// A separator line is this prefix followed by the boundary value.
static const std::string SEPARATOR_PREFIX = "--";
//static const std::string SEPARATOR_SUFFIX("\r\n");
00064 
00065 /*
00066 Content-Type: multipart/mixed; boundary="segment"
00067 Content-Length: 24832
00068 
00069 --segment
00070 Content-Type: image/j2c
00071 Content-Length: 23715
00072 
00073 <data>
00074 
00075 --segment
00076 Content-Type: text/xml; charset=UTF-8
00077 
00078 <meta data>
00079 EOF
00080 
00081 */
00082 
00092 class LLMimeIndex::Impl
00093 {
00094 public:
00095         Impl() : mOffset(-1), mUseCount(1)
00096         {}
00097         Impl(LLSD headers, S32 offset) :
00098                 mHeaders(headers), mOffset(offset), mUseCount(1)
00099         {}
00100 public:
00101         LLSD mHeaders;
00102         S32 mOffset;
00103         S32 mUseCount;
00104 
00105         typedef std::vector<LLMimeIndex> sub_part_t;
00106         sub_part_t mAttachments;
00107 };
00108 
00109 LLSD LLMimeIndex::headers() const
00110 {
00111         return mImpl->mHeaders;
00112 }
00113 
00114 S32 LLMimeIndex::offset() const
00115 {
00116         return mImpl->mOffset;
00117 }
00118 
00119 S32 LLMimeIndex::contentLength() const
00120 {
00121         // Find the content length in the headers.
00122         S32 length = -1;
00123         LLSD content_length = mImpl->mHeaders[CONTENT_LENGTH];
00124         if(content_length.isDefined())
00125         {
00126                 length = content_length.asInteger();
00127         }
00128         return length;
00129 }
00130 
00131 std::string LLMimeIndex::contentType() const
00132 {
00133         std::string type;
00134         LLSD content_type = mImpl->mHeaders[CONTENT_TYPE];
00135         if(content_type.isDefined())
00136         {
00137                 type = content_type.asString();
00138         }
00139         return type;
00140 }
00141 
00142 bool LLMimeIndex::isMultipart() const
00143 {
00144         bool multipart = false;
00145         LLSD content_type = mImpl->mHeaders[CONTENT_TYPE];
00146         if(content_type.isDefined())
00147         {
00148                 std::string type = content_type.asString();
00149                 int comp = type.compare(0, MULTIPART.size(), MULTIPART);
00150                 if(0 == comp)
00151                 {
00152                         multipart = true;
00153                 }
00154         }
00155         return multipart;
00156 }
00157 
00158 S32 LLMimeIndex::subPartCount() const
00159 {
00160         return mImpl->mAttachments.size();
00161 }
00162 
00163 LLMimeIndex LLMimeIndex::subPart(S32 index) const
00164 {
00165         LLMimeIndex part;
00166         if((index >= 0) && (index < (S32)mImpl->mAttachments.size()))
00167         {
00168                 part = mImpl->mAttachments[index];
00169         }
00170         return part;
00171 }
00172 
00173 LLMimeIndex::LLMimeIndex() : mImpl(new LLMimeIndex::Impl)
00174 {
00175 }
00176 
00177 LLMimeIndex::LLMimeIndex(LLSD headers, S32 content_offset) :
00178         mImpl(new LLMimeIndex::Impl(headers, content_offset))
00179 {
00180 }
00181 
00182 LLMimeIndex::LLMimeIndex(const LLMimeIndex& mime) :
00183         mImpl(mime.mImpl)
00184 {
00185         ++mImpl->mUseCount;
00186 }
00187 
00188 LLMimeIndex::~LLMimeIndex()
00189 {
00190         if(0 == --mImpl->mUseCount)
00191         {
00192                 delete mImpl;
00193         }
00194 }
00195 
00196 LLMimeIndex& LLMimeIndex::operator=(const LLMimeIndex& mime)
00197 {
00198         // Increment use count first so that we handle self assignment
00199         // automatically.
00200         ++mime.mImpl->mUseCount;
00201         if(0 == --mImpl->mUseCount)
00202         {
00203                 delete mImpl;
00204         }
00205         mImpl = mime.mImpl;
00206         return *this;
00207 }
00208 
00209 bool LLMimeIndex::attachSubPart(LLMimeIndex sub_part)
00210 {
00211         // *FIX: Should we check for multi-part?
00212         if(mImpl->mAttachments.size() < S32_MAX)
00213         {
00214                 mImpl->mAttachments.push_back(sub_part);
00215                 return true;
00216         }
00217         return false;
00218 }
00219 
00228 class LLMimeParser::Impl
00229 {
00230 public:
00231         // @brief Constructor.
00232         Impl();
00233 
00234         // @brief Reset this for a new parse.
00235         void reset();
00236 
00251         bool parseIndex(
00252                 std::istream& istr,
00253                 S32 limit,
00254                 const std::string& separator,
00255                 bool is_subpart,
00256                 LLMimeIndex& index);
00257 
00258 protected:
00269         bool parseHeaders(std::istream& istr, S32 limit, LLSD& headers);
00270 
00277         std::string findSeparator(std::string multipart_content_type);
00278 
00286         void scanPastSeparator(
00287                 std::istream& istr,
00288                 S32 limit,
00289                 const std::string& separator);
00290 
00299         void scanPastContent(
00300                 std::istream& istr,
00301                 S32 limit,
00302                 LLSD headers,
00303                 const std::string separator);
00304 
00314         bool eatCRLF(std::istream& istr);
00315 
00316         // @brief Returns true if parsing should continue.
00317         bool continueParse() const { return (!mError && mContinue); }
00318 
00319         // @brief anonymous enumeration for parse buffer size.
00320         enum
00321         {
00322                 LINE_BUFFER_LENGTH = 1024
00323         };
00324 
00325 protected:
00326         S32 mScanCount;
00327         bool mContinue;
00328         bool mError;
00329         char mBuffer[LINE_BUFFER_LENGTH];
00330 };
00331 
00332 LLMimeParser::Impl::Impl()
00333 {
00334         reset();
00335 }
00336 
00337 void LLMimeParser::Impl::reset()
00338 {
00339         mScanCount = 0;
00340         mContinue = true;
00341         mError = false;
00342         mBuffer[0] = '\0';
00343 }
00344 
00345 bool LLMimeParser::Impl::parseIndex(
00346         std::istream& istr,
00347         S32 limit,
00348         const std::string& separator,
00349         bool is_subpart,
00350         LLMimeIndex& index)
00351 {
00352         LLSD headers;
00353         bool parsed_something = false;
00354         if(parseHeaders(istr, limit, headers))
00355         {
00356                 parsed_something = true;
00357                 LLMimeIndex mime(headers, mScanCount);
00358                 index = mime;
00359                 if(index.isMultipart())
00360                 {
00361                         // Figure out the separator, scan past it, and recurse.
00362                         std::string ct = headers[CONTENT_TYPE].asString();
00363                         std::string sep = findSeparator(ct);
00364                         scanPastSeparator(istr, limit, sep);
00365                         while(continueParse() && parseIndex(istr, limit, sep, true, mime))
00366                         {
00367                                 index.attachSubPart(mime);
00368                         }
00369                 }
00370                 else
00371                 {
00372                         // Scan to the end of content.
00373                         scanPastContent(istr, limit, headers, separator);
00374                         if(is_subpart)
00375                         {
00376                                 scanPastSeparator(istr, limit, separator);
00377                         }
00378                 }
00379         }
00380         if(mError) return false;
00381         return parsed_something;
00382 }
00383 
00384 bool LLMimeParser::Impl::parseHeaders(
00385         std::istream& istr,
00386         S32 limit,
00387         LLSD& headers)
00388 {
00389         while(continueParse())
00390         {
00391                 // Get the next line.
00392                 // We subtract 1 from the limit so that we make sure
00393                 // not to read past limit when we get() the newline.
00394                 S32 max_get = llmin((S32)LINE_BUFFER_LENGTH, limit - mScanCount - 1);
00395                 istr.getline(mBuffer, max_get, '\r');
00396                 mScanCount += istr.gcount();
00397                 int c = istr.get();
00398                 if(EOF == c)
00399                 {
00400                         mContinue = false;
00401                         return false;
00402                 }
00403                 ++mScanCount;
00404                 if(c != '\n')
00405                 {
00406                         mError = true;
00407                         return false;
00408                 }
00409                 if(mScanCount >= limit)
00410                 {
00411                         mContinue = false;
00412                 }
00413 
00414                 // Check if that's the end of headers.
00415                 if('\0' == mBuffer[0])
00416                 {
00417                         break;
00418                 }
00419 
00420                 // Split out the name and value.
00421                 // *NOTE: The use of strchr() here is safe since mBuffer is
00422                 // guaranteed to be NULL terminated from the call to getline()
00423                 // above.
00424                 char* colon = strchr(mBuffer, ':');
00425                 if(!colon)
00426                 {
00427                         mError = true;
00428                         return false;
00429                 }
00430 
00431                 // Cononicalize the name part, and store the name: value in
00432                 // the headers structure. We do this by iterating through
00433                 // 'known' headers and replacing the value found with the
00434                 // correct one.
00435                 // *NOTE: Not so efficient, but iterating through a small
00436                 // subset should not be too much of an issue.
00437                 std::string name(mBuffer, colon++ - mBuffer);
00438                 while(isspace(*colon)) ++colon;
00439                 std::string value(colon);
00440                 for(S32 ii = 0; ii < KNOWN_HEADER_COUNT; ++ii)
00441                 {
00442                         if(0 == LLString::compareInsensitive(
00443                                 name.c_str(),
00444                                 KNOWN_HEADER[ii].c_str()))
00445                         {
00446                                 name = KNOWN_HEADER[ii];
00447                                 break;
00448                         }
00449                 }
00450                 headers[name] = value;
00451         }
00452         if(headers.isUndefined()) return false;
00453         return true;
00454 }
00455 
00456 std::string LLMimeParser::Impl::findSeparator(std::string header)
00457 {
00458         //                               01234567890
00459         //Content-Type: multipart/mixed; boundary="segment"
00460         std::string separator;
00461         std::string::size_type pos = header.find(BOUNDARY);
00462         if(std::string::npos == pos) return separator;
00463         pos += BOUNDARY.size() + 1;
00464         std::string::size_type end;
00465         if(header[pos] == '"')
00466         {
00467                 // the boundary is quoted, find the end from pos, and take the
00468                 // substring.
00469                 end = header.find('"', ++pos);
00470                 if(std::string::npos == end)
00471                 {
00472                         // poorly formed boundary.
00473                         mError = true;
00474                 }
00475         }
00476         else
00477         {
00478                 // otherwise, it's every character until a whitespace, end of
00479                 // line, or another parameter begins.
00480                 end = header.find_first_of(END_OF_CONTENT_PARAMETER, pos);
00481                 if(std::string::npos == end)
00482                 {
00483                         // it goes to the end of the string.
00484                         end = header.size();
00485                 }
00486         }
00487         if(!mError) separator = header.substr(pos, end - pos);
00488         return separator;
00489 }
00490 
00491 void LLMimeParser::Impl::scanPastSeparator(
00492         std::istream& istr,
00493         S32 limit,
00494         const std::string& sep)
00495 {
00496         std::ostringstream ostr;
00497         ostr << SEPARATOR_PREFIX << sep;
00498         std::string separator = ostr.str();
00499         bool found_separator = false;
00500         while(!found_separator && continueParse())
00501         {
00502                 // Subtract 1 from the limit so that we make sure not to read
00503                 // past limit when we get() the newline.
00504                 S32 max_get = llmin((S32)LINE_BUFFER_LENGTH, limit - mScanCount - 1);
00505                 istr.getline(mBuffer, max_get, '\r');
00506                 mScanCount += istr.gcount();
00507                 if(istr.gcount() >= LINE_BUFFER_LENGTH - 1)
00508                 {
00509                         // that's way too long to be a separator, so ignore it.
00510                         continue;
00511                 }
00512                 int c = istr.get();
00513                 if(EOF == c)
00514                 {
00515                         mContinue = false;
00516                         return;
00517                 }
00518                 ++mScanCount;
00519                 if(c != '\n')
00520                 {
00521                         mError = true;
00522                         return;
00523                 }
00524                 if(mScanCount >= limit)
00525                 {
00526                         mContinue = false;
00527                 }
00528                 if(0 == LLString::compareStrings(mBuffer, separator.c_str()))
00529                 {
00530                         found_separator = true;
00531                 }
00532         }
00533 }
00534 
00535 void LLMimeParser::Impl::scanPastContent(
00536         std::istream& istr,
00537         S32 limit,
00538         LLSD headers,
00539         const std::string separator)
00540 {
00541         if(headers.has(CONTENT_LENGTH))
00542         {
00543                 S32 content_length = headers[CONTENT_LENGTH].asInteger();
00544                 // Subtract 2 here for the \r\n after the content.
00545                 S32 max_skip = llmin(content_length, limit - mScanCount - 2);
00546                 istr.ignore(max_skip);
00547                 mScanCount += max_skip;
00548 
00549                 // *NOTE: Check for hitting the limit and eof here before
00550                 // checking for the trailing EOF, because our mime parser has
00551                 // to gracefully handle incomplete mime entites.
00552                 if((mScanCount >= limit) || istr.eof())
00553                 {
00554                         mContinue = false;
00555                 }
00556                 else if(!eatCRLF(istr))
00557                 {
00558                         mError = true;
00559                         return;
00560                 }
00561         }
00562 }
00563 
00564 bool LLMimeParser::Impl::eatCRLF(std::istream& istr)
00565 {
00566         int c = istr.get();
00567         ++mScanCount;
00568         if(c != '\r')
00569         {
00570                 return false;
00571         }
00572         c = istr.get();
00573         ++mScanCount;
00574         if(c != '\n')
00575         {
00576                 return false;
00577         }
00578         return true;
00579 }
00580         
00581 
00582 LLMimeParser::LLMimeParser() : mImpl(* new LLMimeParser::Impl)
00583 {
00584 }
00585 
00586 LLMimeParser::~LLMimeParser()
00587 {
00588         delete & mImpl;
00589 }
00590 
00591 void LLMimeParser::reset()
00592 {
00593         mImpl.reset();
00594 }
00595 
00596 bool LLMimeParser::parseIndex(std::istream& istr, LLMimeIndex& index)
00597 {
00598         std::string separator;
00599         return mImpl.parseIndex(istr, S32_MAX, separator, false, index);
00600 }
00601 
00602 bool LLMimeParser::parseIndex(
00603         const std::vector<U8>& buffer,
00604         LLMimeIndex& index)
00605 {
00606         LLMemoryStream mstr(&buffer[0], buffer.size());
00607         return parseIndex(mstr, buffer.size() + 1, index);
00608 }
00609 
00610 bool LLMimeParser::parseIndex(
00611         std::istream& istr,
00612         S32 limit,
00613         LLMimeIndex& index)
00614 {
00615         std::string separator;
00616         return mImpl.parseIndex(istr, limit, separator, false, index);
00617 }
00618 
00619 bool LLMimeParser::parseIndex(const U8* buffer, S32 length, LLMimeIndex& index)
00620 {
00621         LLMemoryStream mstr(buffer, length);
00622         return parseIndex(mstr, length + 1, index);
00623 }
00624 
00625 /*
00626 bool LLMimeParser::verify(std::istream& isr, LLMimeIndex& index) const
00627 {
00628         return false;
00629 }
00630 
00631 bool LLMimeParser::verify(U8* buffer, S32 length, LLMimeIndex& index) const
00632 {
00633         LLMemoryStream mstr(buffer, length);
00634         return verify(mstr, index);
00635 }
00636 */

Generated on Thu Jul 1 06:08:53 2010 for Second Life Viewer by  doxygen 1.4.7