Second Life Viewer: llstring.cpp Source File

00001 
00032 #include "linden_common.h"
00033 
00034 #include "llstring.h"
00035 #include "llerror.h"
00036 
// Build a std::string from a C string pointer that may be NULL.
// A NULL pointer produces an empty string; anything else is copied up to
// its terminating NUL.
std::string ll_safe_string(const char* in)
{
    return in ? std::string(in) : std::string();
}
00042 
00043 U8 hex_as_nybble(char hex)
00044 {
00045         if((hex >= '0') && (hex <= '9'))
00046         {
00047                 return (U8)(hex - '0');
00048         }
00049         else if((hex >= 'a') && (hex <='f'))
00050         {
00051                 return (U8)(10 + hex - 'a');
00052         }
00053         else if((hex >= 'A') && (hex <='F'))
00054         {
00055                 return (U8)(10 + hex - 'A');
00056         }
00057         return 0; // uh - oh, not hex any more...
00058 }
00059 
00060 
00061 bool _read_file_into_string(std::string& str, const char* filename)
00062 {
00063         llifstream ifs(filename, llifstream::binary);
00064         if (!ifs.is_open())
00065         {
00066                 llinfos << "Unable to open file" << filename << llendl;
00067                 return false;
00068         }
00069 
00070         std::ostringstream oss;
00071 
00072         oss << ifs.rdbuf();
00073         str = oss.str();
00074         ifs.close();
00075         return true;
00076 }
00077 
00078 
00079 
00080 
00081 // See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
00082 // for the Unicode implementation. The conversion code in this file does not
00083 // match it because it was written before that implementation was found.
00084 
00085 
00086 std::ostream& operator<<(std::ostream &s, const LLWString &wstr)
00087 {
00088         std::string utf8_str = wstring_to_utf8str(wstr);
00089         s << utf8_str;
00090         return s;
00091 }
00092 
00093 std::string rawstr_to_utf8(const std::string& raw)
00094 {
00095         LLWString wstr(utf8str_to_wstring(raw));
00096         return wstring_to_utf8str(wstr);
00097 }
00098 
00099 S32 wchar_to_utf8chars(llwchar in_char, char* outchars)
00100 {
00101         U32 cur_char = (U32)in_char;
00102         char* base = outchars;
00103         if (cur_char < 0x80)
00104         {
00105                 *outchars++ = (U8)cur_char;
00106         }
00107         else if (cur_char < 0x800)
00108         {
00109                 *outchars++ = 0xC0 | (cur_char >> 6);
00110                 *outchars++ = 0x80 | (cur_char & 0x3F);
00111         }
00112         else if (cur_char < 0x10000)
00113         {
00114                 *outchars++ = 0xE0 | (cur_char >> 12);
00115                 *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
00116                 *outchars++ = 0x80 | (cur_char & 0x3F);
00117         }
00118         else if (cur_char < 0x200000)
00119         {
00120                 *outchars++ = 0xF0 | (cur_char >> 18);
00121                 *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
00122                 *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
00123                 *outchars++ = 0x80 | cur_char & 0x3F;
00124         }
00125         else if (cur_char < 0x4000000)
00126         {
00127                 *outchars++ = 0xF8 | (cur_char >> 24);
00128                 *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
00129                 *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
00130                 *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
00131                 *outchars++ = 0x80 | cur_char & 0x3F;
00132         }
00133         else if (cur_char < 0x80000000)
00134         {
00135                 *outchars++ = 0xFC | (cur_char >> 30);
00136                 *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
00137                 *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
00138                 *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
00139                 *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
00140                 *outchars++ = 0x80 | cur_char & 0x3F;
00141         }
00142         else
00143         {
00144                 llwarns << "Invalid Unicode character " << cur_char << "!" << llendl;
00145                 *outchars++ = LL_UNKNOWN_CHAR;
00146         }
00147         return outchars - base;
00148 }       
00149 
00150 S32 utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
00151 {
00152         const U16* base = inchars;
00153         U16 cur_char = *inchars++;
00154         llwchar char32 = cur_char;
00155         if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
00156         {
00157                 // Surrogates
00158                 char32 = ((llwchar)(cur_char - 0xD800)) << 10;
00159                 cur_char = *inchars++;
00160                 char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
00161         }
00162         else
00163         {
00164                 char32 = (llwchar)cur_char;
00165         }
00166         *outchar = char32;
00167         return inchars - base;
00168 }
00169 
00170 S32 utf16chars_to_utf8chars(const U16* inchars, char* outchars, S32* nchars8p)
00171 {
00172         // Get 32 bit char32
00173         llwchar char32;
00174         S32 nchars16 = utf16chars_to_wchar(inchars, &char32);
00175         // Convert to utf8
00176         S32 nchars8  = wchar_to_utf8chars(char32, outchars);
00177         if (nchars8p)
00178         {
00179                 *nchars8p = nchars8;
00180         }
00181         return nchars16;
00182 }
00183 
00184 llutf16string wstring_to_utf16str(const LLWString &utf32str, S32 len)
00185 {
00186         llutf16string out;
00187 
00188         S32 i = 0;
00189         while (i < len)
00190         {
00191                 U32 cur_char = utf32str[i];
00192                 if (cur_char > 0xFFFF)
00193                 {
00194                         out += (0xD7C0 + (cur_char >> 10));
00195                         out += (0xDC00 | (cur_char & 0x3FF));
00196                 }
00197                 else
00198                 {
00199                         out += cur_char;
00200                 }
00201                 i++;
00202         }
00203         return out;
00204 }
00205 
00206 llutf16string wstring_to_utf16str(const LLWString &utf32str)
00207 {
00208         const S32 len = (S32)utf32str.length();
00209         return wstring_to_utf16str(utf32str, len);
00210 }
00211 
00212 llutf16string utf8str_to_utf16str ( const LLString& utf8str )
00213 {
00214         LLWString wstr = utf8str_to_wstring ( utf8str );
00215         return wstring_to_utf16str ( wstr );
00216 }
00217 
00218 
00219 LLWString utf16str_to_wstring(const llutf16string &utf16str, S32 len)
00220 {
00221         LLWString wout;
00222         if((len <= 0) || utf16str.empty()) return wout;
00223 
00224         S32 i = 0;
00225         // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
00226         const U16* chars16 = &(*(utf16str.begin()));
00227         while (i < len)
00228         {
00229                 llwchar cur_char;
00230                 i += utf16chars_to_wchar(chars16+i, &cur_char);
00231                 wout += cur_char;
00232         }
00233         return wout;
00234 }
00235 
00236 LLWString utf16str_to_wstring(const llutf16string &utf16str)
00237 {
00238         const S32 len = (S32)utf16str.length();
00239         return utf16str_to_wstring(utf16str, len);
00240 }
00241 
00242 S32 wchar_utf8_length(const llwchar wc)
00243 {
00244         if (wc < 0x80)
00245         {
00246                 // This case will also catch negative values which are
00247                 // technically invalid.
00248                 return 1;
00249         }
00250         else if (wc < 0x800)
00251         {
00252                 return 2;
00253         }
00254         else if (wc < 0x10000)
00255         {
00256                 return 3;
00257         }
00258         else if (wc < 0x200000)
00259         {
00260                 return 4;
00261         }
00262         else if (wc < 0x4000000)
00263         {
00264                 return 5;
00265         }
00266         else
00267         {
00268                 return 6;
00269         }
00270 }
00271 
00272 
00273 S32 wstring_utf8_length(const LLWString& wstr)
00274 {
00275         S32 len = 0;
00276         for (S32 i = 0; i < (S32)wstr.length(); i++)
00277         {
00278                 len += wchar_utf8_length(wstr[i]);
00279         }
00280         return len;
00281 }
00282 
00283 
00284 LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
00285 {
00286         LLWString wout;
00287 
00288         S32 i = 0;
00289         while (i < len)
00290         {
00291                 llwchar unichar;
00292                 U8 cur_char = utf8str[i];
00293 
00294                 if (cur_char < 0x80)
00295                 {
00296                         // Ascii character, just add it
00297                         unichar = cur_char;
00298                 }
00299                 else
00300                 {
00301                         S32 cont_bytes = 0;
00302                         if ((cur_char >> 5) == 0x6)                     // Two byte UTF8 -> 1 UTF32
00303                         {
00304                                 unichar = (0x1F&cur_char);
00305                                 cont_bytes = 1;
00306                         }
00307                         else if ((cur_char >> 4) == 0xe)        // Three byte UTF8 -> 1 UTF32
00308                         {
00309                                 unichar = (0x0F&cur_char);
00310                                 cont_bytes = 2;
00311                         }
00312                         else if ((cur_char >> 3) == 0x1e)       // Four byte UTF8 -> 1 UTF32
00313                         {
00314                                 unichar = (0x07&cur_char);
00315                                 cont_bytes = 3;
00316                         }
00317                         else if ((cur_char >> 2) == 0x3e)       // Five byte UTF8 -> 1 UTF32
00318                         {
00319                                 unichar = (0x03&cur_char);
00320                                 cont_bytes = 4;
00321                         }
00322                         else if ((cur_char >> 1) == 0x7e)       // Six byte UTF8 -> 1 UTF32
00323                         {
00324                                 unichar = (0x01&cur_char);
00325                                 cont_bytes = 5;
00326                         }
00327                         else
00328                         {
00329                                 wout += LL_UNKNOWN_CHAR;
00330                                 ++i;
00331                                 continue;
00332                         }
00333 
00334                         // Check that this character doesn't go past the end of the string
00335                         S32 end = (len < (i + cont_bytes)) ? len : (i + cont_bytes);
00336                         do
00337                         {
00338                                 ++i;
00339 
00340                                 cur_char = utf8str[i];
00341                                 if ( (cur_char >> 6) == 0x2 )
00342                                 {
00343                                         unichar <<= 6;
00344                                         unichar += (0x3F&cur_char);
00345                                 }
00346                                 else
00347                                 {
00348                                         // Malformed sequence - roll back to look at this as a new char
00349                                         unichar = LL_UNKNOWN_CHAR;
00350                                         --i;
00351                                         break;
00352                                 }
00353                         } while(i < end);
00354 
00355                         // Handle overlong characters and NULL characters
00356                         if ( ((cont_bytes == 1) && (unichar < 0x80))
00357                                 || ((cont_bytes == 2) && (unichar < 0x800))
00358                                 || ((cont_bytes == 3) && (unichar < 0x10000))
00359                                 || ((cont_bytes == 4) && (unichar < 0x200000))
00360                                 || ((cont_bytes == 5) && (unichar < 0x4000000)) )
00361                         {
00362                                 unichar = LL_UNKNOWN_CHAR;
00363                         }
00364                 }
00365 
00366                 wout += unichar;
00367                 ++i;
00368         }
00369         return wout;
00370 }
00371 
00372 LLWString utf8str_to_wstring(const std::string& utf8str)
00373 {
00374         const S32 len = (S32)utf8str.length();
00375         return utf8str_to_wstring(utf8str, len);
00376 }
00377 
00378 std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
00379 {
00380         std::string out;
00381 
00382         S32 i = 0;
00383         while (i < len)
00384         {
00385                 char tchars[8];         /* Flawfinder: ignore */
00386                 S32 n = wchar_to_utf8chars(utf32str[i], tchars);
00387                 tchars[n] = 0;
00388                 out += tchars;
00389                 i++;
00390         }
00391         return out;
00392 }
00393 
00394 std::string wstring_to_utf8str(const LLWString& utf32str)
00395 {
00396         const S32 len = (S32)utf32str.length();
00397         return wstring_to_utf8str(utf32str, len);
00398 }
00399 
00400 std::string utf16str_to_utf8str(const llutf16string& utf16str)
00401 {
00402         return wstring_to_utf8str(utf16str_to_wstring(utf16str));
00403 }
00404 
00405 std::string utf16str_to_utf8str(const llutf16string& utf16str, S32 len)
00406 {
00407         return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
00408 }
00409 
00410 
00411 //LLWString wstring_truncate(const LLWString &wstr, const S32 max_len)
00412 //{
00413 //      return wstr.substr(0, llmin((S32)wstr.length(), max_len));
00414 //}
00415 //
00416 //
00417 //LLWString wstring_trim(const LLWString &wstr)
00418 //{
00419 //      LLWString outstr;
00420 //      outstr = wstring_trimhead(wstr);
00421 //      outstr = wstring_trimtail(outstr);
00422 //      return outstr;
00423 //}
00424 //
00425 //
00426 //LLWString wstring_trimhead(const LLWString &wstr)
00427 //{
00428 //      if(wstr.empty())
00429 //      {
00430 //              return wstr;
00431 //      }
00432 //
00433 //    S32 i = 0;
00434 //      while((i < (S32)wstr.length()) && iswspace(wstr[i]))
00435 //      {
00436 //              i++;
00437 //      }
00438 //      return wstr.substr(i, wstr.length() - i);
00439 //}
00440 //
00441 //
00442 //LLWString wstring_trimtail(const LLWString &wstr)
00443 //{                     
00444 //      if(wstr.empty())
00445 //      {
00446 //              return wstr;
00447 //      }
00448 //
00449 //      S32 len = (S32)wstr.length();
00450 //
00451 //      S32 i = len - 1;
00452 //      while (i >= 0 && iswspace(wstr[i]))
00453 //      {
00454 //              i--;
00455 //      }
00456 //
00457 //      if (i >= 0)
00458 //      {
00459 //              return wstr.substr(0, i + 1);
00460 //      }
00461 //      return wstr;
00462 //}
00463 //
00464 //
00465 //LLWString wstring_copyinto(const LLWString &dest, const LLWString &src, const S32 insert_offset)
00466 //{
00467 //      llassert( insert_offset <= (S32)dest.length() );
00468 //
00469 //      LLWString out_str = dest.substr(0, insert_offset);
00470 //      out_str += src;
00471 //      LLWString tail = dest.substr(insert_offset);
00472 //      out_str += tail;
00473 //
00474 //      return out_str;
00475 //}
00476 
00477 
00478 //LLWString wstring_detabify(const LLWString &wstr, const S32 num_spaces)
00479 //{
00480 //      LLWString out_str;
00481 //      // Replace tabs with spaces
00482 //      for (S32 i = 0; i < (S32)wstr.length(); i++)
00483 //      {
00484 //              if (wstr[i] == '\t')
00485 //              {
00486 //                      for (S32 j = 0; j < num_spaces; j++)
00487 //                              out_str += ' ';
00488 //              }
00489 //              else
00490 //              {
00491 //                      out_str += wstr[i];
00492 //              }
00493 //      }
00494 //      return out_str;
00495 //}
00496 
00497 
00498 //LLWString wstring_makeASCII(const LLWString &wstr)
00499 //{
00500 //      // Replace non-ASCII chars with replace_char
00501 //      LLWString out_str = wstr;
00502 //      for (S32 i = 0; i < (S32)out_str.length(); i++)
00503 //      {
00504 //              if (out_str[i] > 0x7f)
00505 //              {
00506 //                      out_str[i] = LL_UNKNOWN_CHAR;
00507 //              }
00508 //      }
00509 //      return out_str;
00510 //}
00511 
00512 
00513 //LLWString wstring_substChar(const LLWString &wstr, const llwchar target_char, const llwchar replace_char)
00514 //{
00515 //      // Replace all occurences of target_char with replace_char
00516 //      LLWString out_str = wstr;
00517 //      for (S32 i = 0; i < (S32)out_str.length(); i++)
00518 //      {
00519 //              if (out_str[i] == target_char)
00520 //              {
00521 //                      out_str[i] = replace_char;
00522 //              }
00523 //      }
00524 //      return out_str;
00525 //}
00526 //
00527 //
00528 //LLWString wstring_tolower(const LLWString &wstr)
00529 //{
00530 //      LLWString out_str = wstr;
00531 //      for (S32 i = 0; i < (S32)out_str.length(); i++)
00532 //      {
00533 //              out_str[i] = towlower(out_str[i]);
00534 //      }
00535 //      return out_str;
00536 //}
00537 //
00538 //
00539 //LLWString wstring_convert_to_lf(const LLWString &wstr)
00540 //{
00541 //      const llwchar CR = 13;
00542 //      // Remove carriage returns from string with CRLF
00543 //      LLWString out_str;
00544 //
00545 //      for (S32 i = 0; i < (S32)wstr.length(); i++)
00546 //      {
00547 //              if (wstr[i] != CR)
00548 //              {
00549 //                      out_str += wstr[i];
00550 //              }
00551 //      }
00552 //      return out_str;
00553 //}
00554 //
00555 //
00556 //LLWString wstring_convert_to_crlf(const LLWString &wstr)
00557 //{
00558 //      const llwchar LF = 10;
00559 //      const llwchar CR = 13;
00560 //      // Insert a carriage return before every line feed (LF -> CRLF)
00561 //      LLWString out_str;
00562 //
00563 //      for (S32 i = 0; i < (S32)wstr.length(); i++)
00564 //      {
00565 //              if (wstr[i] == LF)
00566 //              {
00567 //                      out_str += CR;
00568 //              }
00569 //              out_str += wstr[i];
00570 //      }
00571 //      return out_str;
00572 //}
00573 
00574 
00575 //S32   wstring_compare_insensitive(const LLWString &lhs, const LLWString &rhs)
00576 //{
00577 //
00578 //      if (lhs == rhs)
00579 //      {
00580 //              return 0;
00581 //      }
00582 //
00583 //      if (lhs.empty())
00584 //      {
00585 //              return rhs.empty() ? 0 : 1;
00586 //      }
00587 //
00588 //      if (rhs.empty())
00589 //      {
00590 //              return -1;
00591 //      }
00592 //
00593 //#ifdef LL_LINUX
00594 //      // doesn't work because gcc 2.95 doesn't correctly implement c_str().  Sigh...
00595 //      llerrs << "wstring_compare_insensitive doesn't work on Linux!" << llendl;
00596 //      return 0;
00597 //#else
00598 //      LLWString lhs_lower = lhs;
00599 //      LLWString::toLower(lhs_lower);
00600 //      std::string lhs_lower = wstring_to_utf8str(lhs_lower);
00601 //      LLWString rhs_lower = lhs;
00602 //      LLWString::toLower(rhs_lower);
00603 //      std::string rhs_lower = wstring_to_utf8str(rhs_lower);
00604 //
00605 //      return strcmp(lhs_lower.c_str(), rhs_lower.c_str());
00606 //#endif
00607 //}
00608 
00609 
00610 std::string utf8str_trim(const std::string& utf8str)
00611 {
00612         LLWString wstr = utf8str_to_wstring(utf8str);
00613         LLWString::trim(wstr);
00614         return wstring_to_utf8str(wstr);
00615 }
00616 
00617 
00618 std::string utf8str_tolower(const std::string& utf8str)
00619 {
00620         LLWString out_str = utf8str_to_wstring(utf8str);
00621         LLWString::toLower(out_str);
00622         return wstring_to_utf8str(out_str);
00623 }
00624 
00625 
00626 S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
00627 {
00628         LLWString wlhs = utf8str_to_wstring(lhs);
00629         LLWString wrhs = utf8str_to_wstring(rhs);
00630         return LLWString::compareInsensitive(wlhs.c_str(), wrhs.c_str());
00631 }
00632 
00633 std::string utf8str_truncate(const std::string& utf8str, const S32 max_len)
00634 {
00635         if (0 == max_len)
00636         {
00637                 return std::string();
00638         }
00639         if ((S32)utf8str.length() <= max_len)
00640         {
00641                 return utf8str;
00642         }
00643         else
00644         {
00645                 S32 cur_char = max_len;
00646 
00647                 // If we're ASCII, we don't need to do anything
00648                 if ((U8)utf8str[cur_char] > 0x7f)
00649                 {
00650                         // If first two bits are (10), it's the tail end of a multibyte char.  We need to shift back
00651                         // to the first character
00652                         while (0x80 == (0xc0 & utf8str[cur_char]))
00653                         {
00654                                 cur_char--;
00655                                 // Keep moving forward until we hit the first char;
00656                                 if (cur_char == 0)
00657                                 {
00658                                         // Make sure we don't trash memory if we've got a bogus string.
00659                                         break;
00660                                 }
00661                         }
00662                 }
00663                 // The byte index we're on is one we want to get rid of, so we only want to copy up to (cur_char-1) chars
00664                 return utf8str.substr(0, cur_char);
00665         }
00666 }
00667 
00668 std::string utf8str_substChar(
00669         const std::string& utf8str,
00670         const llwchar target_char,
00671         const llwchar replace_char)
00672 {
00673         LLWString wstr = utf8str_to_wstring(utf8str);
00674         LLWString::replaceChar(wstr, target_char, replace_char);
00675         //wstr = wstring_substChar(wstr, target_char, replace_char);
00676         return wstring_to_utf8str(wstr);
00677 }
00678 
00679 std::string utf8str_makeASCII(const std::string& utf8str)
00680 {
00681         LLWString wstr = utf8str_to_wstring(utf8str);
00682         LLWString::_makeASCII(wstr);
00683         return wstring_to_utf8str(wstr);
00684 }
00685 
00686 std::string mbcsstring_makeASCII(const std::string& wstr)
00687 {
00688         // Replace non-ASCII chars with replace_char
00689         std::string out_str = wstr;
00690         for (S32 i = 0; i < (S32)out_str.length(); i++)
00691         {
00692                 if ((U8)out_str[i] > 0x7f)
00693                 {
00694                         out_str[i] = LL_UNKNOWN_CHAR;
00695                 }
00696         }
00697         return out_str;
00698 }
// Return a copy of the string with every carriage return (byte 13) removed.
// Despite the name, line feeds are kept, so CRLF pairs become plain LF.
std::string utf8str_removeCRLF(const std::string& utf8str)
{
    const char CR = 13;

    std::string out;
    if (utf8str.empty())
    {
        return out;
    }

    // The result can never be longer than the input.
    out.reserve(utf8str.length());
    for (std::string::const_iterator it = utf8str.begin(); it != utf8str.end(); ++it)
    {
        if (*it != CR)
        {
            out.push_back(*it);
        }
    }
    return out;
}
00719 
00720 #if LL_WINDOWS
00721 /* If the size of the passed in buffer is not large enough to hold the string,
00722  * two bad things happen:
00723  * 1. resulting formatted string is NOT null terminated
00724  * 2. Depending on the platform, the return value could be a) the required
00725  *    size of the buffer to copy the entire formatted string or b) -1.
00726  *    On Windows with VS.Net 2003, it returns -1 e.g. 
00727  *
00728  * safe_snprintf always adds a NULL terminator so that the caller does not
00729  * need to check for return value or need to add the NULL terminator.
00730  * It does not, however change the return value - to let the caller know
00731  * that the passed in buffer size was not large enough to hold the formatted string.
00732  *
00733  */
00734 int safe_snprintf(char *str, size_t size, const char *format, ...)
00735 {
00736         va_list args;
00737         va_start(args, format);
00738 
00739         int num_written = _vsnprintf(str, size, format, args); /* Flawfinder: ignore */
00740         va_end(args);
00741         
00742         str[size-1] = '\0'; // always null terminate
00743         return num_written;
00744 }
00745 #endif // LL_WINDOWS
00746 
00747 S32     LLStringOps::collate(const llwchar* a, const llwchar* b)
00748 { 
00749         #if LL_WINDOWS
00750                 // in Windows, wide string functions operator on 16-bit strings, 
00751                 // not the proper 32 bit wide string
00752                 return strcmp(wstring_to_utf8str(LLWString(a)).c_str(), wstring_to_utf8str(LLWString(b)).c_str());
00753         #else
00754                 return wcscoll(a, b);
00755         #endif
00756 }
00757 
00758 namespace LLStringFn
00759 {
00760         void replace_nonprintable(std::basic_string<char>& string, char replacement)
00761         {
00762                 const char MIN = 0x20;
00763                 std::basic_string<char>::size_type len = string.size();
00764                 for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
00765                 {
00766                         if(string[ii] < MIN)
00767                         {
00768                                 string[ii] = replacement;
00769                         }
00770                 }
00771         }
00772 
00773         void replace_nonprintable(
00774                 std::basic_string<llwchar>& string,
00775                 llwchar replacement)
00776         {
00777                 const llwchar MIN = 0x20;
00778                 const llwchar MAX = 0x7f;
00779                 std::basic_string<llwchar>::size_type len = string.size();
00780                 for(std::basic_string<llwchar>::size_type ii = 0; ii < len; ++ii)
00781                 {
00782                         if((string[ii] < MIN) || (string[ii] > MAX))
00783                         {
00784                                 string[ii] = replacement;
00785                         }
00786                 }
00787         }
00788 
00789         void replace_nonprintable_and_pipe(std::basic_string<char>& str,
00790                                                                            char replacement)
00791         {
00792                 const char MIN  = 0x20;
00793                 const char PIPE = 0x7c;
00794                 std::basic_string<char>::size_type len = str.size();
00795                 for(std::basic_string<char>::size_type ii = 0; ii < len; ++ii)
00796                 {
00797                         if( (str[ii] < MIN) || (str[ii] == PIPE) )
00798                         {
00799                                 str[ii] = replacement;
00800                         }
00801                 }
00802         }
00803 
00804         void replace_nonprintable_and_pipe(std::basic_string<llwchar>& str,
00805                                                                            llwchar replacement)
00806         {
00807                 const llwchar MIN  = 0x20;
00808                 const llwchar MAX  = 0x7f;
00809                 const llwchar PIPE = 0x7c;
00810                 std::basic_string<llwchar>::size_type len = str.size();
00811                 for(std::basic_string<llwchar>::size_type ii = 0; ii < len; ++ii)
00812                 {
00813                         if( (str[ii] < MIN) || (str[ii] > MAX) || (str[ii] == PIPE) )
00814                         {
00815                                 str[ii] = replacement;
00816                         }
00817                 }
00818         }
00819 }
00820 
00821 
00823 // Testing
00824 
00825 #ifdef _DEBUG
00826 
00827 template<class T> 
00828 void LLStringBase<T>::testHarness()
00829 {
00830         LLString s1;
00831         
00832         llassert( s1.c_str() == NULL );
00833         llassert( s1.size() == 0 );
00834         llassert( s1.empty() );
00835         
00836         LLString s2( "hello");
00837         llassert( !strcmp( s2.c_str(), "hello" ) );
00838         llassert( s2.size() == 5 ); 
00839         llassert( !s2.empty() );
00840         LLString s3( s2 );
00841 
00842         llassert( "hello" == s2 );
00843         llassert( s2 == "hello" );
00844         llassert( s2 > "gello" );
00845         llassert( "gello" < s2 );
00846         llassert( "gello" != s2 );
00847         llassert( s2 != "gello" );
00848 
00849         LLString s4 = s2;
00850         llassert( !s4.empty() );
00851         s4.empty();
00852         llassert( s4.empty() );
00853         
00854         LLString s5("");
00855         llassert( s5.empty() );
00856         
00857         llassert( isValidIndex(s5, 0) );
00858         llassert( !isValidIndex(s5, 1) );
00859         
00860         s3 = s2;
00861         s4 = "hello again";
00862         
00863         s4 += "!";
00864         s4 += s4;
00865         llassert( s4 == "hello again!hello again!" );
00866         
00867         
00868         LLString s6 = s2 + " " + s2;
00869         LLString s7 = s6;
00870         llassert( s6 == s7 );
00871         llassert( !( s6 != s7) );
00872         llassert( !(s6 < s7) );
00873         llassert( !(s6 > s7) );
00874         
00875         llassert( !(s6 == "hi"));
00876         llassert( s6 == "hello hello");
00877         llassert( s6 < "hi");
00878         
00879         llassert( s6[1] == 'e' );
00880         s6[1] = 'f';
00881         llassert( s6[1] == 'f' );
00882         
00883         s2.erase( 4, 1 );
00884         llassert( s2 == "hell");
00885         s2.insert( 0, 'y' );
00886         llassert( s2 == "yhell");
00887         s2.erase( 1, 3 );
00888         llassert( s2 == "yl");
00889         s2.insert( 1, "awn, don't yel");
00890         llassert( s2 == "yawn, don't yell");
00891         
00892         LLString s8 = s2.substr( 6, 5 );
00893         llassert( s8 == "don't"  );
00894         
00895         LLString s9 = "   \t\ntest  \t\t\n  ";
00896         trim(s9);
00897         llassert( s9 == "test"  );
00898 
00899         s8 = "abc123&*(ABC";
00900 
00901         s9 = s8;
00902         toUpper(s9);
00903         llassert( s9 == "ABC123&*(ABC"  );
00904 
00905         s9 = s8;
00906         toLower(s9);
00907         llassert( s9 == "abc123&*(abc"  );
00908 
00909 
00910         LLString s10( 10, 'x' );
00911         llassert( s10 == "xxxxxxxxxx" );
00912 
00913         LLString s11( "monkey in the middle", 7, 2 );
00914         llassert( s11 == "in" );
00915 
00916         LLString s12;  //empty
00917         s12 += "foo";
00918         llassert( s12 == "foo" );
00919 
00920         LLString s13;  //empty
00921         s13 += 'f';
00922         llassert( s13 == "f" );
00923 }
00924 
00925 
00926 #endif  // _DEBUG