llmessagetemplateparser.cpp

Go to the documentation of this file.
00001 
00032 #include "linden_common.h"
00033 #include "llmessagetemplateparser.h"
00034 #include <boost/tokenizer.hpp>
00035 
00036 
00037 // What follows is a bunch of C functions to do validation.
00038 
00039 // Let's support a small subset of regular expressions here
00040 // Syntax is a string made up of:
00041 //      a       - checks against alphanumeric                           ([A-Za-z0-9])
00042 //      c       - checks against character                                      ([A-Za-z])
00043 //      f       - checks against first variable character       ([A-Za-z_])
00044 //      v       - checks against variable                                       ([A-Za-z0-9_])
00045 //      s       - checks against sign of integer                        ([-0-9])
00046 //  d   - checks against integer digit                          ([0-9])
00047 //  *   - repeat last check
00048 
00049 // checks 'a'
00050 BOOL    b_return_alphanumeric_ok(char c)
00051 {
00052         if (  (  (c < 'A')
00053                    ||(c > 'Z'))
00054                 &&(  (c < 'a')
00055                    ||(c > 'z'))
00056                 &&(  (c < '0')
00057                    ||(c > '9')))
00058         {
00059                 return FALSE;
00060         }
00061         return TRUE;
00062 }
00063 
00064 // checks 'c'
00065 BOOL    b_return_character_ok(char c)
00066 {
00067         if (  (  (c < 'A')
00068                    ||(c > 'Z'))
00069                 &&(  (c < 'a')
00070                    ||(c > 'z')))
00071         {
00072                 return FALSE;
00073         }
00074         return TRUE;
00075 }
00076 
00077 // checks 'f'
00078 BOOL    b_return_first_variable_ok(char c)
00079 {
00080         if (  (  (c < 'A')
00081                    ||(c > 'Z'))
00082                 &&(  (c < 'a')
00083                    ||(c > 'z'))
00084                 &&(c != '_'))
00085         {
00086                 return FALSE;
00087         }
00088         return TRUE;
00089 }
00090 
00091 // checks 'v'
00092 BOOL    b_return_variable_ok(char c)
00093 {
00094         if (  (  (c < 'A')
00095                    ||(c > 'Z'))
00096                 &&(  (c < 'a')
00097                    ||(c > 'z'))
00098                 &&(  (c < '0')
00099                    ||(c > '9'))
00100                 &&(c != '_'))
00101         {
00102                 return FALSE;
00103         }
00104         return TRUE;
00105 }
00106 
00107 // checks 's'
00108 BOOL    b_return_signed_integer_ok(char c)
00109 {
00110         if (  (  (c < '0')
00111                    ||(c > '9'))
00112                 &&(c != '-'))
00113         {
00114                 return FALSE;
00115         }
00116         return TRUE;
00117 }
00118 
00119 // checks 'd'
00120 BOOL    b_return_integer_ok(char c)
00121 {
00122         if (  (c < '0')
00123                 ||(c > '9'))
00124         {
00125                 return FALSE;
00126         }
00127         return TRUE;
00128 }
00129 
00130 BOOL    (*gParseCheckCharacters[])(char c) =
00131 {
00132         b_return_alphanumeric_ok,
00133         b_return_character_ok,
00134         b_return_first_variable_ok,
00135         b_return_variable_ok,
00136         b_return_signed_integer_ok,
00137         b_return_integer_ok
00138 };
00139 
00140 S32 get_checker_number(char checker)
00141 {
00142         switch(checker)
00143         {
00144         case 'a':
00145                 return 0;
00146         case 'c':
00147                 return 1;
00148         case 'f':
00149                 return 2;
00150         case 'v':
00151                 return 3;
00152         case 's':
00153                 return 4;
00154         case 'd':
00155                 return 5;
00156         case '*':
00157                 return 9999;
00158         default:
00159                 return -1;
00160         }
00161 }
00162 
00163 // check token based on passed simplified regular expression
00164 BOOL    b_check_token(const char *token, char *regexp)
00165 {
00166         S32 tptr, rptr = 0;
00167         S32 current_checker, next_checker = 0;
00168 
00169         current_checker = get_checker_number(regexp[rptr++]);
00170 
00171         if (current_checker == -1)
00172         {
00173                 llerrs << "Invalid regular expression value!" << llendl;
00174                 return FALSE;
00175         }
00176 
00177         if (current_checker == 9999)
00178         {
00179                 llerrs << "Regular expression can't start with *!" << llendl;
00180                 return FALSE;
00181         }
00182 
00183         for (tptr = 0; token[tptr]; tptr++)
00184         {
00185                 if (current_checker == -1)
00186                 {
00187                         llerrs << "Input exceeds regular expression!\nDid you forget a *?" << llendl;
00188                         return FALSE;
00189                 }
00190 
00191                 if (!gParseCheckCharacters[current_checker](token[tptr]))
00192                 {
00193                         return FALSE;
00194                 }
00195                 if (next_checker != 9999)
00196                 {
00197                         next_checker = get_checker_number(regexp[rptr++]);
00198                         if (next_checker != 9999)
00199                         {
00200                                 current_checker = next_checker;
00201                         }
00202                 }
00203         }
00204         return TRUE;
00205 }
00206 
00207 // C variable can be made up of upper or lower case letters, underscores, or numbers, but can't start with a number
00208 BOOL    b_variable_ok(const char *token)
00209 {
00210         if (!b_check_token(token, "fv*"))
00211         {
00212                 llwarns << "Token '" << token << "' isn't a variable!" << llendl;
00213                 return FALSE;
00214         }
00215         return TRUE;
00216 }
00217 
00218 // An integer is made up of the digits 0-9 and may be preceded by a '-'
00219 BOOL    b_integer_ok(const char *token)
00220 {
00221         if (!b_check_token(token, "sd*"))
00222         {
00223                 llwarns << "Token isn't an integer!" << llendl;
00224                 return FALSE;
00225         }
00226         return TRUE;
00227 }
00228 
00229 // An integer is made up of the digits 0-9
00230 BOOL    b_positive_integer_ok(const char *token)
00231 {
00232         if (!b_check_token(token, "d*"))
00233         {
00234                 llwarns << "Token isn't an integer!" << llendl;
00235                 return FALSE;
00236         }
00237         return TRUE;
00238 }
00239 
00240 
00241 // Done with C functions, here's the tokenizer.
00242 
00243 typedef boost::tokenizer< boost::char_separator<char> > tokenizer;      
00244 
00245 LLTemplateTokenizer::LLTemplateTokenizer(const std::string & contents) : mStarted(false), mTokens()
00246 {
00247         boost::char_separator<char> newline("\r\n", "", boost::keep_empty_tokens);
00248         boost::char_separator<char> spaces(" \t");
00249         U32 line_counter = 1;
00250         
00251         tokenizer line_tokens(contents, newline);
00252         for(tokenizer::iterator line_iter = line_tokens.begin();
00253                 line_iter != line_tokens.end();
00254                 ++line_iter, ++line_counter)
00255         {
00256                 tokenizer word_tokens(*line_iter, spaces);
00257                 for(tokenizer::iterator word_iter = word_tokens.begin();
00258                         word_iter != word_tokens.end();
00259                         ++word_iter)
00260                 {
00261                         if((*word_iter)[0] == '/')
00262                         {
00263                                 break;   // skip to end of line on comments
00264                         }
00265                         positioned_token pt;// = new positioned_token();
00266                         pt.str = std::string(*word_iter);
00267                         pt.line = line_counter;
00268                         mTokens.push_back(pt);
00269                 }
00270         }
00271         mCurrent = mTokens.begin();
00272 }
00273 void LLTemplateTokenizer::inc()
00274 {
00275         if(atEOF())
00276         {
00277                 error("trying to increment token of EOF");
00278         }
00279         else if(mStarted)
00280         {
00281                 ++mCurrent;
00282         }
00283         else
00284         {
00285                 mStarted = true;
00286                 mCurrent = mTokens.begin();
00287         }
00288 }
00289 void LLTemplateTokenizer::dec()
00290 {
00291         if(mCurrent == mTokens.begin())
00292         {
00293                 if(mStarted)
00294                 {
00295                         mStarted = false;
00296                 }
00297                 else
00298                 {
00299                         error("trying to decrement past beginning of file");
00300                 }
00301         }
00302         else
00303         {
00304                 mCurrent--;
00305         }
00306 }
00307 
00308 std::string LLTemplateTokenizer::get() const
00309 {
00310         if(atEOF())
00311         {
00312                 error("trying to get EOF");
00313         }
00314         return mCurrent->str;
00315 }
00316 
00317 U32 LLTemplateTokenizer::line() const
00318 {
00319         if(atEOF())
00320         {
00321                 return 0;
00322         }
00323         return mCurrent->line;
00324 }
00325 
00326 bool LLTemplateTokenizer::atEOF() const
00327 {
00328         return mCurrent == mTokens.end();
00329 }
00330 
00331 std::string LLTemplateTokenizer::next()
00332 {
00333         inc();
00334         return get();
00335 }
00336 
00337 bool LLTemplateTokenizer::want(const std::string & token)
00338 {
00339         if(atEOF()) return false;
00340         inc();
00341         if(atEOF()) return false;
00342         if(get() != token)
00343         {
00344                 dec(); // back up a step
00345                 return false;
00346         }
00347         return true;
00348 }
00349 
00350 bool LLTemplateTokenizer::wantEOF()
00351 {
00352         // see if the next token is EOF
00353         if(atEOF()) return true;
00354         inc();
00355         if(!atEOF())
00356         {
00357                 dec(); // back up a step
00358                 return false;
00359         }
00360         return true;
00361 }
00362 
00363 void LLTemplateTokenizer::error(std::string message) const
00364 {
00365         if(atEOF())
00366         {
00367                 llerrs << "Unexpected end of file: " << message << llendl;
00368         }
00369         else
00370         {
00371                 llerrs << "Problem parsing message template at line "
00372                            << line() << ", with token '" << get() << "' : "
00373                            << message << llendl;
00374         }
00375 }
00376 
00377 
00378 // Done with tokenizer, next is the parser.
00379 
00380 LLTemplateParser::LLTemplateParser(LLTemplateTokenizer & tokens):
00381         mVersion(0.f),
00382         mMessages()
00383 {
00384         // the version number should be the first thing in the file
00385         if (tokens.want("version"))
00386         {
00387                 // version number
00388                 std::string vers_string = tokens.next();
00389                 mVersion = (F32)atof(vers_string.c_str());
00390                 
00391                 llinfos << "### Message template version " << mVersion << "  ###" << llendl;
00392         }
00393         else
00394         {
00395                 llerrs << "Version must be first in the message template, found "
00396                            << tokens.next() << llendl;
00397         }
00398 
00399         while(LLMessageTemplate * templatep = parseMessage(tokens))
00400         {
00401                 if (templatep->getDeprecation() != MD_DEPRECATED)
00402                 {
00403                         mMessages.push_back(templatep);
00404                 }
00405         }
00406 
00407         if(!tokens.wantEOF())
00408         {
00409                 llerrs << "Expected end of template or a message, instead found: "
00410                            << tokens.next() << " at " << tokens.line() << llendl;
00411         }
00412 }
00413 
00414 F32 LLTemplateParser::getVersion() const
00415 {
00416         return mVersion;
00417 }
00418 
00419 LLTemplateParser::message_iterator LLTemplateParser::getMessagesBegin() const
00420 {
00421         return mMessages.begin();
00422 }
00423 
00424 LLTemplateParser::message_iterator LLTemplateParser::getMessagesEnd() const
00425 {
00426         return mMessages.end();
00427 }
00428 
00429 
00430 // static
00431 LLMessageTemplate * LLTemplateParser::parseMessage(LLTemplateTokenizer & tokens)
00432 {
00433         LLMessageTemplate       *templatep = NULL;
00434         if(!tokens.want("{"))
00435         {
00436                 return NULL;
00437         }
00438 
00439         // name first
00440         std::string template_name = tokens.next();
00441         
00442         // is name a legit C variable name
00443         if (!b_variable_ok(template_name.c_str()))
00444         {
00445                 llerrs << "Not legit variable name: " << template_name << " at " << tokens.line() << llendl;
00446         }
00447 
00448         // ok, now get Frequency ("High", "Medium", or "Low")
00449         EMsgFrequency frequency = MFT_LOW;
00450         std::string freq_string = tokens.next();
00451         if (freq_string == "High")
00452         {
00453                 frequency = MFT_HIGH;
00454         }
00455         else if (freq_string == "Medium")
00456         {
00457                 frequency = MFT_MEDIUM;
00458         }
00459         else if (freq_string == "Low" || freq_string == "Fixed")
00460         {
00461                 frequency = MFT_LOW;
00462         }
00463         else
00464         {
00465                 llerrs << "Expected frequency, got " << freq_string << " at " << tokens.line() << llendl;
00466         }
00467 
00468         // TODO more explicit checking here pls
00469         U32 message_number = strtoul(tokens.next().c_str(),NULL,0);
00470 
00471         switch (frequency) {
00472         case MFT_HIGH:
00473                 break;
00474         case MFT_MEDIUM:
00475                 message_number = (255 << 8) | message_number;
00476                 break;
00477         case MFT_LOW:
00478                 message_number = (255 << 24) | (255 << 16) | message_number;
00479                 break;
00480         default:
00481                 llerrs << "Unknown frequency enum: " << frequency << llendl;
00482         }
00483    
00484         templatep = new LLMessageTemplate(
00485                 template_name.c_str(),
00486                 message_number,
00487                 frequency);
00488                 
00489         // Now get trust ("Trusted", "NotTrusted")
00490         std::string trust = tokens.next();
00491         if (trust == "Trusted")
00492         {
00493                 templatep->setTrust(MT_TRUST);
00494         }
00495         else if (trust == "NotTrusted")
00496         {
00497                 templatep->setTrust(MT_NOTRUST);
00498         }
00499         else
00500         {
00501                 llerrs << "Bad trust " << trust << " at " << tokens.line() << llendl;
00502         }
00503         
00504         // get encoding
00505         std::string encoding = tokens.next();
00506         if(encoding == "Unencoded")
00507         {
00508                 templatep->setEncoding(ME_UNENCODED);
00509         }
00510         else if(encoding == "Zerocoded")
00511         {
00512                 templatep->setEncoding(ME_ZEROCODED);
00513         }
00514         else
00515         {
00516                 llerrs << "Bad encoding " << encoding << " at " << tokens.line() << llendl;
00517         }
00518 
00519         // get deprecation
00520         if(tokens.want("Deprecated"))
00521         {
00522                 templatep->setDeprecation(MD_DEPRECATED);
00523         }
00524         else if (tokens.want("UDPDeprecated"))
00525         {
00526                 templatep->setDeprecation(MD_UDPDEPRECATED);
00527         }
00528         else if (tokens.want("NotDeprecated"))
00529         {
00530                 // this is the default value, but it can't hurt to set it twice
00531                 templatep->setDeprecation(MD_NOTDEPRECATED);
00532         }
00533         else {
00534                 // It's probably a brace, let's just start block processing
00535         }
00536 
00537         while(LLMessageBlock * blockp = parseBlock(tokens))
00538         {
00539                 templatep->addBlock(blockp);
00540         }
00541         
00542         if(!tokens.want("}"))
00543         {
00544                 llerrs << "Expecting closing } for message " << template_name
00545                            << " at " << tokens.line() << llendl;
00546         }
00547         return templatep;
00548 }
00549 
00550 // static
00551 LLMessageBlock * LLTemplateParser::parseBlock(LLTemplateTokenizer & tokens)
00552 {
00553         LLMessageBlock * blockp = NULL;
00554 
00555         if(!tokens.want("{"))
00556         {
00557                 return NULL;
00558         }
00559 
00560         // name first
00561         std::string block_name = tokens.next();
00562 
00563         // is name a legit C variable name
00564         if (!b_variable_ok(block_name.c_str()))
00565         {
00566                 llerrs << "not a legal block name: " << block_name
00567                            << " at " << tokens.line() << llendl;
00568         }
00569 
00570         // now, block type ("Single", "Multiple", or "Variable")
00571         std::string block_type = tokens.next();
00572         // which one is it?
00573         if (block_type == "Single")
00574         {
00575                 // ok, we can create a block
00576                 blockp = new LLMessageBlock(block_name.c_str(), MBT_SINGLE);
00577         }
00578         else if (block_type == "Multiple")
00579         {
00580                 // need to get the number of repeats
00581                 std::string repeats = tokens.next();
00582                 
00583                 // is it a legal integer
00584                 if (!b_positive_integer_ok(repeats.c_str()))
00585                 {
00586                         llerrs << "not a legal integer for block multiple count: "
00587                                    << repeats << " at " << tokens.line() << llendl;
00588                 }
00589                 
00590                 // ok, we can create a block
00591                 blockp = new LLMessageBlock(block_name.c_str(),
00592                                                                         MBT_MULTIPLE,
00593                                                                         atoi(repeats.c_str()));
00594         }
00595         else if (block_type == "Variable")
00596         {
00597                 // ok, we can create a block
00598                 blockp = new LLMessageBlock(block_name.c_str(), MBT_VARIABLE);
00599         }
00600         else
00601         {
00602                 llerrs << "bad block type: " << block_type
00603                            << " at " << tokens.line() << llendl;
00604         }
00605 
00606 
00607         while(LLMessageVariable * varp = parseVariable(tokens))
00608         {
00609                 blockp->addVariable(varp->getName(),
00610                                                         varp->getType(),
00611                                                         varp->getSize());
00612                 delete varp;
00613         }
00614 
00615         if(!tokens.want("}"))
00616         {
00617                 llerrs << "Expecting closing } for block " << block_name
00618                            << " at " << tokens.line() << llendl;
00619         }
00620         return blockp;
00621    
00622 }
00623 
00624 // static
00625 LLMessageVariable * LLTemplateParser::parseVariable(LLTemplateTokenizer & tokens)
00626 {
00627         LLMessageVariable * varp = NULL;
00628         if(!tokens.want("{"))
00629         {
00630                 return NULL;
00631         }
00632 
00633         std::string var_name = tokens.next();
00634 
00635         if (!b_variable_ok(var_name.c_str()))
00636         {
00637                 llerrs << "Not a legit variable name: " << var_name
00638                            << " at " << tokens.line() << llendl;
00639         }
00640 
00641         std::string var_type = tokens.next();
00642 
00643         if (var_type == "U8")
00644         {
00645                 varp = new LLMessageVariable(var_name.c_str(), MVT_U8, 1);                                      
00646         }
00647         else if (var_type == "U16")
00648         {
00649                 varp = new LLMessageVariable(var_name.c_str(), MVT_U16, 2);                                     
00650         }
00651         else if (var_type == "U32")
00652         {
00653                 varp = new LLMessageVariable(var_name.c_str(), MVT_U32, 4);                                     
00654         }
00655         else if (var_type == "U64")
00656         {
00657                 varp = new LLMessageVariable(var_name.c_str(), MVT_U64, 8);                                     
00658         }
00659         else if (var_type == "S8")
00660         {
00661                 varp = new LLMessageVariable(var_name.c_str(), MVT_S8, 1);                                      
00662         }
00663         else if (var_type == "S16")
00664         {
00665                 varp = new LLMessageVariable(var_name.c_str(), MVT_S16, 2);                                     
00666         }
00667         else if (var_type == "S32")
00668         {
00669                 varp = new LLMessageVariable(var_name.c_str(), MVT_S32, 4);                                     
00670         }
00671         else if (var_type == "S64")
00672         {
00673                 varp = new LLMessageVariable(var_name.c_str(), MVT_S64, 8);                                     
00674         }
00675         else if (var_type == "F32")
00676         {
00677                 varp = new LLMessageVariable(var_name.c_str(), MVT_F32, 4);                                     
00678         }
00679         else if (var_type == "F64")
00680         {
00681                 varp = new LLMessageVariable(var_name.c_str(), MVT_F64, 8);                                     
00682         }
00683         else if (var_type == "LLVector3")
00684         {
00685                 varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3, 12);                                      
00686         }
00687         else if (var_type == "LLVector3d")
00688         {
00689                 varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3d, 24);
00690         }
00691         else if (var_type == "LLVector4")
00692         {
00693                 varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector4, 16);                                      
00694         }
00695         else if (var_type == "LLQuaternion")
00696         {
00697                 varp = new LLMessageVariable(var_name.c_str(), MVT_LLQuaternion, 12);
00698         }
00699         else if (var_type == "LLUUID")
00700         {
00701                 varp = new LLMessageVariable(var_name.c_str(), MVT_LLUUID, 16);                                 
00702         }
00703         else if (var_type == "BOOL")
00704         {
00705                 varp = new LLMessageVariable(var_name.c_str(), MVT_BOOL, 1);                                    
00706         }
00707         else if (var_type == "IPADDR")
00708         {
00709                 varp = new LLMessageVariable(var_name.c_str(), MVT_IP_ADDR, 4);                                 
00710         }
00711         else if (var_type == "IPPORT")
00712         {
00713                 varp = new LLMessageVariable(var_name.c_str(), MVT_IP_PORT, 2);
00714         }
00715         else if (var_type == "Fixed" || var_type == "Variable")
00716         {
00717                 std::string variable_size = tokens.next();
00718                 
00719                 if (!b_positive_integer_ok(variable_size.c_str()))
00720                 {
00721                         llerrs << "not a legal integer variable size: " << variable_size
00722                                    << " at " << tokens.line() << llendl;
00723                 }
00724 
00725                 EMsgVariableType type_enum;
00726                 if(var_type == "Variable")
00727                 {
00728                         type_enum = MVT_VARIABLE;
00729                 }
00730                 else if(var_type == "Fixed")
00731                 {
00732                         type_enum = MVT_FIXED;
00733                 }
00734                 else
00735                 {
00736                         type_enum = MVT_FIXED; // removes a warning
00737                         llerrs << "bad variable type: " << var_type
00738                                    << " at " << tokens.line() << llendl;
00739                 }
00740 
00741                 varp = new LLMessageVariable(
00742                         var_name.c_str(),
00743                         type_enum,
00744                         atoi(variable_size.c_str()));
00745         }
00746         else
00747         {
00748                 llerrs << "bad variable type:" << var_type
00749                            << " at " << tokens.line() << llendl;
00750         }
00751 
00752         if(!tokens.want("}"))
00753         {
00754                 llerrs << "Expecting closing } for variable " << var_name
00755                            << " at " << tokens.line() << llendl;
00756         }
00757         return varp;
00758 }

Generated on Thu Jul 1 06:08:53 2010 for Second Life Viewer by  doxygen 1.4.7