00001 """\
00002 @file named_query.py
00003 @author Ryan Williams, Phoenix
00004 @date 2007-07-31
00005 @brief An API for running named queries.
00006
00007 $LicenseInfo:firstyear=2007&license=mit$
00008
00009 Copyright (c) 2007-2008, Linden Research, Inc.
00010
00011 Permission is hereby granted, free of charge, to any person obtaining a copy
00012 of this software and associated documentation files (the "Software"), to deal
00013 in the Software without restriction, including without limitation the rights
00014 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00015 copies of the Software, and to permit persons to whom the Software is
00016 furnished to do so, subject to the following conditions:
00017
00018 The above copyright notice and this permission notice shall be included in
00019 all copies or substantial portions of the Software.
00020
00021 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00022 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00024 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00026 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00027 THE SOFTWARE.
00028 $/LicenseInfo$
00029 """
00030
00031 import errno
00032 import MySQLdb
00033 import MySQLdb.cursors
00034 import os
00035 import os.path
00036 import re
00037 import time
00038
00039
00040
00041
00042 try:
00043 set = set
00044 except NameError:
00045 from sets import Set as set
00046
00047 from indra.base import llsd
00048 from indra.base import config
00049
00050 NQ_FILE_SUFFIX = config.get('named-query-file-suffix', '')
00051 NQ_FILE_SUFFIX_LEN = len(NQ_FILE_SUFFIX)
00052
00053 _g_named_manager = None
00054
00055 def _init_g_named_manager(sql_dir = None):
00056 """Initializes a global NamedManager object to point at a
00057 specified named queries hierarchy.
00058
00059 This function is intended entirely for testing purposes,
00060 because it's tricky to control the config from inside a test."""
00061 if sql_dir is None:
00062 sql_dir = config.get('named-query-base-dir')
00063
00064
00065 if sql_dir is None:
00066 sql_dir = os.path.dirname(__file__) + "../../../../web/dataservice/sql"
00067
00068 global _g_named_manager
00069 _g_named_manager = NamedQueryManager(
00070 os.path.abspath(os.path.realpath(sql_dir)))
00071
00072 def get(name):
00073 "Get the named query object to be used to perform queries"
00074 if _g_named_manager is None:
00075 _init_g_named_manager()
00076 return _g_named_manager.get(name)
00077
00078 def sql(connection, name, params):
00079
00080 return get(name).sql(connection, params)
00081
00082 def run(connection, name, params, expect_rows = None):
00083 """\
00084 @brief given a connection, run a named query with the params
00085
00086 Note that this function will fetch ALL rows.
00087 @param connection The connection to use
00088 @param name The name of the query to run
00089 @param params The parameters passed into the query
00090 @param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack.
00091 @return Returns the result set as a list of dicts.
00092 """
00093 return get(name).run(connection, params, expect_rows)
00094
00095 class ExpectationFailed(Exception):
00096 """ Exception that is raised when an expectation for an sql query
00097 is not met."""
00098 def __init__(self, message):
00099 Exception.__init__(self, message)
00100 self.message = message
00101
00102 class NamedQuery(object):
00103 def __init__(self, name, filename):
00104 """ Construct a NamedQuery object. The name argument is an
00105 arbitrary name as a handle for the query, and the filename is
00106 a path to a file containing an llsd named query document."""
00107 self._stat_interval_seconds = 5
00108 self._name = name
00109 if (filename is not None) \
00110 and (NQ_FILE_SUFFIX != filename[-NQ_FILE_SUFFIX_LEN:]):
00111 filename = filename + NQ_FILE_SUFFIX
00112 self._location = filename
00113 self._alternative = dict()
00114 self._last_mod_time = 0
00115 self._last_check_time = 0
00116 self.deleted = False
00117 self.load_contents()
00118
00119 def name(self):
00120 """ The name of the query. """
00121 return self._name
00122
00123 def get_modtime(self):
00124 """ Returns the mtime (last modified time) of the named query
00125 file, if such exists."""
00126 if self._location:
00127 return os.path.getmtime(self._location)
00128 return 0
00129
00130 def load_contents(self):
00131 """ Loads and parses the named query file into self. Does
00132 nothing if self.location is nonexistant."""
00133 if self._location:
00134 self._reference_contents(llsd.parse(open(self._location).read()))
00135
00136 try:
00137 for name, alt in self._contents['alternative'].items():
00138 nq = NamedQuery(name, None)
00139 nq._reference_contents(alt)
00140 self._alternative[name] = nq
00141 except KeyError, e:
00142 pass
00143 self._last_mod_time = self.get_modtime()
00144 self._last_check_time = time.time()
00145
00146 def _reference_contents(self, contents):
00147 "Helper method which builds internal structure from parsed contents"
00148 self._contents = contents
00149 self._ttl = int(self._contents.get('ttl', 0))
00150 self._return_as_map = bool(self._contents.get('return_as_map', False))
00151 self._legacy_dbname = self._contents.get('legacy_dbname', None)
00152
00153
00154
00155
00156 self._around = set()
00157 self._append = set()
00158 self._integer = set()
00159 self._options = self._contents.get('dynamic_where', {})
00160 for key in self._options:
00161 if isinstance(self._options[key], basestring):
00162 self._options[key] = self._convert_sql(self._options[key])
00163 elif isinstance(self._options[key], list):
00164 lines = []
00165 for line in self._options[key]:
00166 lines.append(self._convert_sql(line))
00167 self._options[key] = lines
00168 else:
00169 moreopt = {}
00170 for kk in self._options[key]:
00171 moreopt[kk] = self._convert_sql(self._options[key][kk])
00172 self._options[key] = moreopt
00173 self._base_query = self._convert_sql(self._contents['base_query'])
00174 self._query_suffix = self._convert_sql(
00175 self._contents.get('query_suffix', ''))
00176
00177 def _convert_sql(self, sql):
00178 """convert the parsed sql into a useful internal structure.
00179
00180 This function has to turn the named query format into a pyformat
00181 style. It also has to look for %:name% and :name% and
00182 ready them for use in LIKE statements"""
00183 if sql:
00184
00185 expr = re.compile("(%?):([a-zA-Z][a-zA-Z0-9_-]*)%")
00186 sql = expr.sub(self._prepare_like, sql)
00187 expr = re.compile("#:([a-zA-Z][a-zA-Z0-9_-]*)")
00188 sql = expr.sub(self._prepare_integer, sql)
00189 expr = re.compile(":([a-zA-Z][a-zA-Z0-9_-]*)")
00190 sql = expr.sub("%(\\1)s", sql)
00191 return sql
00192
00193 def _prepare_like(self, match):
00194 """This function changes LIKE statement replace behavior
00195
00196 It works by turning %:name% to %(_name_around)s and :name% to
00197 %(_name_append)s. Since a leading '_' is not a valid keyname
00198 input (enforced via unit tests), it will never clash with
00199 existing keys. Then, when building the statement, the query
00200 runner will generate corrected strings."""
00201 if match.group(1) == '%':
00202
00203 self._around.add(match.group(2))
00204 return "%(" + self._build_around_key(match.group(2)) + ")s"
00205 else:
00206
00207 self._append.add(match.group(2))
00208 return "%(" + self._build_append_key(match.group(2)) + ")s"
00209
00210 def _build_around_key(self, key):
00211 return "_" + key + "_around"
00212
00213 def _build_append_key(self, key):
00214 return "_" + key + "_append"
00215
00216 def _prepare_integer(self, match):
00217 """This function adjusts the sql for #:name replacements
00218
00219 It works by turning #:name to %(_name_as_integer)s. Since a
00220 leading '_' is not a valid keyname input (enforced via unit
00221 tests), it will never clash with existing keys. Then, when
00222 building the statement, the query runner will generate
00223 corrected strings."""
00224 self._integer.add(match.group(1))
00225 return "%(" + self._build_integer_key(match.group(1)) + ")s"
00226
00227 def _build_integer_key(self, key):
00228 return "_" + key + "_as_integer"
00229
00230 def _strip_wildcards_to_list(self, value):
00231 """Take string, and strip out the LIKE special characters.
00232
00233 Technically, this is database dependant, but postgresql and
00234 mysql use the same wildcards, and I am not aware of a general
00235 way to handle this. I think you need a sql statement of the
00236 form:
00237
00238 LIKE_STRING( [ANY,ONE,str]... )
00239
00240 which would treat ANY as their any string, and ONE as their
00241 single glyph, and str as something that needs database
00242 specific encoding to not allow any % or _ to affect the query.
00243
00244 As it stands, I believe it's impossible to write a named query
00245 style interface which uses like to search the entire space of
00246 text available. Imagine the query:
00247
00248 % of brain used by average linden
00249
00250 In order to search for %, it must be escaped, so once you have
00251 escaped the string to not do wildcard searches, and be escaped
00252 for the database, and then prepended the wildcard you come
00253 back with one of:
00254
00255 1) %\% of brain used by average linden
00256 2) %%% of brain used by average linden
00257
00258 Then, when passed to the database to be escaped to be database
00259 safe, you get back:
00260
00261 1) %\\% of brain used by average linden
00262 : which means search for any character sequence, followed by a
00263 backslash, followed by any sequence, followed by ' of
00264 brain...'
00265 2) %%% of brain used by average linden
00266 : which (I believe) means search for a % followed by any
00267 character sequence followed by 'of brain...'
00268
00269 Neither of which is what we want!
00270
00271 So, we need a vendor (or extention) for LIKE_STRING. Anyone
00272 want to write it?"""
00273 utf8_value = unicode(value, "utf-8")
00274 esc_list = []
00275 remove_chars = set(u"%_")
00276 for glyph in utf8_value:
00277 if glyph in remove_chars:
00278 continue
00279 esc_list.append(glyph.encode("utf-8"))
00280 return esc_list
00281
00282 def delete(self):
00283 """ Makes this query unusable by deleting all the members and
00284 setting the deleted member. This is desired when the on-disk
00285 query has been deleted but the in-memory copy remains."""
00286
00287 name, location = self._name, self._location
00288 for key in self.__dict__.keys():
00289 del self.__dict__[key]
00290 self.deleted = True
00291 self._name, self._location = name, location
00292
00293 def ttl(self):
00294 """ Estimated time to live of this query. Used for web
00295 services to set the Expires header."""
00296 return self._ttl
00297
00298 def legacy_dbname(self):
00299 return self._legacy_dbname
00300
00301 def return_as_map(self):
00302 """ Returns true if this query is configured to return its
00303 results as a single map (as opposed to a list of maps, the
00304 normal behavior)."""
00305
00306 return self._return_as_map
00307
00308 def for_schema(self, db_name):
00309 "Look trough the alternates and return the correct query"
00310 try:
00311 return self._alternative[db_name]
00312 except KeyError, e:
00313 pass
00314 return self
00315
00316 def run(self, connection, params, expect_rows = None, use_dictcursor = True):
00317 """given a connection, run a named query with the params
00318
00319 Note that this function will fetch ALL rows. We do this because it
00320 opens and closes the cursor to generate the values, and this
00321 isn't a generator so the cursor has no life beyond the method call.
00322
00323 @param cursor The connection to use (this generates its own cursor for the query)
00324 @param name The name of the query to run
00325 @param params The parameters passed into the query
00326 @param expect_rows The number of rows expected. Set to 1 if return_as_map is true. Raises ExpectationFailed if the number of returned rows doesn't exactly match. Kind of a hack.
00327 @param use_dictcursor Set to false to use a normal cursor and manually convert the rows to dicts.
00328 @return Returns the result set as a list of dicts, or, if the named query has return_as_map set to true, returns a single dict.
00329 """
00330 if use_dictcursor:
00331 cursor = connection.cursor(MySQLdb.cursors.DictCursor)
00332 else:
00333 cursor = connection.cursor()
00334
00335 statement = self.sql(connection, params)
00336
00337 rows = cursor.execute(statement)
00338
00339
00340
00341
00342 if(self._return_as_map):
00343 expect_rows = 1
00344 if expect_rows is not None and rows != expect_rows:
00345 cursor.close()
00346 raise ExpectationFailed("Statement expected %s rows, got %s. Sql: %s" % (
00347 expect_rows, rows, statement))
00348
00349
00350 if use_dictcursor:
00351 result_set = cursor.fetchall()
00352 else:
00353 if cursor.description is None:
00354
00355 x = cursor.fetchall()
00356 cursor.close()
00357 return x
00358
00359 names = [x[0] for x in cursor.description]
00360
00361 result_set = []
00362 for row in cursor.fetchall():
00363 converted_row = {}
00364 for idx, col_name in enumerate(names):
00365 converted_row[col_name] = row[idx]
00366 result_set.append(converted_row)
00367
00368 cursor.close()
00369 if self._return_as_map:
00370 return result_set[0]
00371 return result_set
00372
00373 def sql(self, connection, params):
00374 """ Generates an SQL statement from the named query document
00375 and a dictionary of parameters.
00376
00377 """
00378 self.refresh()
00379
00380
00381 base_query = []
00382 base_query.append(self._base_query)
00383
00384 for opt, extra_where in self._options.items():
00385 if type(extra_where) in (dict, list, tuple):
00386 if opt in params:
00387 base_query.append(extra_where[params[opt]])
00388 else:
00389 if opt in params and params[opt]:
00390 base_query.append(extra_where)
00391 if self._query_suffix:
00392 base_query.append(self._query_suffix)
00393
00394 full_query = '\n'.join(base_query)
00395
00396
00397
00398 rewrite = _RewriteQueryForArray(params)
00399 expr = re.compile("@%\(([a-zA-Z][a-zA-Z0-9_-]*)\)s")
00400 full_query = expr.sub(rewrite.operate, full_query)
00401 params.update(rewrite.new_params)
00402
00403
00404
00405
00406
00407
00408
00409
00410 new_params = {}
00411 for key in params:
00412 if key in self._around:
00413 new_value = ['%']
00414 new_value.extend(self._strip_wildcards_to_list(params[key]))
00415 new_value.append('%')
00416 new_params[self._build_around_key(key)] = ''.join(new_value)
00417 if key in self._append:
00418 new_value = self._strip_wildcards_to_list(params[key])
00419 new_value.append('%')
00420 new_params[self._build_append_key(key)] = ''.join(new_value)
00421 if key in self._integer:
00422 new_params[self._build_integer_key(key)] = int(params[key])
00423 params.update(new_params)
00424
00425
00426
00427 sql = full_query % connection.literal(params)
00428 return sql
00429
00430 def refresh(self):
00431 """ Refresh self from the file on the filesystem.
00432
00433 This is optimized to be callable as frequently as you wish,
00434 without adding too much load. It does so by only stat-ing the
00435 file every N seconds, where N defaults to 5 and is
00436 configurable through the member _stat_interval_seconds. If the stat
00437 reveals that the file has changed, refresh will re-parse the
00438 contents of the file and use them to update the named query
00439 instance. If the stat reveals that the file has been deleted,
00440 refresh will call self.delete to make the in-memory
00441 representation unusable."""
00442 now = time.time()
00443 if(now - self._last_check_time > self._stat_interval_seconds):
00444 self._last_check_time = now
00445 try:
00446 modtime = self.get_modtime()
00447 if(modtime > self._last_mod_time):
00448 self.load_contents()
00449 except OSError, e:
00450 if e.errno == errno.ENOENT:
00451 self.delete()
00452 raise
00453
00454 class NamedQueryManager(object):
00455 """ Manages the lifespan of NamedQuery objects, drawing from a
00456 directory hierarchy of named query documents.
00457
00458 In practice this amounts to a memory cache of NamedQuery objects."""
00459
00460 def __init__(self, named_queries_dir):
00461 """ Initializes a manager to look for named queries in a
00462 directory."""
00463 self._dir = os.path.abspath(os.path.realpath(named_queries_dir))
00464 self._cached_queries = {}
00465
00466 def sql(self, connection, name, params):
00467 nq = self.get(name)
00468 return nq.sql(connection, params)
00469
00470 def get(self, name):
00471 """ Returns a NamedQuery instance based on the name, either
00472 from memory cache, or by parsing from disk.
00473
00474 The name is simply a relative path to the directory associated
00475 with the manager object. Before returning the instance, the
00476 NamedQuery object is cached in memory, so that subsequent
00477 accesses don't have to read from disk or do any parsing. This
00478 means that NamedQuery objects returned by this method are
00479 shared across all users of the manager object.
00480 NamedQuery.refresh is used to bring the NamedQuery objects in
00481 sync with the actual files on disk."""
00482 nq = self._cached_queries.get(name)
00483 if nq is None:
00484 nq = NamedQuery(name, os.path.join(self._dir, name))
00485 self._cached_queries[name] = nq
00486 else:
00487 try:
00488 nq.refresh()
00489 except OSError, e:
00490 if e.errno == errno.ENOENT:
00491 del self._cached_queries[name]
00492 raise
00493
00494 return nq
00495
00496 class _RewriteQueryForArray(object):
00497 "Helper class for rewriting queries with the @:name syntax"
00498 def __init__(self, params):
00499 self.params = params
00500 self.new_params = dict()
00501
00502 def operate(self, match):
00503 "Given a match, return the string that should be in use"
00504 key = match.group(1)
00505 value = self.params[key]
00506 if type(value) in (list,tuple):
00507 rv = []
00508 for idx in range(len(value)):
00509
00510
00511 new_key = "_%s_%s"%(key, str(idx))
00512 val_item = value[idx]
00513 if type(val_item) in (list, tuple, dict):
00514 if type(val_item) is dict:
00515
00516
00517
00518
00519
00520 raise ExpectationFailed('Only lists/tuples allowed,\
00521 received dict')
00522 values_keys = []
00523 for value_idx, item in enumerate(val_item):
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543 values_key = "%s_%s"%(new_key, value_idx)
00544 self.new_params[values_key] = item
00545 values_keys.append("%%(%s)s"%values_key)
00546
00547
00548
00549
00550 rv.append('(%s)' % ','.join(values_keys))
00551 else:
00552 self.new_params[new_key] = val_item
00553 rv.append("%%(%s)s"%new_key)
00554 return ','.join(rv)
00555 else:
00556
00557
00558
00559
00560
00561
00562
00563 return match.group(0)[1:]