webdav.py

Go to the documentation of this file.
00001 """
00002 @file webdav.py
00003 @brief Classes to make manipulation of a webdav store easier.
00004 
00005 $LicenseInfo:firstyear=2007&license=mit$
00006 
00007 Copyright (c) 2007-2008, Linden Research, Inc.
00008 
00009 Permission is hereby granted, free of charge, to any person obtaining a copy
00010 of this software and associated documentation files (the "Software"), to deal
00011 in the Software without restriction, including without limitation the rights
00012 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00013 copies of the Software, and to permit persons to whom the Software is
00014 furnished to do so, subject to the following conditions:
00015 
00016 The above copyright notice and this permission notice shall be included in
00017 all copies or substantial portions of the Software.
00018 
00019 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00020 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00021 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00022 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00023 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00024 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00025 THE SOFTWARE.
00026 $/LicenseInfo$
00027 """
00028 
00029 import sys, os, httplib, urlparse
00030 import socket, time
00031 import xml.dom.minidom
00032 import syslog
00033 # import signal
00034 
00035 __revision__ = '0'
00036 
00037 dav_debug = False
00038 
00039 
00040 # def urlsafe_b64decode (enc):
00041 #     return base64.decodestring (enc.replace ('_', '/').replace ('-', '+'))
00042 
00043 # def urlsafe_b64encode (str):
00044 #     return base64.encodestring (str).replace ('+', '-').replace ('/', '_')
00045 
00046 
class DAVError (Exception):
    """
    Error raised by this module when a WebDAV operation fails.

    Attributes:
      status  -- numeric status supplied by the raiser (defaults to 0)
      message -- short reason text
      body    -- additional text, appended to the exception string
      details -- free-form diagnostic information about the request
    """
    def __init__ (self, status=0, message='', body='', details=''):
        self.status = status
        self.message = message
        self.body = body
        self.details = details
        # str(exc) is "<status>:<message>:<body><details>".
        summary = '%d:%s:%s%s' % (status, message, body, details)
        Exception.__init__ (self, summary)

    def print_to_stderr (self):
        """ Write "<status> <message>" and then the details to stderr. """
        first_line = str (self.status) + ' ' + self.message
        sys.stderr.write (first_line + '\n')
        sys.stderr.write (str (self.details) + '\n')
00061 
00062 
class Timeout (Exception):
    """
    Exception used to signal that an operation took too long; it is
    raised by alarm_handler when an alarm signal is delivered.
    """
    def __init__ (self, arg=''):
        # Forward the optional description so str (exc) reproduces it.
        Exception.__init__ (self, arg)
00067 
00068 
def alarm_handler (signum, frame):
    """
    Signal-handler style callback: ignores both arguments and raises
    Timeout ('caught alarm').
    """
    raise Timeout ('caught alarm')
00072 
00073 
class WebDAV:
    """
    Small client for a WebDAV store reached over plain HTTP, optionally
    through an HTTP proxy.

    All remote paths handed to the methods are taken relative to the path
    component of the url given to the constructor (top_path).  Failures
    are reported by raising DAVError.
    """

    # Short names used by log () in place of well-known store urls.
    _LOG_HOST_ALIASES = {
        'http://int.tuco.lindenlab.com:80/asset/': 'tuco',
        'http://harriet.lindenlab.com/asset-keep/': 'harriet/asset-keep',
        'http://harriet.lindenlab.com/asset-flag/': 'harriet/asset-flag',
        'http://harriet.lindenlab.com/asset/': 'harriet/asset',
        'http://ozzy.lindenlab.com/asset/': 'ozzy/asset',
        'http://station11.lindenlab.com:12041/:': 'station11:12041',
        }

    # Same idea for str (proxy); 'None' is what str () gives for no proxy.
    _LOG_PROXY_ALIASES = {
        'None': '',
        'http://int.tuco.lindenlab.com:3128/': 'tuco',
        }

    def __init__ (self, url, proxy=None, retries_before_fail=6):
        """
        url                 -- base url of the store; its path becomes top_path
        proxy               -- optional url of an HTTP proxy to connect through
        retries_before_fail -- how many times request () retries before
                               giving up
        """
        self.init_url = url
        self.init_proxy = proxy
        self.retries_before_fail = retries_before_fail
        url_parsed = urlparse.urlsplit (url)

        self.top_path = url_parsed[ 2 ]
        # make sure top_path has a trailing /
        if not self.top_path:
            self.top_path = '/'
        elif len (self.top_path) > 1 and self.top_path[-1:] != '/':
            self.top_path += '/'

        if dav_debug:
            syslog.syslog ('new WebDAV %s : %s' % (str (url), str (proxy)))

        if proxy:
            # Connect to the proxy; remember the real host for the Host
            # header and for building absolute request urls.
            self.host_header = url_parsed[ 1 ]
            netloc = urlparse.urlsplit (proxy)[ 1 ]
        else: # no proxy
            self.host_header = None
            netloc = url_parsed[ 1 ]
        self.host, self.port = self.__split_host_port (netloc)

        self.connection = False
        self.connect ()


    def __split_host_port (self, netloc):
        """
        Split 'host' or 'host:port' into (host, port); port defaults to 80.
        """
        host_and_port = netloc.split (':')
        if len (host_and_port) > 1:
            return host_and_port[ 0 ], int (host_and_port[ 1 ])
        return host_and_port[ 0 ], 80


    def log (self, msg, depth=0):
        """
        Send msg to syslog, tagged with the store url and proxy (or their
        short aliases).  Does nothing unless dav_debug is set and depth is 0.
        """
        if dav_debug and depth == 0:
            host = str (self.init_url)
            host = self._LOG_HOST_ALIASES.get (host, host)
            proxy = str (self.init_proxy)
            proxy = self._LOG_PROXY_ALIASES.get (proxy, proxy)
            syslog.syslog ('WebDAV (%s:%s) %s' % (host, proxy, str (msg)))


    def connect (self):
        """ Create a fresh HTTPConnection to self.host / self.port. """
        self.log ('connect')
        self.connection = httplib.HTTPConnection (self.host, self.port)

    def __err (self, response, details):
        """
        Raise DAVError built from the response status, reason and remaining
        body, plus the store url, proxy and the supplied request details.
        """
        raise DAVError (response.status, response.reason, response.read (),
                        str (self.init_url) + ':' + \
                        str (self.init_proxy) + ':' + str (details))

    def request (self, method, path, body=None, headers=None,
                 read_all=True, body_hook = None, recurse=0, allow_cache=True):
        """
        Issue one HTTP request and return the httplib response object.

        method      -- HTTP verb.  For 'PURGE' the path is sent untouched;
                       for everything else it is made relative to top_path
                       (and turned into an absolute url when using a proxy).
        path        -- remote path
        body        -- request body passed to httplib, or None
        headers     -- dict of extra headers; it is copied, never modified
        read_all    -- if true, read and discard the whole response body
        body_hook   -- optional callable invoked after the request line and
                       headers have been sent; used to stream the body
        recurse     -- number of attempts already made (used by retries)
        allow_cache -- if false, add no-cache headers

        Responses with status 500, 503 or 403, and dropped connections, are
        retried through retry_request () until retries_before_fail attempts
        have been made.  Raises DAVError (status 0, 'reconnect failed') when
        the connection keeps failing.
        """
        # Work on a private copy so the caller's dict is left untouched.
        # The copy is what gets handed to retries, so headers added here
        # (no-cache, host) carry over to them.
        if headers is None:
            headers = {}
        else:
            headers = dict (headers)
        if not allow_cache:
            headers['Pragma'] = 'no-cache'
            headers['cache-control'] = 'no-cache'
        try:
            if method.lower () != 'purge':
                if path.startswith ('/'):
                    path = path[1:]
                if self.host_header: # use proxy
                    headers[ 'host' ] = self.host_header
                    fullpath = 'http://%s%s%s' % (self.host_header,
                                                  self.top_path, path)
                else: # no proxy
                    fullpath = self.top_path + path
            else:
                fullpath = path

            self.connection.request (method, fullpath, body, headers)
            if body_hook:
                body_hook ()

            response = self.connection.getresponse ()

            if read_all:
                while len (response.read (1024)) > 0:
                    pass
            retriable = response.status in (500, 503, 403)
            if retriable and recurse < self.retries_before_fail:
                return self.retry_request (method, path, body, headers,
                                           read_all, body_hook, recurse)
            return response
        except (httplib.ResponseNotReady,
                httplib.BadStatusLine,
                socket.error):
            # if the server hangs up on us (keepalive off, broken pipe),
            # we need to reconnect and try again.
            if recurse < self.retries_before_fail:
                return self.retry_request (method, path, body, headers,
                                           read_all, body_hook, recurse)
            raise DAVError (0, 'reconnect failed', self.host,
                            (method, path, body, headers, recurse))


    def retry_request (self, method, path, body, headers,
                       read_all, body_hook, recurse):
        """
        Sleep 10 seconds per previous attempt, reconnect, and repeat the
        request with the attempt counter increased by one.
        """
        time.sleep (10.0 * recurse)
        self.connect ()
        return self.request (method, path, body, headers,
                             read_all, body_hook, recurse+1)



    def propfind (self, path, body=None, depth=1):
        """
        Send a PROPFIND with the given xml body and Depth header.  Returns
        the unread response on 207 (Multi-Status); raises DAVError otherwise.
        """
        headers = {'Content-Type':'text/xml; charset="utf-8"',
                   'Depth':str(depth)}
        response = self.request ('PROPFIND', path, body, headers, False)
        if response.status == 207:
            return response # Multi-Status
        self.__err (response, ('PROPFIND', path, body, headers, 0))


    def purge (self, path):
        """
        issue a squid purge command.  Returns the response on 200 (purged)
        or 404 (was not cached); raises DAVError otherwise.
        """
        headers = {'Accept':'*/*'}
        response = self.request ('PURGE', path, None, headers)
        if response.status in (200, 404):
            # 200 if it was purge, 404 if it wasn't there.
            return response
        self.__err (response, ('PURGE', path, None, headers, 0))


    def get_file_size (self, path):
        """
        Use propfind to ask a webdav server what the size of
        a file is.  If used on a directory (collection) return 0.

        0 is also returned when the reply carries no getcontentlength
        element at all, or an empty one.
        """
        self.log ('get_file_size %s' % path)
        # "getcontentlength" property
        # 8.1.1 Example - Retrieving Named Properties
        # http://docs.python.org/lib/module-xml.dom.html
        nsurl = 'http://apache.org/dav/props/'
        doc = xml.dom.minidom.Document ()
        try:
            propfind_element = doc.createElementNS (nsurl, "D:propfind")
            propfind_element.setAttributeNS (nsurl, 'xmlns:D', 'DAV:')
            doc.appendChild (propfind_element)
            prop_element = doc.createElementNS (nsurl, "D:prop")
            propfind_element.appendChild (prop_element)
            con_len_element = doc.createElementNS (nsurl,
                                                   "D:getcontentlength")
            prop_element.appendChild (con_len_element)
            request_body = doc.toxml ()
        finally:
            doc.unlink ()

        response = self.propfind (path, request_body)

        resp_doc = xml.dom.minidom.parseString (response.read ())
        try:
            found = resp_doc.getElementsByTagNameNS ('DAV:',
                                                     'getcontentlength')
            try:
                # IndexError covers both "no such element" and "element
                # without a text child".
                return int (found[ 0 ].childNodes[ 0 ].nodeValue)
            except IndexError:
                return 0
        finally:
            # always break the DOM's reference cycles, also on early return
            resp_doc.unlink ()


    def file_exists (self, path):
        """
        do an http head on the given file.  return True if it succeeds
        """
        self.log ('file_exists %s' % path)
        expect_gzip = path.endswith ('.gz')
        response = self.request ('HEAD', path)
        got_gzip = response.getheader ('Content-Encoding', '').strip ()
        if got_gzip.lower () == 'x-gzip' and not expect_gzip:
            # the asset server fakes us out if we ask for the non-gzipped
            # version of an asset, but the server has the gzipped version.
            return False
        return response.status == 200


    def mkdir (self, path):
        """
        Create a collection with MKCOL.  201 and 405 (directory already
        existed?) count as success; anything else raises DAVError.
        """
        self.log ('mkdir %s' % path)
        headers = {}
        response = self.request ('MKCOL', path, None, headers)
        if response.status in (201, 405):
            return
        self.__err (response, ('MKCOL', path, None, headers, 0))


    def delete (self, path):
        """
        DELETE a file or collection.  204 and 404 count as success;
        anything else raises DAVError.
        """
        self.log ('delete %s' % path)
        headers = {'Depth':'infinity'} # collections require infinity
        response = self.request ('DELETE', path, None, headers)
        if response.status in (204, 404):
            return
        self.__err (response, ('DELETE', path, None, headers, 0))


    def __delete_quietly (self, path):
        """
        Best-effort delete used for cleanup: failures are logged, never
        raised.  Kept in its own method so that a bare `raise` in the
        caller still re-raises the caller's original exception.
        """
        try:
            self.delete (path)
        except Exception:
            self.log ('cleanup delete of %s failed' % path)


    def list_directory (self, path, dir_filter=None, allow_cache=True,
                        minimum_cache_time=False):
        """
        Request an http directory listing and parse the filenames out of lines
        like: '<LI><A HREF="X"> X</A>'. If a filter function is provided,
        only return filenames that the filter returns True for.

        This is sort of grody, but it seems faster than other ways of getting
        this information from an isilon.
        """
        self.log ('list_directory %s' % path)

        def try_match (line, lline, before, after):
            """
            Return [name] when lline contains before...after and the name
            passes dir_filter, else [].  Matching is done on the lowercased
            lline; the name itself is cut from the original line so that
            its case is preserved.
            """
            try:
                start = lline.index (before) + len (before)
                end = lline.index (after, start)
                asset = line[ start : end ]

                if not dir_filter or dir_filter (asset):
                    return [ asset ]
                return []
            except ValueError:
                return []

        if len (path) > 0 and path[-1:] != '/':
            path += '/'

        response = self.request ('GET', path, None, {}, False,
                                 allow_cache=allow_cache)

        if allow_cache and minimum_cache_time: # XXX
            # unfinished feature: only prints the Date header to stdout
            sys.stdout.write (str (response.getheader ('Date')) + '\n')

        if response.status != 200:
            self.__err (response, ('GET', path, None, {}, 0))
        assets = []
        for line in response.read ().split ('\n'):
            lline = line.lower ()
            if lline.find ("parent directory") == -1:
                # isilon file
                assets += try_match (line, lline, '<li><a href="', '"> ')
                # apache dir
                assets += try_match (line, lline,
                                     'alt="[dir]"> <a href="', '/">')
                # apache file
                assets += try_match (line, lline,
                                     'alt="[   ]"> <a href="', '">')
        return assets


    def __tmp_filename (self, path_and_file):
        """
        Return a hidden sibling name for path_and_file:
        '<dir>/.<name>.<pid>', or '.<name>.<pid>' when there is no directory.
        """
        head, tail = os.path.split (path_and_file)
        hidden = '.' + tail + '.' + str (os.getpid ())
        if head != '':
            return head + '/' + hidden
        return hidden


    def __put__ (self, filesize, body_hook, remotefile):
        """
        Upload filesize bytes (sent by body_hook) to a temporary remote
        name, verify the size, MOVE it to remotefile and verify again.
        Raises DAVError on any failure.
        """
        headers = {'Content-Length' : str (filesize)}
        remotefile_tmp = self.__tmp_filename (remotefile)
        response = self.request ('PUT', remotefile_tmp, None,
                                 headers, True, body_hook)
        if response.status not in (201, 204): # created, no content
            self.__err (response, ('PUT', remotefile, None, headers, 0))
        if filesize != self.get_file_size (remotefile_tmp):
            self.__delete_quietly (remotefile_tmp)
            raise DAVError (0, 'tmp upload error', remotefile_tmp)
        # move the file to its final location
        try:
            self.rename (remotefile_tmp, remotefile)
        except DAVError:
            if sys.exc_info ()[ 1 ].status == 403:
                # try to clean up the tmp file
                self.__delete_quietly (remotefile_tmp)
            raise
        if filesize != self.get_file_size (remotefile):
            raise DAVError (0, 'file upload error', str (remotefile_tmp))


    def put_string (self, strng, remotefile):
        """ Upload the string strng as remotefile (see __put__). """
        self.log ('put_string %d -> %s' % (len (strng), remotefile))
        filesize = len (strng)
        def body_hook ():
            """ send the whole string as the request body """
            self.connection.send (strng)
        self.__put__ (filesize, body_hook, remotefile)


    def put_file (self, localfile, remotefile):
        """
        Send a local file to a remote webdav store.  First, upload to
        a temporary filename.  Next make sure the file is the size we
        expected.  Next, move the file to its final location.  Next,
        check the file size at the final location.
        """
        self.log ('put_file %s -> %s' % (localfile, remotefile))
        filesize = os.path.getsize (localfile)
        def body_hook ():
            """ stream the local file as the request body """
            # binary mode: the bytes sent must match os.path.getsize ()
            handle = open (localfile, 'rb')
            try:
                while True:
                    data = handle.read (1300)
                    if len (data) == 0:
                        break
                    self.connection.send (data)
            finally:
                handle.close ()
        self.__put__ (filesize, body_hook, remotefile)


    def create_empty_file (self, remotefile):
        """ create an empty file and check that its size is 0 """
        self.log ('touch_file %s' % (remotefile))
        headers = {'Content-Length' : '0'}
        response = self.request ('PUT', remotefile, None, headers)
        if response.status not in (201, 204): # created, no content
            self.__err (response, ('PUT', remotefile, None, headers, 0))
        if self.get_file_size (remotefile) != 0:
            raise DAVError (0, 'file upload error', str (remotefile))


    def __content_length (self, response):
        """
        Return the response's Content-Length as an int, or None when the
        header is missing or is not a number.
        """
        try:
            return int (response.getheader ('Content-Length'))
        except (TypeError, ValueError):
            return None


    def __get_file_setup (self, remotefile, check_size=True):
        """
        Start a GET of remotefile and return (response, content_length),
        with the body still unread.  With check_size, the Content-Length
        must equal the size reported by get_file_size ().  Raises DAVError
        on a non-200 status or a size mismatch.
        """
        if check_size:
            remotesize = self.get_file_size (remotefile)
        response = self.request ('GET', remotefile, None, {}, False)
        if response.status != 200:
            self.__err (response, ('GET', remotefile, None, {}, 0))
        content_length = self.__content_length (response)
        if check_size and content_length != remotesize:
            raise DAVError (0, 'file DL size error', remotefile)
        return (response, content_length)


    def __get_file_read (self, writehandle, response, content_length):
        """
        Copy the response body into writehandle.  When content_length is
        known exactly that many bytes are required (DAVError 'short file
        download' otherwise) and anything beyond is discarded; when it is
        None the body is copied until end of stream.
        """
        if content_length is not None:
            remaining = content_length
            while remaining > 0:
                data = response.read (remaining)
                if len (data) == 0:
                    raise DAVError (0, 'short file download')
                remaining -= len (data)
                writehandle.write (data)
            # discard whatever the server sent past Content-Length
            while len (response.read ()) > 0:
                pass
        else:
            while True:
                data = response.read ()
                if len (data) < 1:
                    break
                writehandle.write (data)


    def __read_body_as_string (self, response, content_length):
        """
        Read the response body through an anonymous temporary file and
        return it as one string.
        """
        tmp_handle = os.tmpfile ()
        try:
            self.__get_file_read (tmp_handle, response, content_length)
            tmp_handle.seek (0)
            return tmp_handle.read ()
        finally:
            tmp_handle.close ()


    def __discard_local (self, filename):
        """ Best-effort removal of a local temporary file. """
        try:
            os.remove (filename)
        except OSError:
            pass


    def get_file (self, remotefile, localfile, check_size=True):
        """
        Get a remote file from a webdav server.  Download to a local
        tmp file, then move into place.  Sanity check file sizes as
        we go.  The tmp file is removed again if anything fails.
        """
        self.log ('get_file %s -> %s' % (remotefile, localfile))
        (response, content_length) = \
                   self.__get_file_setup (remotefile, check_size)
        localfile_tmp = self.__tmp_filename (localfile)
        moved = False
        try:
            # binary mode: the bytes on disk must match Content-Length
            handle = open (localfile_tmp, 'wb')
            try:
                self.__get_file_read (handle, response, content_length)
            finally:
                handle.close ()
            if check_size:
                if content_length != os.path.getsize (localfile_tmp):
                    raise DAVError (0, 'file DL size error',
                                    remotefile+','+localfile)
            os.rename (localfile_tmp, localfile)
            moved = True
        finally:
            if not moved:
                self.__discard_local (localfile_tmp)


    def get_file_as_string (self, remotefile, check_size=True):
        """
        download a file from a webdav server and return it as a string.
        """
        self.log ('get_file_as_string %s' % remotefile)
        (response, content_length) = \
                   self.__get_file_setup (remotefile, check_size)
        return self.__read_body_as_string (response, content_length)


    def get_post_as_string (self, remotefile, body):
        """
        Do an http POST, send body, get response and return it.
        Raises DAVError unless the status is 200.
        """
        self.log ('get_post_as_string %s' % remotefile)
        headers = {'Content-Type':'text/xml; charset="utf-8"'}
        response = self.request ('POST', remotefile, body, headers, False)
        if response.status != 200:
            self.__err (response, ('POST', remotefile, body, headers, 0))
        content_length = self.__content_length (response)
        return self.__read_body_as_string (response, content_length)


    def __destination_command (self, verb, remotesrc, dstdav, remotedst):
        """
        Send verb (MOVE or COPY) for remotesrc with a Destination header
        pointing at remotedst below dstdav's top_path.  201 and 204 count
        as success; anything else raises DAVError.

        self and dstdav should point to the same http server.
        """
        # NOTE(review): the Destination url is built from dstdav.host and
        # dstdav.port, which hold the proxy's address when dstdav was
        # created with a proxy -- confirm that is what the server expects.
        if len (remotedst) > 0 and remotedst[ 0 ] == '/':
            remotedst = remotedst[1:]
        headers = {'Destination': 'http://%s:%d%s%s' % (dstdav.host,
                                                        dstdav.port,
                                                        dstdav.top_path,
                                                        remotedst)}
        response = self.request (verb, remotesrc, None, headers)
        if response.status in (201, 204): # created, no content
            return
        self.__err (response, (verb, remotesrc, None, headers, 0))


    def rename (self, remotesrc, remotedst):
        """ rename a file on a webdav server """
        self.log ('rename %s -> %s' % (remotesrc, remotedst))
        self.__destination_command ('MOVE', remotesrc, self, remotedst)

    def xrename (self, remotesrc, dstdav, remotedst):
        """ rename a file, with the destination relative to dstdav """
        self.log ('xrename %s -> %s' % (remotesrc, remotedst))
        self.__destination_command ('MOVE', remotesrc, dstdav, remotedst)


    def copy (self, remotesrc, remotedst):
        """ copy a file on a webdav server """
        self.log ('copy %s -> %s' % (remotesrc, remotedst))
        self.__destination_command ('COPY', remotesrc, self, remotedst)

    def xcopy (self, remotesrc, dstdav, remotedst):
        """ copy a file, with the destination relative to dstdav """
        self.log ('xcopy %s -> %s' % (remotesrc, remotedst))
        self.__destination_command ('COPY', remotesrc, dstdav, remotedst)
00580 
00581 
def put_string (data, url):
    """
    Upload the string data to url: a WebDAV client is created for the
    url's scheme and host, and the url's path is used as the remote name.
    """
    scheme, netloc, path = urlparse.urlsplit (url)[:3]
    store = WebDAV ('%s://%s/' % (scheme, netloc))
    store.put_string (data, path)
00589 
00590 
def get_string (url, check_size=True):
    """
    Download url and return its contents as a string.  check_size is
    passed through to WebDAV.get_file_as_string.
    """
    scheme, netloc, path = urlparse.urlsplit (url)[:3]
    store = WebDAV ('%s://%s/' % (scheme, netloc))
    return store.get_file_as_string (path, check_size)

Generated on Fri May 16 08:31:53 2008 for SecondLife by  doxygen 1.5.5