00001 """
00002 @file webdav.py
00003 @brief Classes to make manipulation of a webdav store easier.
00004
00005 $LicenseInfo:firstyear=2007&license=mit$
00006
00007 Copyright (c) 2007-2008, Linden Research, Inc.
00008
00009 Permission is hereby granted, free of charge, to any person obtaining a copy
00010 of this software and associated documentation files (the "Software"), to deal
00011 in the Software without restriction, including without limitation the rights
00012 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00013 copies of the Software, and to permit persons to whom the Software is
00014 furnished to do so, subject to the following conditions:
00015
00016 The above copyright notice and this permission notice shall be included in
00017 all copies or substantial portions of the Software.
00018
00019 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00020 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00021 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00022 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00023 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00024 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00025 THE SOFTWARE.
00026 $/LicenseInfo$
00027 """
00028
00029 import sys, os, httplib, urlparse
00030 import socket, time
00031 import xml.dom.minidom
00032 import syslog
00033
00034
00035 __revision__ = '0'
00036
00037 dav_debug = False
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 class DAVError (Exception):
00048 """ Base class for exceptions in this module. """
00049 def __init__ (self, status=0, message='', body='', details=''):
00050 self.status = status
00051 self.message = message
00052 self.body = body
00053 self.details = details
00054 Exception.__init__ (self, '%d:%s:%s%s' % (self.status, self.message,
00055 self.body, self.details))
00056
00057 def print_to_stderr (self):
00058 """ print_to_stderr docstring """
00059 print >> sys.stderr, str (self.status) + ' ' + self.message
00060 print >> sys.stderr, str (self.details)
00061
00062
00063 class Timeout (Exception):
00064 """ Timeout docstring """
00065 def __init__ (self, arg=''):
00066 Exception.__init__ (self, arg)
00067
00068
00069 def alarm_handler (signum, frame):
00070 """ alarm_handler docstring """
00071 raise Timeout ('caught alarm')
00072
00073
00074 class WebDAV:
00075 """ WebDAV docstring """
00076 def __init__ (self, url, proxy=None, retries_before_fail=6):
00077 self.init_url = url
00078 self.init_proxy = proxy
00079 self.retries_before_fail = retries_before_fail
00080 url_parsed = urlparse.urlsplit (url)
00081
00082 self.top_path = url_parsed[ 2 ]
00083
00084 if self.top_path == None or self.top_path == '':
00085 self.top_path = '/'
00086 elif len (self.top_path) > 1 and self.top_path[-1:] != '/':
00087 self.top_path += '/'
00088
00089 if dav_debug:
00090 syslog.syslog ('new WebDAV %s : %s' % (str (url), str (proxy)))
00091
00092 if proxy:
00093 proxy_parsed = urlparse.urlsplit (proxy)
00094 self.host_header = url_parsed[ 1 ]
00095 host_and_port = proxy_parsed[ 1 ].split (':')
00096 self.host = host_and_port[ 0 ]
00097 if len (host_and_port) > 1:
00098 self.port = int(host_and_port[ 1 ])
00099 else:
00100 self.port = 80
00101 else:
00102 host_and_port = url_parsed[ 1 ].split (':')
00103 self.host_header = None
00104 self.host = host_and_port[ 0 ]
00105 if len (host_and_port) > 1:
00106 self.port = int(host_and_port[ 1 ])
00107 else:
00108 self.port = 80
00109
00110 self.connection = False
00111 self.connect ()
00112
00113
00114 def log (self, msg, depth=0):
00115 """ log docstring """
00116 if dav_debug and depth == 0:
00117 host = str (self.init_url)
00118 if host == 'http://int.tuco.lindenlab.com:80/asset/':
00119 host = 'tuco'
00120 if host == 'http://harriet.lindenlab.com/asset-keep/':
00121 host = 'harriet/asset-keep'
00122 if host == 'http://harriet.lindenlab.com/asset-flag/':
00123 host = 'harriet/asset-flag'
00124 if host == 'http://harriet.lindenlab.com/asset/':
00125 host = 'harriet/asset'
00126 if host == 'http://ozzy.lindenlab.com/asset/':
00127 host = 'ozzy/asset'
00128 if host == 'http://station11.lindenlab.com:12041/:':
00129 host = 'station11:12041'
00130 proxy = str (self.init_proxy)
00131 if proxy == 'None':
00132 proxy = ''
00133 if proxy == 'http://int.tuco.lindenlab.com:3128/':
00134 proxy = 'tuco'
00135 syslog.syslog ('WebDAV (%s:%s) %s' % (host, proxy, str (msg)))
00136
00137
00138 def connect (self):
00139 """ connect docstring """
00140 self.log ('connect')
00141 self.connection = httplib.HTTPConnection (self.host, self.port)
00142
00143 def __err (self, response, details):
00144 """ __err docstring """
00145 raise DAVError (response.status, response.reason, response.read (),
00146 str (self.init_url) + ':' + \
00147 str (self.init_proxy) + ':' + str (details))
00148
00149 def request (self, method, path, body=None, headers=None,
00150 read_all=True, body_hook = None, recurse=0, allow_cache=True):
00151 """ request docstring """
00152
00153 if headers == None:
00154 headers = {}
00155 if not allow_cache:
00156 headers['Pragma'] = 'no-cache'
00157 headers['cache-control'] = 'no-cache'
00158 try:
00159 if method.lower () != 'purge':
00160 if path.startswith ('/'):
00161 path = path[1:]
00162 if self.host_header:
00163 headers[ 'host' ] = self.host_header
00164 fullpath = 'http://%s%s%s' % (self.host_header,
00165 self.top_path, path)
00166 else:
00167 fullpath = self.top_path + path
00168 else:
00169 fullpath = path
00170
00171 self.connection.request (method, fullpath, body, headers)
00172 if body_hook:
00173 body_hook ()
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187 response = self.connection.getresponse ()
00188
00189 if read_all:
00190 while len (response.read (1024)) > 0:
00191 pass
00192 if (response.status == 500 or \
00193 response.status == 503 or \
00194 response.status == 403) and \
00195 recurse < self.retries_before_fail:
00196 return self.retry_request (method, path, body, headers,
00197 read_all, body_hook, recurse)
00198 return response
00199 except (httplib.ResponseNotReady,
00200 httplib.BadStatusLine,
00201 socket.error):
00202
00203
00204 if recurse < self.retries_before_fail:
00205 return self.retry_request (method, path, body, headers,
00206 read_all, body_hook, recurse)
00207 raise DAVError (0, 'reconnect failed', self.host,
00208 (method, path, body, headers, recurse))
00209
00210
00211 def retry_request (self, method, path, body, headers,
00212 read_all, body_hook, recurse):
00213 """ retry_request docstring """
00214 time.sleep (10.0 * recurse)
00215 self.connect ()
00216 return self.request (method, path, body, headers,
00217 read_all, body_hook, recurse+1)
00218
00219
00220
00221 def propfind (self, path, body=None, depth=1):
00222 """ propfind docstring """
00223
00224 headers = {'Content-Type':'text/xml; charset="utf-8"',
00225 'Depth':str(depth)}
00226 response = self.request ('PROPFIND', path, body, headers, False)
00227 if response.status == 207:
00228 return response
00229 self.__err (response, ('PROPFIND', path, body, headers, 0))
00230
00231
00232 def purge (self, path):
00233 """ issue a squid purge command """
00234 headers = {'Accept':'*/*'}
00235 response = self.request ('PURGE', path, None, headers)
00236 if response.status == 200 or response.status == 404:
00237
00238 return response
00239 self.__err (response, ('PURGE', path, None, headers))
00240
00241
00242 def get_file_size (self, path):
00243 """
00244 Use propfind to ask a webdav server what the size of
00245 a file is. If used on a directory (collection) return 0
00246 """
00247 self.log ('get_file_size %s' % path)
00248
00249
00250
00251 nsurl = 'http://apache.org/dav/props/'
00252 doc = xml.dom.minidom.Document ()
00253 propfind_element = doc.createElementNS (nsurl, "D:propfind")
00254 propfind_element.setAttributeNS (nsurl, 'xmlns:D', 'DAV:')
00255 doc.appendChild (propfind_element)
00256 prop_element = doc.createElementNS (nsurl, "D:prop")
00257 propfind_element.appendChild (prop_element)
00258 con_len_element = doc.createElementNS (nsurl, "D:getcontentlength")
00259 prop_element.appendChild (con_len_element)
00260
00261 response = self.propfind (path, doc.toxml ())
00262 doc.unlink ()
00263
00264 resp_doc = xml.dom.minidom.parseString (response.read ())
00265 cln = resp_doc.getElementsByTagNameNS ('DAV:','getcontentlength')[ 0 ]
00266 try:
00267 content_length = int (cln.childNodes[ 0 ].nodeValue)
00268 except IndexError:
00269 return 0
00270 resp_doc.unlink ()
00271 return content_length
00272
00273
00274 def file_exists (self, path):
00275 """
00276 do an http head on the given file. return True if it succeeds
00277 """
00278 self.log ('file_exists %s' % path)
00279 expect_gzip = path.endswith ('.gz')
00280 response = self.request ('HEAD', path)
00281 got_gzip = response.getheader ('Content-Encoding', '').strip ()
00282 if got_gzip.lower () == 'x-gzip' and expect_gzip == False:
00283
00284
00285 return False
00286 return response.status == 200
00287
00288
00289 def mkdir (self, path):
00290 """ mkdir docstring """
00291 self.log ('mkdir %s' % path)
00292 headers = {}
00293 response = self.request ('MKCOL', path, None, headers)
00294 if response.status == 201:
00295 return
00296 if response.status == 405:
00297 return
00298 self.__err (response, ('MKCOL', path, None, headers, 0))
00299
00300
00301 def delete (self, path):
00302 """ delete docstring """
00303 self.log ('delete %s' % path)
00304 headers = {'Depth':'infinity'}
00305 response = self.request ('DELETE', path, None, headers)
00306 if response.status == 204:
00307 return
00308 if response.status == 404:
00309 return
00310 self.__err (response, ('DELETE', path, None, headers, 0))
00311
00312
00313 def list_directory (self, path, dir_filter=None, allow_cache=True,
00314 minimum_cache_time=False):
00315 """
00316 Request an http directory listing and parse the filenames out of lines
00317 like: '<LI><A HREF="X"> X</A>'. If a filter function is provided,
00318 only return filenames that the filter returns True for.
00319
00320 This is sort of grody, but it seems faster than other ways of getting
00321 this information from an isilon.
00322 """
00323 self.log ('list_directory %s' % path)
00324
00325 def try_match (lline, before, after):
00326 """ try_match docstring """
00327 try:
00328 blen = len (before)
00329 asset_start_index = lline.index (before)
00330 asset_end_index = lline.index (after, asset_start_index + blen)
00331 asset = line[ asset_start_index + blen : asset_end_index ]
00332
00333 if not dir_filter or dir_filter (asset):
00334 return [ asset ]
00335 return []
00336 except ValueError:
00337 return []
00338
00339 if len (path) > 0 and path[-1:] != '/':
00340 path += '/'
00341
00342 response = self.request ('GET', path, None, {}, False,
00343 allow_cache=allow_cache)
00344
00345 if allow_cache and minimum_cache_time:
00346 print response.getheader ('Date')
00347
00348
00349
00350
00351
00352
00353 if response.status != 200:
00354 self.__err (response, ('GET', path, None, {}, 0))
00355 assets = []
00356 for line in response.read ().split ('\n'):
00357 lline = line.lower ()
00358 if lline.find ("parent directory") == -1:
00359
00360 assets += try_match (lline, '<li><a href="', '"> ')
00361
00362 assets += try_match (lline, 'alt="[dir]"> <a href="', '/">')
00363
00364 assets += try_match (lline, 'alt="[ ]"> <a href="', '">')
00365 return assets
00366
00367
00368 def __tmp_filename (self, path_and_file):
00369 """ __tmp_filename docstring """
00370 head, tail = os.path.split (path_and_file)
00371 if head != '':
00372 return head + '/.' + tail + '.' + str (os.getpid ())
00373 else:
00374 return head + '.' + tail + '.' + str (os.getpid ())
00375
00376
00377 def __put__ (self, filesize, body_hook, remotefile):
00378 """ __put__ docstring """
00379 headers = {'Content-Length' : str (filesize)}
00380 remotefile_tmp = self.__tmp_filename (remotefile)
00381 response = self.request ('PUT', remotefile_tmp, None,
00382 headers, True, body_hook)
00383 if not response.status in (201, 204):
00384 self.__err (response, ('PUT', remotefile, None, headers, 0))
00385 if filesize != self.get_file_size (remotefile_tmp):
00386 try:
00387 self.delete (remotefile_tmp)
00388 except:
00389 pass
00390 raise DAVError (0, 'tmp upload error', remotefile_tmp)
00391
00392 try:
00393 self.rename (remotefile_tmp, remotefile)
00394 except DAVError, exc:
00395 if exc.status == 403:
00396 try:
00397 self.delete (remotefile_tmp)
00398 except:
00399 pass
00400 raise
00401 if filesize != self.get_file_size (remotefile):
00402 raise DAVError (0, 'file upload error', str (remotefile_tmp))
00403
00404
00405 def put_string (self, strng, remotefile):
00406 """ put_string docstring """
00407 self.log ('put_string %d -> %s' % (len (strng), remotefile))
00408 filesize = len (strng)
00409 def body_hook ():
00410 """ body_hook docstring """
00411 self.connection.send (strng)
00412 self.__put__ (filesize, body_hook, remotefile)
00413
00414
00415 def put_file (self, localfile, remotefile):
00416 """
00417 Send a local file to a remote webdav store. First, upload to
00418 a temporary filename. Next make sure the file is the size we
00419 expected. Next, move the file to its final location. Next,
00420 check the file size at the final location.
00421 """
00422 self.log ('put_file %s -> %s' % (localfile, remotefile))
00423 filesize = os.path.getsize (localfile)
00424 def body_hook ():
00425 """ body_hook docstring """
00426 handle = open (localfile)
00427 while True:
00428 data = handle.read (1300)
00429 if len (data) == 0:
00430 break
00431 self.connection.send (data)
00432 handle.close ()
00433 self.__put__ (filesize, body_hook, remotefile)
00434
00435
00436 def create_empty_file (self, remotefile):
00437 """ create an empty file """
00438 self.log ('touch_file %s' % (remotefile))
00439 headers = {'Content-Length' : '0'}
00440 response = self.request ('PUT', remotefile, None, headers)
00441 if not response.status in (201, 204):
00442 self.__err (response, ('PUT', remotefile, None, headers, 0))
00443 if self.get_file_size (remotefile) != 0:
00444 raise DAVError (0, 'file upload error', str (remotefile))
00445
00446
00447 def __get_file_setup (self, remotefile, check_size=True):
00448 """ __get_file_setup docstring """
00449 if check_size:
00450 remotesize = self.get_file_size (remotefile)
00451 response = self.request ('GET', remotefile, None, {}, False)
00452 if response.status != 200:
00453 self.__err (response, ('GET', remotefile, None, {}, 0))
00454 try:
00455 content_length = int (response.getheader ("Content-Length"))
00456 except TypeError:
00457 content_length = None
00458 if check_size:
00459 if content_length != remotesize:
00460 raise DAVError (0, 'file DL size error', remotefile)
00461 return (response, content_length)
00462
00463
00464 def __get_file_read (self, writehandle, response, content_length):
00465 """ __get_file_read docstring """
00466 if content_length != None:
00467 so_far_length = 0
00468 while so_far_length < content_length:
00469 data = response.read (content_length - so_far_length)
00470 if len (data) == 0:
00471 raise DAVError (0, 'short file download')
00472 so_far_length += len (data)
00473 writehandle.write (data)
00474 while len (response.read ()) > 0:
00475 pass
00476 else:
00477 while True:
00478 data = response.read ()
00479 if (len (data) < 1):
00480 break
00481 writehandle.write (data)
00482
00483
00484 def get_file (self, remotefile, localfile, check_size=True):
00485 """
00486 Get a remote file from a webdav server. Download to a local
00487 tmp file, then move into place. Sanity check file sizes as
00488 we go.
00489 """
00490 self.log ('get_file %s -> %s' % (remotefile, localfile))
00491 (response, content_length) = \
00492 self.__get_file_setup (remotefile, check_size)
00493 localfile_tmp = self.__tmp_filename (localfile)
00494 handle = open (localfile_tmp, 'w')
00495 self.__get_file_read (handle, response, content_length)
00496 handle.close ()
00497 if check_size:
00498 if content_length != os.path.getsize (localfile_tmp):
00499 raise DAVError (0, 'file DL size error',
00500 remotefile+','+localfile)
00501 os.rename (localfile_tmp, localfile)
00502
00503
00504 def get_file_as_string (self, remotefile, check_size=True):
00505 """
00506 download a file from a webdav server and return it as a string.
00507 """
00508 self.log ('get_file_as_string %s' % remotefile)
00509 (response, content_length) = \
00510 self.__get_file_setup (remotefile, check_size)
00511
00512 tmp_handle = os.tmpfile ()
00513 self.__get_file_read (tmp_handle, response, content_length)
00514 tmp_handle.seek (0)
00515 ret = tmp_handle.read ()
00516 tmp_handle.close ()
00517
00518 return ret
00519
00520
00521 def get_post_as_string (self, remotefile, body):
00522 """
00523 Do an http POST, send body, get response and return it.
00524 """
00525 self.log ('get_post_as_string %s' % remotefile)
00526
00527 headers = {'Content-Type':'text/xml; charset="utf-8"'}
00528
00529 response = self.request ('POST', remotefile, body, headers, False)
00530 if response.status != 200:
00531 self.__err (response, ('POST', remotefile, body, headers, 0))
00532 try:
00533 content_length = int (response.getheader ('Content-Length'))
00534 except TypeError:
00535 content_length = None
00536 tmp_handle = os.tmpfile ()
00537 self.__get_file_read (tmp_handle, response, content_length)
00538 tmp_handle.seek (0)
00539 ret = tmp_handle.read ()
00540 tmp_handle.close ()
00541 return ret
00542
00543
00544 def __destination_command (self, verb, remotesrc, dstdav, remotedst):
00545 """
00546 self and dstdav should point to the same http server.
00547 """
00548 if len (remotedst) > 0 and remotedst[ 0 ] == '/':
00549 remotedst = remotedst[1:]
00550 headers = {'Destination': 'http://%s:%d%s%s' % (dstdav.host,
00551 dstdav.port,
00552 dstdav.top_path,
00553 remotedst)}
00554 response = self.request (verb, remotesrc, None, headers)
00555 if response.status == 201:
00556 return
00557 if response.status == 204:
00558 return
00559 self.__err (response, (verb, remotesrc, None, headers, 0))
00560
00561
00562 def rename (self, remotesrc, remotedst):
00563 """ rename a file on a webdav server """
00564 self.log ('rename %s -> %s' % (remotesrc, remotedst))
00565 self.__destination_command ('MOVE', remotesrc, self, remotedst)
00566 def xrename (self, remotesrc, dstdav, remotedst):
00567 """ rename a file on a webdav server """
00568 self.log ('xrename %s -> %s' % (remotesrc, remotedst))
00569 self.__destination_command ('MOVE', remotesrc, dstdav, remotedst)
00570
00571
00572 def copy (self, remotesrc, remotedst):
00573 """ copy a file on a webdav server """
00574 self.log ('copy %s -> %s' % (remotesrc, remotedst))
00575 self.__destination_command ('COPY', remotesrc, self, remotedst)
00576 def xcopy (self, remotesrc, dstdav, remotedst):
00577 """ copy a file on a webdav server """
00578 self.log ('xcopy %s -> %s' % (remotesrc, remotedst))
00579 self.__destination_command ('COPY', remotesrc, dstdav, remotedst)
00580
00581
00582 def put_string (data, url):
00583 """
00584 upload string s to a url
00585 """
00586 url_parsed = urlparse.urlsplit (url)
00587 dav = WebDAV ('%s://%s/' % (url_parsed[ 0 ], url_parsed[ 1 ]))
00588 dav.put_string (data, url_parsed[ 2 ])
00589
00590
00591 def get_string (url, check_size=True):
00592 """
00593 return the contents of a url as a string
00594 """
00595 url_parsed = urlparse.urlsplit (url)
00596 dav = WebDAV ('%s://%s/' % (url_parsed[ 0 ], url_parsed[ 1 ]))
00597 return dav.get_file_as_string (url_parsed[ 2 ], check_size)