# Part of the A-A-P recipe executive: Access files which may be remote

# Copyright (C) 2002 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

#
# Access files by their URL.
# If they are remote, may download or upload the file.
# Uses the Cache to avoid up/downloading too often.
#

import os.path
import shutil
import time
from urlparse import urlparse
from urllib import urlretrieve, urlcleanup, urlopen

from Util import *
from Message import *


def is_url(name):
    """Return non-zero when "name" is a URL, zero when it's a local file.

    A name counts as a URL when urlparse() finds a non-empty scheme in it."""
    scheme, mach, path, parm, query, frag = urlparse(name, '', 0)
    return scheme != ''


def url_split3(name):
    """Split a URL into scheme, machine and path.

    Returns a (scheme, machine, path) tuple.  The params, query and fragment
    parts reported by urlparse() are appended to the path without any
    separator."""
    scheme, mach, path, parm, query, frag = urlparse(name, '', 0)
    if scheme != '' and mach == '' and path[:2] == '//':
        # urlparse doesn't handle scp://machine/path correctly
        mach = path[2:]
        # Use the string method instead of string.find(): this file does not
        # import the "string" module itself.
        i = mach.find('/')
        if i > 0:
            # The slash separating machine and path is not kept in the path.
            path = mach[i + 1:]
            mach = mach[:i]
    return scheme, mach, path + parm + query + frag


def url_time(globals, name):
    """Obtain the timestamp in seconds (in GMT if possible) for the URL "name".
    Returns zero (very old) if the timestamp can't be obtained.

    For a URL the timestamp of the cached copy is used when cache_lookup()
    finds one, otherwise the remote server is asked.  For a local file the
    modification time is used."""
    if is_url(name):
        from Cache import cache_lookup
        c = cache_lookup(globals, name)
        if c:
            # use timestamp for cached file.
            t = c.timestamp()
        else:
            # obtain timestamp for remote files.
            t = remote_time(name)
    else:
        try:
            t = os.path.getmtime(name)
        except (IOError, OSError):
            t = 0
    return t


def remote_time(name):
    """Get the timestamp of a remote file.

    Opens the URL "name" and reads the date from the returned headers.
    This is best effort: any failure results in zero."""
    try:
        msg_info(_('getting timestamp for "%s"') % name)
        up = urlopen(name)
        try:
            t = get_header_date(up.info())
        finally:
            # Always release the connection, also when reading the headers
            # fails.
            up.close()
    except Exception:
        # Don't use a bare "except": that would also swallow
        # KeyboardInterrupt and SystemExit.
        t = 0
    return t


def get_header_date(headers):
    """Get the date from a MIME header.
    Returns zero when not available.

    "Last-Modified" is preferred over "Date".  A header that is present but
    cannot be parsed or converted is skipped instead of raising an error."""
    from rfc822 import parsedate

    # NOTE(review): parsedate() drops the time zone and time.mktime()
    # interprets the tuple as local time, so the result is not strictly GMT.
    for key in ("Last-Modified", "Date"):
        if headers.has_key(key):
            parsed = parsedate(headers[key])
            if parsed is None:
                # Malformed date; time.mktime(None) would raise TypeError.
                continue
            try:
                return time.mktime(parsed)
            except (ValueError, OverflowError):
                # Date outside of the range the platform can represent.
                continue
    return 0


def url_download(url, fname):
    """Attempt downloading file "url" to file "fname".
    Overwrite "fname" if it already exists.
    When "fname" is empty, use a temporary file.  The caller has to use
    "url_cleanup()" when done with it.
    Returns a tuple of the filename and the timestamp of the remote file
    when possible (the timestamp is zero for "scp" and when the headers
    don't provide a usable date).
    Throws an IOError if downloading failed."""
    # The message is formatted after the translation lookup, otherwise the
    # lookup is done with the already expanded text and never matches.
    msg_info(_('Attempting download of "%s"') % url)
    rtime = 0

    fscheme, fmach, fpath = url_split3(url)
    if fscheme == 'scp':
        if fname == '':
            resfile = tempfname()
        else:
            resfile = fname
        # NOTE(review): the machine, path and file name are put in the
        # command unquoted; presumably logged_system() passes this to a
        # shell, thus special characters in the URL are a risk.  The result
        # of logged_system() is ignored, a failing scp is not detected here.
        logged_system('scp -C %s:%s %s' % (fmach, fpath, resfile))
    else:
        if fname == '':
            # read to temporary file
            resfile, h = urlretrieve(url)
        else:
            resfile, h = urlretrieve(url, fname)
            if resfile != fname:
                # Using a cached file, need to make a copy.
                shutil.copy2(resfile, fname)
                resfile = fname
            # The result is in "fname" now, the urlretrieve() leftovers can
            # be removed.
            urlcleanup()

        if h:
            rtime = get_header_date(h)

    if fname == '':
        msg_info(_('Downloaded "%s"') % url)
    else:
        msg_info(_('Downloaded "%s" to "%s"') % (url, fname))

    return resfile, rtime


def url_cleanup(scheme):
    """Cleanup after using url_download with scheme "scheme".

    Nothing is done for "scp"."""
    if scheme != 'scp':
        urlcleanup()    # remove any cached file from urlretrieve()


def download_file(globals, url_dl, node, use_cache):
    """Download a file according to "url_dl" and copy it over "node.name".
    Use the cache when "use_cache" is non-zero, otherwise obtain a fresh copy.
    Return non-zero for success.

    "url_dl" must have a "name" entry with the URL and may have a
    "cache_update" entry, which is passed on to local_name().
    Raises a UserError when the obtained file cannot be copied."""
    from Cache import local_name

    if not use_cache:
        # Force obtaining a fresh copy.
        cu = "0 sec"
    elif url_dl.has_key("cache_update"):
        cu = url_dl["cache_update"]
    else:
        cu = None

    # TODO: handle attributes (e.g., login and password)
    fname = local_name(globals, url_dl["name"], cu)
    if fname and os.path.exists(fname):
        # copy the downloaded file over the original one.
        try:
            shutil.copyfile(fname, node.absname)
        except (IOError, OSError) as e:
            # Separate the message from the system error text.
            raise UserError(_('Cannot copy "%s" to "%s"')
                                      % (fname, node.name) + ': ' + str(e))
        msg_info(_('Copied file from cache: "%s"') % node.short_name())
        return 1
    return 0


# vim: set sw=4 sts=4 tw=79 fo+=l: