""" Sub-module ``Fetch`` ==================== """ ############################################################################### # Test program for communication with the Auger Offline source code mirrors. # # # # Depends on urllib2 for communication, minidom for XML source list parsing, # # and progressbar, an unofficial library for pretty-printing of the download # # status. # # # # The program currently: # # - Reads download mirror configure settings from an XML card. # # - Cycles through the list of servers once until making a connection. # # - Downloads a file from the server, then checks it. # # # ############################################################################### import os import os.path import socket import stat import sys import copy import re # hashlib deprecates sha in python 2.6. Suppress the warning. import warnings warnings.filterwarnings("ignore", "the sha module is deprecated", DeprecationWarning) import sha try: import urllib2 haveUrllib2 = True except ImportError: haveUrllib2 = False import getpass import ConfigParser # needed for exception handling from Progressbar import Percentage, Bar, ETA, FileTransferSpeed, ProgressBar from ApeTools import InstallError from ApeTools import Config (major, minor, micro, release, serial) = sys.version_info # Global timeout settings (needed for python < 2.6) timeOutSec = 10 socket.setdefaulttimeout(timeOutSec) haveWget = False for p in os.environ["PATH"].split(":"): if os.path.exists(os.path.join(p, "wget")): haveWget = True #haveWget = False # for debugging: force use of urllib2 class DownloadError(InstallError): """Exception class for problems with the download. """ def __init__(self, value): self.value = value InstallError.__init__(self, args=[value], stage="download") class DataMirror: """Class to store data mirror properties and fetch requested source files. :param owner: The :class:`DownloadManager` instance which owns this object. :param tag: A tag to identify the mirror. This is used to select the preferred mirror(s). :param location: Specifies the country and institution where the mirror is hosted. :param url: The base url for this mirror. Files are located relative to this url. :param user: The user-name to specify for downloads from this site. A password is requeste when needed. """ def __init__(self, owner, tag, location, url, user): self.owner = owner self.tag = tag self.location = location self.url = url self.user = user self.auth = None def fetchFile(self, fileName): """Retrieve the file *filename* from this mirror. The password is for the :attr:`user` is requested from our :attr:`owner`. :raise: :exc:`DownloadError` in case of problems. """ if not (haveUrllib2 or haveWget): raise DownloadError("No download method available: " "install wget or upgrade python") url = self.url + "/" + fileName self.passwd = self.owner.getPassword(self.user) if haveWget: self.fetchFileWget(fileName, url) elif haveUrllib2: self.fetchFileNative(fileName, url) else: raise RuntimeError('FetchFile: should never get here.') def fetchFileNative(self, fileName, url): """Fetch one file from remote mirror. This routine provides a pure python based implementation, based on :mod:`urllib2`. .. note:: Currently, in python 2.6 and earlier, this method cannot be used to retrieve ``https`` urls through a firewall. The preferred method is :meth:`fetchFileWget`. :raise: :exc:`DownloadError` in case of problems. """ if self.auth is None: self.pmgr = urllib2.HTTPPasswordMgrWithDefaultRealm() self.pmgr.add_password(None, self.url, self.user, self.passwd) self.auth = urllib2.HTTPBasicAuthHandler(self.pmgr) opener = urllib2.build_opener(self.auth) urllib2.install_opener(opener) try: remoteFile = urllib2.urlopen(url) except urllib2.HTTPError: raise DownloadError("Cannot open %s." % url) fileSize = int(remoteFile.info()["Content-Length"]) localFile = open(fileName, "wb") # Track the transfer, in 200k blocks, with a progress bar. blockCount = 0 byteCount = 0 buffSize = 200 * 1024 print "\nDownloading %s from '%s'" % (fileName, self.location) pwidgets = ["Progress: ", Percentage(), " ", Bar(), " ", ETA(), " ", FileTransferSpeed()] pbar = ProgressBar(widgets=pwidgets, maxval=fileSize).start() while True: buffer = remoteFile.read(buffSize) if buffer == "": break localFile.write(buffer) blockCount += 1 byteCount = blockCount * buffSize if (byteCount > fileSize): byteCount = fileSize pbar.update(byteCount) pbar.finish() remoteFile.close() localFile.close() # Check the download for a corrupted or interrupted transfer. localSize = int(os.stat(fileName)[stat.ST_SIZE]) if localSize != fileSize: raise DownloadError("Size of " + fileName + " != size on host.") def fetchFileWget(self, fileName, url): """Fetch a file from a remote mirror, using ``wget`` to do the actual transfer. """ wgetrcName = os.path.abspath('ape.tmp.wgetrc') wgetrc = open(wgetrcName, "w") os.chmod(wgetrcName, stat.S_IRUSR | stat.S_IWUSR) try: wgetrc.write("user=%s\npassword=%s\n" % (self.user, self.passwd)) wgetrc.close() env = copy.copy(os.environ) env["WGETRC"] = wgetrcName failed = os.spawnvpe(os.P_WAIT, "wget", ["wget", "--no-check-certificate", url], env) finally: wgetrc.close() os.remove(wgetrcName) if failed: raise DownloadError("Failed to download %s with wget." % url) class DownloadManager: """Manager for downloads.""" def __init__(self): self.fillMirrors() self.fillSha1() self.mirrorIndex = 0 self.passwords = {} def addMirror(self, tag): """Add the mirror identified by *tag*. The information is retrieved from the configuration management, section ``[mirror `tag`]``. Defautls for all mirrors can be provided in the section ``[mirror]``. .. code-block:: ini [mirror] user = globalUser [mirror exa] location = Exampolonia University url = http://www.mirror.example.edu/ape-files :raise: :exc:`ApeTools.InstallError` if it cannot locate the information for mirror *tag*. """ section = "mirror " + tag fallback = ["mirror"] try: self.mirrorList.append(DataMirror(self, tag, Config.get(section, "location", fallback), Config.get(section, "url", fallback), Config.get(section, "user", fallback))) except ConfigParser.NoSectionError: raise InstallError(stage="initialization", args=["Error loading data for mirror tagged " "'%s'" % tag]) def fillMirrors(self): """Extract the information for all mirrors. The list of mirror sites starts with the sites listed in the ``ape.mirrors`` configuration variable. This is followed by all sites not yet listed, identified by looking for sections named ``[mirror `tag`]``. """ self.mirrorList = [] mirrorTags = Config.getlist("ape", "mirrors") for tag in mirrorTags: self.addMirror(tag) for tag in [s[7:] for s in Config.sections() if \ s.startswith("mirror ")]: if tag not in mirrorTags: self.addMirror(tag) def fillSha1(self): """Read the sha1 checksums from a file. The file is specified by the configuration variable ``sha1File`` in section ``[ape internal]``. The file is in the format generated by ``openssl sha1``. .. note:: The SHA1 file has to be re-generated when the distribution mirrors get updated. """ self.sha1s = {} lineRe = re.compile(r"SHA1\((.*)\)= ([0-9a-fA-F]{40})") for l in open(Config.get("ape internal", "sha1File")): m = lineRe.match(l) self.sha1s[m.group(1)] = m.group(2) def checkSha1(self, fileName): """Compare checksum for *fileName*. :returns: Result of comparison of checksum with precomputed value, ``None`` if no precomputed checksum available. """ if fileName in self.sha1s: return self.sha1s[fileName] == \ self.computeSha1(open(fileName)).hexdigest() else: return None def computeSha1(self, f): """Compute the sha1 checksum of file *f*. *f* has to be an open file. :returns: :mod:`sha1` checksum object """ ## Block size for reading files blockSize = 8 * 1024 * 1024 checksum = sha.new() block = f.read(blockSize) while len(block): checksum.update(block) block = f.read(blockSize) return checksum def getPassword(self, user): """Return the password for *user*. If it is not known, prompt the user and cache the answer for future requests. """ try: return self.passwords[user] except KeyError: passPrompt = "Type password for user '%s': " % user pw = getpass.getpass(passPrompt) self.passwords[user] = pw return pw def fetchFile(self, fileName, directory=None): """Retrieve *fileName*. If set, the result is stored in the directory *directory*, otherwise it is stored in the current directory. The :class:`DataMirror`\ s known to this manager are tried in a round-robin fashion. We remember the last site used successfully and start trying the next download from there. """ if directory: oldwd = os.getcwd() os.chdir(directory) try: if os.path.exists(fileName): # No checksum is tolerated, useful for debugging. if self.checkSha1(fileName) in [True, None]: return else: os.remove(fileName) for i in range(len(self.mirrorList)): try: self.mirrorList[self.mirrorIndex].fetchFile(fileName) break except DownloadError, error: self.mirrorIndex = (self.mirrorIndex + 1) % \ len(self.mirrorList) errorMsg = error.value else: raise DownloadError("Cannot fetch %s. Last error: %s" % \ (fileName, errorMsg)) # remember: the checksum is None if unknown. if self.checkSha1(fileName) == False: raise DownloadError("Checksum error in %s" % fileName) os.remove(fileName) finally: if directory: os.chdir(oldwd)