import requests
import urllib
import os
from boutiques.logger import raise_error, print_info
from boutiques.searcher import Searcher
from boutiques.zenodoHelper import ZenodoError

try:
    # Python 3
    from urllib.request import urlopen
    from urllib.request import urlretrieve
except ImportError:
    # Python 2
    from urllib2 import urlopen
    from urllib import urlretrieve


class Puller():
    """Fetch descriptor JSON files for a list of Zenodo IDs.

    Each ID is mapped to a file in ``~/.cache/boutiques``; ``pull`` returns
    the cached file when it exists and downloads it otherwise.
    """

    def __init__(self, zids, verbose=False, sandbox=False):
        """Normalize the requested IDs and compute their cache locations.

        :param zids: iterable of Zenodo IDs, either the short form
            (``zenodo.123456``) or a full DOI ending in that form. The
            iterable is only read, never modified.
        :param verbose: when true, informational messages are printed.
        :param sandbox: forwarded unchanged to ``Searcher`` (presumably
            selects the Zenodo sandbox instance -- confirm in Searcher).
        :raises ZenodoError: via ``raise_error`` when an ID contains no
            "." separating the prefix from the record number.
        """
        self.verbose = verbose
        self.sandbox = sandbox
        self.zenodo_entries = []
        self.cache_dir = os.path.join(os.path.expanduser('~'), ".cache",
                                      "boutiques")

        # Split the input into first occurrences (kept, in order) and
        # repeats (discarded). Working on fresh lists leaves the caller's
        # argument untouched and keeps the order on every Python version.
        seen = set()
        unique_zids = []
        discarded_zids = []
        for zid in zids:
            if zid in seen:
                discarded_zids.append(zid)
            else:
                seen.add(zid)
                unique_zids.append(zid)

        for zid in unique_zids:
            try:
                # Zenodo returns the full DOI, but for the purposes of
                # Boutiques we just use the Zenodo-specific portion (as it's
                # the unique part). If the API updates on Zenodo to no longer
                # provide the full DOI, this still works because it just
                # grabs the last thing after the split.
                # The second split drops the prefix before the first ".".
                newzid = zid.split('/')[-1].split(".", 1)[1]
            except IndexError:
                raise_error(ZenodoError, "Zenodo ID must be prefixed by "
                            "'zenodo', e.g. zenodo.123456")
            else:
                newfname = os.path.join(self.cache_dir,
                                        "zenodo-{0}.json".format(newzid))
                self.zenodo_entries.append({"zid": newzid,
                                            "fname": newfname})

        if self.verbose:
            for zid in discarded_zids:
                print_info("Discarded duplicate id {0}".format(zid))

    def pull(self):
        """Return local paths of the descriptors for all requested IDs.

        For every entry, the cached file is used when present; otherwise
        the record is looked up with ``Searcher`` and its first file is
        downloaded into the cache directory.

        :return: list of file paths, one per successfully resolved entry.
        :raises ZenodoError: via ``raise_error`` when the search returns no
            hits, or returns a hit whose id differs from the requested one.
        """
        json_files = []
        for entry in self.zenodo_entries:
            # return cached file if it exists
            if os.path.isfile(entry["fname"]):
                if self.verbose:
                    print_info("Found cached file at %s"
                               % entry["fname"])
                json_files.append(entry["fname"])
                continue

            searcher = Searcher(entry["zid"], self.verbose, self.sandbox,
                                exact_match=True)
            r = searcher.zenodo_search()

            # Parse the response body once and reuse it.
            hits = r.json()["hits"]["hits"]
            if not hits:
                raise_error(ZenodoError, "Descriptor \"{0}\" "
                            "not found".format(entry["zid"]))
            for hit in hits:
                if hit["id"] != int(entry["zid"]):
                    raise_error(ZenodoError, "Searched-for descriptor \"{0}\" "
                                "does not match descriptor \"{1}\" returned "
                                "from Zenodo".format(entry["zid"], hit["id"]))
                    continue

                # Only inspect the file list once the hit is known to match.
                file_path = hit["files"][0]["links"]["self"]
                if not os.path.exists(self.cache_dir):
                    os.makedirs(self.cache_dir)
                if self.verbose:
                    # file_path is a URL, so its separator is always "/"
                    # regardless of the local platform.
                    print_info("Downloading descriptor %s"
                               % file_path.split("/")[-1])
                downloaded = urlretrieve(file_path, entry["fname"])
                if self.verbose:
                    print_info("Downloaded descriptor to "
                               + downloaded[0])
                json_files.append(downloaded[0])

        return json_files