'''
Various miscellaneous functions.
Might end up somewhere else at somepoint.
'''

# Python modules
import cPickle
import logging
import os
import platform
import subprocess
import sys
import tarfile
import tempfile
import urllib
import zipfile

# our imports
import exit_util
import pdb_edit

from ample.constants import SHARE_DIR

CCP4_VERSION=None
SCRIPT_EXT = '.bat' if sys.platform.startswith('win') else '.sh'
EXE_EXT = '.exe' if sys.platform.startswith('win') else ''
SCRIPT_HEADER = '' if sys.platform.startswith('win') else '#!/bin/bash'

_logger = logging.getLogger(__name__)

# Reference string
references = """AMPLE: J. Bibby, R. M. Keegan, O. Mayans, M. D. Winn and D. J. Rigden.
AMPLE: a cluster-and-truncate approach to solve the crystal structures of small proteins using
rapidly computed ab initio models. (2012). Acta Cryst. D68, 1622-1631 [ doi:10.1107/S0907444912039194 ]

Routine phasing of coiled-coil protein crystal structures with AMPLE (2015). Thomas, J. M. H.,
Keegan, R. M., Bibby, J., Winn, M. D., Mayans, O. and Rigden, D. J. IUCrJ 2, 198-206.
[doi:10.1107/S2052252515002080] 

CCP4: Collaborative Computational Project, Number 4. (1994), The CCP4 Suite: Programs
for Protein Crystallography. Acta Cryst. D50, 760-763

MaxCluster: http://www.sbg.bio.ic.ac.uk/maxcluster/

MOLREP: A.A.Vagin & A.Teplyakov (1997) J. Appl. Cryst. 30, 1022-1025

MrBUMP: R.M.Keegan and M.D.Winn (2007) Acta Cryst. D63, 447-457

PHASER: McCoy, A.J., Grosse-Kunstleve, R.W., Adams, P.D., Winn, M.D.,
Storoni, L.C. & Read, R.J. (2007)
Phaser crystallographic software J. Appl. Cryst. 40, 658-674

REFMAC: G.N. Murshudov, A.A.Vagin and E.J.Dodson, (1997) Refinement of Macromolecular
Structures by the Maximum-Likelihood Method. Acta Cryst. D53, 240-255

SCWRL: G. G. Krivov, M. V. Shapovalov, and R. L. Dunbrack, Jr. Improved prediction of protein
side-chain conformations with SCWRL4. Proteins (2009).

SHELXE: "Extending molecular-replacement solutions with SHELXE". Thorn, A. and Sheldrick, G. M. (2013),
Acta Crystallographica D, 69: 2251-2256. doi: 10.1107/S0907444913027534

SPICKER: Y. Zhang, J. Skolnick, SPICKER: Approach to clustering protein structures for
near-native model selection, Journal of Computational Chemistry, 2004 25: 865-871

Theseus: THESEUS: Maximum likelihood superpositioning and analysis of macromolecular structures.
Theobald, Douglas L. & Wuttke, Deborah S. (2006b) Bioinformatics 22(17):2171-2172 [Open Access]
Supplementary Materials for Theobald and Wuttke 2006b."""

# Header string
header ="""#########################################################################
#########################################################################
#########################################################################
# CCP4: AMPLE - Ab Initio Modelling Molecular Replacement               #
#########################################################################

The authors of specific programs should be referenced where applicable:""" + \
"\n\n" + references + "\n\n"


survey_url = "http://goo.gl/forms/7xP9M4P81O"
footer = "\n" + """
#########################################################################
#***********************************************************************#
#*                          How did we do?                             *#
#*                                                                     *#
#*           Please follow this link and leave some feedback!          *#
#*                                                                     *#
#*                 {url}                      *#
#***********************************************************************#
#########################################################################
""".format(url=survey_url)


def ccp4_version():
    """Return the CCP4 version as a tuple"""
    global CCP4_VERSION
    if CCP4_VERSION is None:
        # Currently there seems no sensible way of doing this other then running a program and grepping the output
        pdbcur = 'pdbcur.exe' if sys.platform.startswith('win') else 'pdbcur'
        logf = tempfile.NamedTemporaryFile(delete=False)
        run_command([pdbcur], stdin="", logfile=logf.name)
        logf.seek(0) # rewind logfile
        tversion=None
        for i, line in enumerate(logf):
            if i > 20:break
            if line.startswith(' ### CCP4'):
                tversion=line.split()[2].rstrip(':')
                break
        
        logf.close()
        if not tversion: raise RuntimeError,"Cannot determine CCP4 version"
        vsplit = tversion.split('.')
        if len(vsplit) == 2:
            major = int(vsplit[0])
            minor =  int(vsplit[1])
            rev = '-1'
        elif len(vsplit) == 3:
            major = int(vsplit[0])
            minor = int(vsplit[1])
            rev = int(vsplit[2])
        else: raise RuntimeError,"Cannot split CCP4 version: {0}".format(tversion)
    os.unlink(logf.name)
    return (major,minor,rev)
    
def extract_models(amoptd, sequence=None, single=True, allsame=True):
    """Check a directory of pdbs or extract pdb files from a given tar/zip file or directory of pdbs
    and set the amoptd['models_dir'] entry with the directory of unpacked/validated pdbs
    """
    
    filename = amoptd['models']
    models_dir = amoptd['models_dir']
    
    # If it's already a models_dir, just check it's valid   
    if os.path.isdir(filename):
        models_dir = filename
    else:
        # Here we are extracting from a file
        if not os.path.isfile(filename):
            msg="Cannot find models file: {0}".format(filename)
            exit_util.exit_error(msg)
            
        # we need a models_dir to extract into
        assert models_dir,"extractModels needs a models_dir path!"
        if not os.path.isdir(models_dir):
            os.mkdir(models_dir)
        models_dir = models_dir
        
        # See what sort of file this is:
        f,suffix=os.path.splitext(filename)
        if suffix in ['.gz','.bz']:
            f,s2=os.path.splitext(f)
            if s2 == '.tar': suffix=s2+suffix
        
        tsuffixes=['.tar.gz','.tgz','.tar.bz','.tbz']
        suffixes=tsuffixes + ['.zip']
        if suffix not in suffixes:
            msg="Do not know how to extract files from file: {0}\n Acceptable file types are: {1}".format(filename,suffixes)
            exit_util.exit_error(msg)
        if suffix in tsuffixes:
            files = extract_tar(filename, models_dir)
        else:
            files = extract_zip(filename, models_dir)
        
        # Assume anything with one member is quark decoys
        if len(files) == 1:
            quark_filename='alldecoy.pdb'
            f = os.path.basename(files[0])
            if not f == quark_filename:
                msg="Only found one member ({0}) in file: {1} and the name was not {2}\n".format(f, filename, quark_filename)
                msg+="If this file contains valid QUARK decoys, please email: ccp4@stfc.ac.uk"
                exit_util.exit_error(msg)
            # Now extract the quark pdb files from the monolithic file
            split_quark(files[0], models_dir)
            # We delete the quark_name file as otherwise we'll try and model it
            os.unlink(files[0])
            # If we've got quark models we don't want to modify the side chains as we only have polyalanine so we
            # set this here - horribly untidy as we should have one place to decide on side chains
            _logger.info('Found QUARK models in file: {0}'.format(filename))
            amoptd['quark_models'] = True
    
    if not pdb_edit.check_pdb_directory(models_dir, sequence=sequence, single=single, allsame=allsame):
        msg="Problem importing pdb files - please check the log for more information"
        exit_util.exit_error(msg)
    
    amoptd['models_dir'] = models_dir
    return

def extract_tar(filename, directory, suffixes=['.pdb']):
    # Extracting tarfile
    _logger.info('Extracting files from tarfile: {0}'.format(filename) )
    files = []
    with tarfile.open(filename,'r:*') as tf:
        memb = tf.getmembers()
        if not len(memb):
            msg='Empty archive: {0}'.format(filename)
            exit_util.exit_error(msg)
        for m in memb:
            if os.path.splitext(m.name)[1] in suffixes:
                # Hack to remove any paths
                m.name = os.path.basename(m.name)
                tf.extract(m, path=directory)
                files.append(os.path.join(directory, m.name))
    if not len(files):
        msg='Could not find any files with suffixes {0} in archive: {1}'.format(suffixes, filename)
        exit_util.exit_error(msg)
    return files

def extract_zip(filename, directory, suffixes=['.pdb']):
    # zip file extraction
    _logger.info('Extracting files from zipfile: {0}'.format(filename) )
    if not zipfile.is_zipfile(filename):
            msg='File is not a valid zip archive: {0}'.format(filename)
            exit_util.exit_error(msg)
    zipf = zipfile.ZipFile(filename)
    zif = zipf.infolist()
    if not len(zif):
        msg = 'Empty zip file: {0}'.format(filename)
        exit_util.exit_error(msg)
    files = []
    for f in zif:
        if os.path.splitext(f.filename)[1] in suffixes:
            # Hack to rewrite name 
            f.filename = os.path.basename(f.filename)
            zipf.extract(f, path=directory)
            files.append(os.path.join(directory, f.filename))
    if not len(files):
        msg = 'Could not find any files with suffixes {0} in zipfile: {1}'.format(suffixes,filename)
        exit_util.exit_error(msg)
    return files

def find_exe(executable, dirs=None):
    """Find the executable exename.
    Args:
    executable: the name of the program or the path to an existing executable
    dirs - additional directories to search for the location
    """
    _logger.debug('Looking for executable: {0}'.format(executable) )
    
    exe_file=None
    found=False
    if is_exe(executable):
        exe_file=os.path.abspath(executable)
        found=True
    else:
        # If the user has given a path we just take the name part
        fpath,fname = os.path.split(executable)
        if fname:
            executable=fname
            
        # By default we search in the system PATH and add any additional user given paths here
        paths = os.environ["PATH"].split(os.pathsep)
        if dirs:
            paths += dirs
        _logger.debug('Checking paths: {0}'.format(paths))
        
        for path in paths:
            exe_file = os.path.abspath(os.path.join(path, executable))   
            if is_exe(exe_file):
                _logger.debug( 'Found executable {0} in directory {1}'.format(executable,path) )
                found=True
                break
    
    if not found:
        raise Exception("Cannot find executable: {0}".format(executable))
    
    _logger.debug('find_exe found executable: {0}'.format(exe_file) )
    return exe_file

def filename_append(filename=None, astr=None,directory=None, separator="_"):
    """Append astr to filename, before the suffix, and return the new filename."""
    dirname, fname = os.path.split( filename )
    name, suffix = os.path.splitext( fname )
    name  =  name + separator + astr + suffix
    if directory is None:
        directory = dirname
    return os.path.join( directory, name )

def find_maxcluster(amoptd):
    """Return path to maxcluster binary.
    If we can't find one in the path, we create a $HOME/.ample
    directory and downlod it to there
    """

    if amoptd['maxcluster_exe'] and is_exe(amoptd['maxcluster_exe']):
        return amoptd['maxcluster_exe']

    if not amoptd['maxcluster_exe']:
        if sys.platform.startswith("win"):
            amoptd['maxcluster_exe']='maxcluster.exe'
        else:
            amoptd['maxcluster_exe']='maxcluster'
    
    try:
        maxcluster_exe = find_exe(amoptd['maxcluster_exe'], dirs=[ amoptd['rcdir'] ] )
    except Exception:
        # Cannot find so we need to try and download it
        rcdir = amoptd['rcdir']
        _logger.info("Cannot find maxcluster binary in path so attempting to download it directory: {0}".format( rcdir )  )
        if not os.path.isdir( rcdir ):
            _logger.info("No ample rcdir found so creating in: {0}".format( rcdir ) )
            os.mkdir( rcdir )
        url = None
        maxcluster_exe = os.path.join( rcdir, 'maxcluster' )
        if sys.platform.startswith("linux"):
            bit=platform.architecture()[0]
            if bit=='64bit':
                url='http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster64bit'
            elif bit=='32bit':
                url='http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster'
            else:
                msg="Unrecognised system type: {0} {1}".format(sys.platform,bit)
                exit_util.exit_error(msg)
        elif sys.platform.startswith("darwin"):
            url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_i686_32bit.bin'
            #OSX PPC: http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster_PPC_32bit.bin
        elif sys.platform.startswith("win"):
            url = 'http://www.sbg.bio.ic.ac.uk/~maxcluster/maxcluster.exe'
            maxcluster_exe = os.path.join( rcdir, 'maxcluster.exe' )
        else:
            msg="Unrecognised system type: {0}".format( sys.platform )
            exit_util.exit_error(msg)
        _logger.info("Attempting to download maxcluster binary from: {0}".format( url ) )
        try:
            urllib.urlretrieve( url, maxcluster_exe )
        except Exception, e:
            msg="Error downloading maxcluster executable: {0}\n{1}".format(url,e)
            exit_util.exit_error(msg)

        # make executable
        os.chmod(maxcluster_exe, 0o777)

    return maxcluster_exe

def ideal_helices(nresidues):
    ""
    include_dir = os.path.join(SHARE_DIR, 'include')
    names = [ 'polyala5', 'polyala10', 'polyala15', 'polyala20', 'polyala25',
              'polyala30', 'polyala35', 'polyala40' ]
    polya_lengths = [5,10,15,20,25,30,35,40]
    
    ensemble_options = {}
    ensembles_data = []
    pdbs = []
    for name, nres in zip(names, polya_lengths):
        ncopies = nresidues / nres
        if ncopies < 1: ncopies = 1
        ensemble_options[ name ] = { 'ncopies' : ncopies }
        pdb = os.path.join(include_dir,"{0}.pdb".format(name))
        # Needed for pyrvapi results
        ensembles_data.append( { 'name' : name,
                                'ensemble_pdb' : pdb,
                                'num_residues' : nres,
                                 } )
        pdbs.append(pdb)
        
    return pdbs, ensemble_options, ensembles_data

def is_exe(fpath):
    return fpath and os.path.exists(fpath) and os.access(fpath, os.X_OK)

def make_workdir(work_dir, ccp4_jobid=None, rootname='AMPLE_'):
    """
    Make a work directory rooted at work_dir and return its path
    """

    if ccp4_jobid:
        dname = os.path.join( work_dir, rootname + str(ccp4_jobid) )
        if os.path.exists(dname):
            raise RuntimeError,"There is an existing AMPLE CCP4 work directory: {0}\nPlease delete/move it aside."
        os.mkdir(dname)
        return dname

    run_inc = 0
    run_making_done = False
    while not run_making_done:
        if not os.path.exists(work_dir + os.sep + rootname + str(run_inc)):
            run_making_done = True
            os.mkdir(work_dir + os.sep +rootname + str(run_inc))
        run_inc += 1
    work_dir = work_dir + os.sep + rootname + str(run_inc - 1)
    return work_dir

def run_command(cmd, logfile=None, directory=None, dolog=True, stdin=None, check=False, **kwargs):
    """Execute a command and return the exit code.

    We take care of outputting stuff to the logs and opening/closing logfiles

    Args:
    cmd - command to run as a list
    stdin - a string to use as stdin for the command
    logfile (optional) - the path to the logfile
    directory (optional) - the directory to run the job in (cwd assumed)
    dolog: bool - whether to output info to the system log
    """
    assert type(cmd) is list, "run_command needs a list!"
    if check:
        if not is_exe(cmd[0]): raise RuntimeError,"run_command cannot find executable: {0}".format(cmd[0])

    if not directory:  directory = os.getcwd()
    if dolog:
        _logger.debug("In directory {0}".format(directory))
        _logger.debug("Running command: {0}".format(" ".join(cmd)))
        if kwargs:  _logger.debug("kwargs are: {0}".format(kwargs))
    file_handle=False
    if logfile:
        if type(logfile)==file:
            file_handle=True
            logf=logfile
            logfile=os.path.abspath(logf.name)
        else:
            logfile = os.path.abspath(logfile)
            logf = open(logfile, "w")
        if dolog: _logger.debug("Logfile is: {0}".format(logfile))
    else:
        logf = tempfile.TemporaryFile()
        
    if stdin != None:
        stdinstr = stdin
        stdin = subprocess.PIPE

    # Windows needs some special treatment
    if os.name == "nt":
        kwargs.update( { 'bufsize': 0, 'shell' : "False" } )
    p = subprocess.Popen(cmd, stdin=stdin, stdout=logf, stderr=subprocess.STDOUT, cwd=directory, **kwargs)

    if stdin != None:
        p.stdin.write( stdinstr )
        p.stdin.close()
        if dolog: _logger.debug("stdin for cmd was: {0}".format( stdinstr ) )

    p.wait()
    if not file_handle: logf.close()
    return p.returncode

def saveAmoptd(amoptd):
    # Save results
    with open( amoptd['results_path'], 'w' ) as f:
        cPickle.dump( amoptd, f )
        _logger.info("Saved state as file: {0}\n".format( amoptd['results_path'] ) )
    return

def split_quark(dfile,directory):
    _logger.info("Extracting QUARK decoys from: {0} into {1}".format(dfile,directory))
    smodels = []
    with open(dfile,'r') as f:
        m=[]
        for line in f:
            if line.startswith("ENDMDL"):
                m.append(line)
                smodels.append(m)
                m = []
            else:
                m.append(line)
    if not len(smodels): raise RuntimeError,"Could not extract any models from: {0}".format(dfile)
    quark_models = []
    for i,m in enumerate(smodels):
        fpath = os.path.join(directory,"quark_{0}.pdb".format(i))
        with open(fpath,'w') as f:
            for l in m:
                # Need to reconstruct something sensible as from the coordinates on it's all quark-specific
                if l.startswith("ATOM"):
                    l = l[:54]+"  1.00  0.00              \n"
                f.write(l)
            quark_models.append(fpath)
            _logger.debug("Wrote: {0}".format(fpath))
        
    return quark_models

def tmpFileName():
    """Return a filename for a temporary file"""

    # Get temporary filenames
    t = tempfile.NamedTemporaryFile(dir=os.getcwd(), delete=True)
    tmp1 = t.name
    t.close()
    return tmp1