#! /usr/bin/env ccp4-python
#
# Copyright (C) 2005 Ronan Keegan
#
# This code is distributed under the terms and conditions of the
# CCP4 Program Suite Licence Agreement as a CCP4 Application.
# A copy of the CCP4 licence can be obtained by writing to the
# CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK.
#
#
# Setup Ensemble for Phaser MR
# Ronan Keegan 12/12/04
#
# NOTE(review): this module is Python 2 ("print" statements, has_key,
# string.split).  It is run under ccp4-python; do not assume Python 3.

import logging
import os, sys, string
import shutil
import mol_weight
import subprocess, shlex
import rtPDB

# MRBUMP requires a CCP4 installation to be set up.
if not os.environ.has_key('CCP4'):
    raise RuntimeError, 'CCP4 must be defined'

import Model_struct
import MRBUMP_gesamt
import glob
import collections

# AMPLE is optional; the AMPLE-style truncation path checks for None below.
# NOTE(review): "ample.util.foo" looks suspect -- the usual AMPLE layout is
# "from ample.util import ample_util"; confirm against the AMPLE package.
try:
    from ample.util.foo import ample_util
    from ample.ensembler import ensembler_util
except ImportError:
    ample_util = ensembler_util = None


class Ensemble:
    """ A class to construct an ensemble of the top match PDBs for Phaser. """

    def __init__(self):
        # Executables used for structural superposition; CBIN must be set
        # by the CCP4 environment (this will KeyError otherwise).
        if os.name == "nt":
            self.superposeEXE=os.path.join(os.environ['CBIN'], 'superpose.exe')
            self.gesamtEXE=os.path.join(os.environ['CBIN'], 'gesamt.exe')
            self.pdbsetEXE=os.path.join(os.environ['CBIN'], 'pdbset.exe')
        else:
            self.superposeEXE=os.path.join(os.environ['CBIN'], 'superpose')
            self.gesamtEXE=os.path.join(os.environ['CBIN'], 'gesamt')
            self.pdbsetEXE=os.path.join(os.environ['CBIN'], 'pdbset')

        # Superpose/gesamt run parameters: template, input, output and logs.
        self.SPxyz_template=''
        self.SPxyzin=''
        self.SPxyzout=''
        self.SP_logfile=''
        self.SP_logfile_last=''

        self.DB_fail=False
        self.num_per_ensem=0

        # Running molecular-weight statistics used when filtering models
        # whose MW deviates too far from the ensemble mean.
        self.base_MW=0
        self.meanMW=0
        self.total_MW_ensem=0

        # Debug flag taken from the environment.
        # NOTE(review): bare except silently masks any error here (including
        # a malformed MRBUMP_DEBUG value); eval() of an env var is also
        # risky.  Consider "except (KeyError, SyntaxError, NameError)".
        try:
            self.debug=eval(os.environ['MRBUMP_DEBUG'])
        except:
            self.debug=False

        # One record per search model prepared for Gesamt alignment.
        #self.gesamtTuple=collections.namedtuple('GesamtModel', ['pdbID', 'chainID', 'domainID', 'modelName', 'pdbFile', 'number'])
        self.gesamtTuple=collections.namedtuple('GesamtModel', ['pdbID', 'chainID', 'domainID', 'modelName', 'pdbFile', 'unmodPDBFile', 'number'])
        self.gesamtModelList=[]

    # --- Simple setters -------------------------------------------------

    def setDEBUG(self, flag):
        self.debug=flag

    def setSPXYZ_TEMPLATE(self, xyzin):
        # Template (fixed) structure for superposition.
        self.SPxyz_template=xyzin

    def setSPXYZIN(self, xyzin):
        # Moving structure to be superposed onto the template.
        self.SPxyzin=xyzin

    def setSPXYZOUT(self, xyzout):
        self.SPxyzout=xyzout

    def setSPLOGFILE(self, filename):
        # Cumulative log for all superpositions in the current ensemble.
        self.SP_logfile=filename

    def setSPLOGFILELAST(self, filename):
        # Log of the most recent superposition only.
        self.SP_logfile_last=filename

    def setNumPerEnsem(self, num_per_ensem):
        self.num_per_ensem=num_per_ensem

    # --- Main entry point -----------------------------------------------

    def setup_ensemble(self, mstat, init, target_info, domain=None, outputEnsemblesDir=""):
        """ A function to setup structure data for running Phaser with ensembles.
            Calls superpose/gesamt to line up each of the template structures
            with each other.

            mstat       : MRBUMP master status object (model/chain bookkeeping)
            init        : MRBUMP initialisation object (keywords, directories)
            target_info : target description; holds targetDomainDict
            domain      : optional domain ID; restricts the working list to
                          that domain's matches
            outputEnsemblesDir : destination for ensemble PDB files; defaults
                          to <models_dir>/ensembles
        """

        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Chains to process: all sorted MR matches, or just this domain's.
        if domain==None:
            workingList=mstat.sorted_MR_list
        else:
            workingList=target_info.targetDomainDict[domain].matches

        # Assingment of ensemble model to Gesamt ensemble model
        if init.keywords.GESENSEM:
            if init.keywords.AMPTRUNC:
                sys.stdout.write("Making Gesamt/AMPLE Ensembles\n")
            else:
                sys.stdout.write("Making Gesamt Ensemble\n")
            sys.stdout.write("\n")

            # Align all models with Gesamt; returns how many were processed.
            modelCount=self.makeGesamtEnsemble(init, mstat, target_info, domain=domain, outputEnsemblesDir=outputEnsemblesDir)

            if modelCount > 0:
                if init.keywords.GESTRUNC and len(workingList) >= 2:
                    # Variance-truncated ensembles from the base alignment.
                    self.makeGESAMTEnsembles(init, mstat, domain=domain, outputEnsemblesDir=outputEnsemblesDir)

                    # Re-point each match's model PDB at the per-domain copy.
                    # NOTE(review): unlike the two branches below, this one
                    # only updates the paths -- presumably because
                    # makeGesamtEnsemble has already moved the aligned files
                    # into the domain directory; confirm.
                    for domainMatchName in target_info.targetDomainDict[domain].matches:
                        # for TYPE in [init.keywords.MDLSCULPTOR, init.keywords.MDLCHAINSAW, init.keywords.MDLMOLREP, init.keywords.MDLUNMOD, init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA]:
                        if init.keywords.MDLSCULPTOR:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].sculptor_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].sculptor_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLCHAINSAW:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].chainsaw_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].chainsaw_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLMOLREP:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].molrep_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].molrep_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLUNMOD:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].unmod_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].unmod_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLDPDBCLP:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].pdbclip_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].pdbclip_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLPLYALA:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].plyala_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].plyala_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)

                elif init.keywords.GESTRUNC and len(workingList) < 2:
                    # Only one match: no truncation possible.  Move the single
                    # model into the domain directory and update its path.
                    domainMatchName=target_info.targetDomainDict[domain].matches[0]
                    #for TYPE in [init.keywords.MDLSCULPTOR, init.keywords.MDLCHAINSAW, init.keywords.MDLMOLREP, init.keywords.MDLUNMOD, init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA]:
                    if init.keywords.MDLSCULPTOR:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].sculptor_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].sculptor_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].sculptor_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLCHAINSAW:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].chainsaw_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].chainsaw_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].chainsaw_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLMOLREP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].molrep_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].molrep_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].molrep_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLUNMOD:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].unmod_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].unmod_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].unmod_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLDPDBCLP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].pdbclip_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].pdbclip_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].pdbclip_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLPLYALA:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].plyala_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].plyala_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].plyala_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)

                elif init.keywords.GESTRUNC == False:
                    # No gesamt truncation requested.
                    # NOTE(review): this branch is byte-for-byte identical to
                    # the "len(workingList) < 2" branch above -- a candidate
                    # for a shared helper, and worth checking that handling
                    # only matches[0] here (rather than the whole list) is
                    # intentional.
                    domainMatchName=target_info.targetDomainDict[domain].matches[0]
                    #for TYPE in [init.keywords.MDLSCULPTOR, init.keywords.MDLCHAINSAW, init.keywords.MDLMOLREP, init.keywords.MDLUNMOD, init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA]:
                    if init.keywords.MDLSCULPTOR:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].sculptor_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].sculptor_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].sculptor_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLCHAINSAW:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].chainsaw_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].chainsaw_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].chainsaw_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLMOLREP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].molrep_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].molrep_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].molrep_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLUNMOD:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].unmod_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].unmod_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].unmod_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLDPDBCLP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].pdbclip_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].pdbclip_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].pdbclip_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLPLYALA:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].plyala_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].plyala_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].plyala_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
            else:
                sys.stdout.write("Warning: no gesamt ensembles processed from this working list:\n %s\n" % (str(workingList)))

            # If we want to make Gesamt AMPLE ensembles
            if init.keywords.AMPTRUNC and len(mstat.sorted_MR_list) >= 2:
                self.makeAMPLEEnsembles(init, mstat)

            # Create the model dictionary entries for these ensembles
            self.makeEnsembleDict(init, mstat, target_info, domain=domain, outputEnsemblesDir=outputEnsemblesDir)

            sys.stdout.write("Number of Ensembles: %d\n" % mstat.ampleNumModels)
            sys.stdout.write("Number of search models in each Ensemble: %d\n" % mstat.gesamtNumModels)
            sys.stdout.write("\n")

        else:
            # Legacy (non-Gesamt-ensemble) path: build ENSNUM ensembles of
            # ENSMODNUM models each by superposing models with gesamt.

            # ensemble count
            ecount=0

            sys.stdout.write("Number of Ensembles: %d\n" % init.keywords.ENSNUM)
            sys.stdout.write("Number of search models in each Ensemble: %d\n" % init.keywords.ENSMODNUM)
            sys.stdout.write("\n")

            while ecount < init.keywords.ENSNUM:
                # model count
                mcount=0

                # Set up the model list entry
                m=Model_struct.Models_struct()
                m.setNumPerEnsem(init.keywords.ENSMODNUM)
                m.setModelChainSource('ensemble_model')
                m.setPDBName('ensm')
                m.setModelName('ensemble_model%d'%ecount)
                m.setModelType('ENSMBL')
                if os.path.isdir(os.path.join(init.search_dir, 'data', 'domain_%s' % domain)) == False:
                    os.mkdir(os.path.join(init.search_dir, 'data', 'domain_%s' % domain))
                m.setModel_directory(os.path.join(init.search_dir, 'data', 'domain_%s' % domain, 'ensemble_model%d'%ecount))

                # Create the directory structure
                os.mkdir(m.model_directory)
                os.mkdir(os.path.join(m.model_directory, "mr"))
                os.mkdir(os.path.join(m.model_directory, "mr", "pdbfiles"))
                os.mkdir(os.path.join(m.model_directory, "mr", "logs"))
                os.mkdir(os.path.join(m.model_directory, "mr", "tmp"))
                if "MOLREP" in init.keywords.MR_PROGRAM_LIST:
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep'))
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep', 'refine'))
                if "PHASER" in init.keywords.MR_PROGRAM_LIST:
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser'))
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser', 'refine'))

                self.setSPLOGFILE(os.path.join(m.model_directory, 'mr', 'logs', 'superpose.log'))
                self.setSPLOGFILELAST(os.path.join(m.model_directory, 'mr', 'logs', 'superpose_last.log'))

                full_log=open(self.SP_logfile,'w')

                # NOTE(review): THE SOURCE TEXT IS CORRUPTED HERE.  Code has
                # been lost between "if ecount" and "10.0:".  Judging from the
                # surviving statements below, the missing code (a) selected the
                # superposition template for this ensemble (setSPXYZ_TEMPLATE /
                # self.base_MW), (b) built 'rotated_chain_list' from the sorted
                # MR list, and (c) opened a loop over its chains which set
                # 'chain', 'input_PDBFile' and 'current_MW' (presumably via
                # mol_weight) and computed 'percent_diff' against the running
                # mean MW.  The fragment below is indented as if inside that
                # lost loop.  RECOVER THE ORIGINAL FROM VERSION CONTROL before
                # relying on this path.
                if ecount 10.0:
                        sys.stdout.write("WARNING: Deviation of base alignment model MW (%.2f %%) from mean too large\n" % percent_diff)
                        sys.stdout.write("WARNING: Skipping model\n")
                        sys.stdout.write("\n")
                        continue
                    else:
                        if self.debug:
                            sys.stdout.write("Current average molecular weight = %.1f\n" % self.meanMW)
                            sys.stdout.write("Deviation of base alignment model from mean = %.2f %%\n" % percent_diff)

                    # Fold the accepted model into the running MW statistics.
                    self.total_MW_ensem += current_MW
                    self.meanMW=self.total_MW_ensem/(mcount+1)

                    #self.total_MW_ensem += current_MW
                    #mean_MW_ensem = self.total_MW_ensem / (mcount+1)
                    #percent_diff = (self.base_MW - mean_MW_ensem) * 100.0 / mean_MW_ensem
                    #print "Molecular weight of alignment model = %.1f" % current_MW
                    #if self.debug:
                    #   print "Current average molecular weight = %.1f" % mean_MW_ensem
                    #   print "Deviation of base alignment model from mean = %.2f %%" % percent_diff
                    #if abs(percent_diff) > 10.0:
                    #   print "WARNING: Deviation of base alignment model MW (%.2f %%) from mean too large" % percent_diff
                    #   print "WARNING: Stopping here with %d models in the ensemble" % mcount
                    #   print ""
                    #   break

                    self.setSPXYZIN(input_PDBFile)
                    self.setSPXYZOUT(os.path.join(m.model_directory, 'mr', 'pdbfiles', mstat.chain_list[chain].chainName + '_superpose.pdb'))

                    # Set the command line
                    #command_line = self.superposeEXE + " " + self.SPxyzin + " " + self.SPxyz_template + " -p " + self.SPxyzout
                    command_line = self.gesamtEXE + " " + self.SPxyzin + " " + self.SPxyz_template + " " + self.SPxyzout

                    # Launch superpose
                    # NOTE(review): shell="True" is a *string*, which Python
                    # treats as truthy -- so this behaves as shell=True even
                    # though it reads like a typo for the boolean; combined
                    # with an argument *list* on Windows the semantics are
                    # dubious.  Verify the intended invocation.
                    if os.name == "nt":
                        process_args = shlex.split(command_line, posix=False)
                        p = subprocess.Popen(process_args, shell="True", stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
                    else:
                        process_args = shlex.split(command_line)
                        p = subprocess.Popen(process_args, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)

                    (o, i) = (p.stdout, p.stdin)
                    i.close()

                    # Capture the output from superpose
                    log=open(self.SP_logfile_last, "w")
                    line=o.readline()
                    while line:
                        log.write(line)
                        line=o.readline()
                    o.close()
                    log.close()

                    # Parse the per-run log: accumulate it into the full log
                    # and pull out the RMSD / alignment-length line.
                    last_log=open(self.SP_logfile_last, 'r')
                    line = last_log.readline()
                    while line:
                        # copy to full log file, that is kept
                        full_log.write(line)
                        # extract info from current log file
                        if 'RMSD' in line and "=" in line:
                            rmsd_index = string.split(line).index('RMSD')
                            rmsd = float(string.split(line)[rmsd_index+2].replace(",", ""))
                            align_length = int(string.split(line)[-1])
                            print "which aligns to base model with RMSD = %.3f over %d residues" % (rmsd,align_length)
                        line = last_log.readline()
                    last_log.close()

                    # Add this pdb code to the ensemble list (along with its seq ID and rms value)
                    m.PDBfile.append(self.SPxyzout)
                    m.seqID.append(mstat.chain_list[chain].seqID)
                    m.rms.append(mstat.chain_list[chain].rms)

                    # When we reach the number of ensemble structures to be used, break out.
                    mcount=mcount+1
                    if mcount >= m.num_per_ensem:
                        print ""
                        break

                    # If there are too many models in each ensemble, reduce
                    if mcount >= len(rotated_chain_list)-1:
                        print "Warning: number of models in each ensemble reduced to %d to ensure ensemble uniqueness\n"%mcount
                        m.num_per_ensem = mcount
                        break

                full_log.close()

                # Fewer models were accepted than requested: shrink the target.
                if mcount < m.num_per_ensem:
                    m.num_per_ensem = mcount

                # Add the ensemble to the model list and create an entry in the results_dict
                mstat.model_list[m.name]=m

                # Make the results dictionary
                import make_dictionary
                mstat.results_dict[m.name]=make_dictionary.makeDict(init.search_dir)

                # Insert the ensemble at the top of the sorted list
                mstat.sorted_model_list.insert(0,m.name)

                ecount += 1

    def gesamtPrep(self, MODELTYPE, modelList, mstat, count, domain=None):
        """ Prepare the Gesamt ensemble.

            Collects the model PDB file of type MODELTYPE (PDBCLP/PLYALA/
            UNMOD/MOLREP/CHNSAW/SCLPTR) for every chain in modelList, skipping
            chains whose file is missing, and fills self.gesamtModelList with
            GesamtModel named tuples.  Returns the updated running count.
        """

        self.gesamtModelList=[]
        tempDict=dict([])
        for chain in modelList:
            # Pick the prepared-model flavour requested by MODELTYPE.
            # NOTE(review): if MODELTYPE matches none of these, 'pdbFile'
            # from the previous iteration (or a NameError on the first) is
            # used below.
            if MODELTYPE=="PDBCLP":
                pdbFile=mstat.chain_list[chain].pdbclip_modelPDB
            elif MODELTYPE=="PLYALA":
                pdbFile=mstat.chain_list[chain].plyala_modelPDB
            elif MODELTYPE=="UNMOD":
                pdbFile=mstat.chain_list[chain].unmod_modelPDB
            elif MODELTYPE=="MOLREP":
                pdbFile=mstat.chain_list[chain].molrep_modelPDB
            elif MODELTYPE=="CHNSAW":
                pdbFile=mstat.chain_list[chain].chainsaw_modelPDB
            elif MODELTYPE=="SCLPTR":
                pdbFile=mstat.chain_list[chain].sculptor_modelPDB
            if os.path.isfile(pdbFile):
                tempDict[chain]=pdbFile
            else:
                sys.stdout.write("Warning: Missing %s output file! Missing file:\n %s\n" % (MODELTYPE, pdbFile))

        # 'number' seeds the first tuple with the total model count, then
        # tracks the running 'count' for subsequent tuples.
        number=len(tempDict.keys())
        for chain in tempDict.keys():
            gT=self.gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, domain, \
                                mstat.chain_list[chain].chainName + '_%s' % MODELTYPE, tempDict[chain], mstat.chain_list[chain].PDBFile, number)
            self.gesamtModelList.append(gT)
            count=count+1
            number=count
        #mstat.EnsTemplate=MODELTYPE

        return count

    def makeGesamtEnsemble(self, init, mstat, target_info, domain=None, outputEnsemblesDir=""):
        """ Make the Gesamt ensemble.

            Aligns all prepared models onto the first one with Gesamt (twice:
            once producing per-model aligned files, once merging everything
            into a single multi-model PDB), then files the results into the
            per-domain models directories.  Returns the number of models
            successfully processed.
        """

        # Setup the output ensembles directory if not processing a domain
        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Set the list of chains to process, domain-specific or not
        if domain==None:
            workingList=mstat.sorted_MR_list
        else:
            workingList=target_info.targetDomainDict[domain].matches

        # Work out which template to use: Sculptor is top preference etc.
        # The list runs lowest-to-highest preference, so the last truthy
        # keyword is the one whose model type string wins.
        modelTCount=0
        for TYPE in [init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA, init.keywords.MDLUNMOD, init.keywords.MDLMOLREP, init.keywords.MDLCHAINSAW, init.keywords.MDLSCULPTOR]:
            if TYPE:
                mstat.EnsTemplate=mstat.modelTypeStringList[modelTCount]
            modelTCount=modelTCount+1

        # Prepare the models for Gesamt alignment
        count=self.gesamtPrep(mstat.EnsTemplate, workingList, mstat, 0, domain=domain)

        if self.gesamtModelList==[]:
            sys.stdout.write("Error: No pdb models found for Gesamt alignment\n")
            sys.stdout.write("\n")
            sys.exit()

        # take the first non-ensemble entry as alignment base and remove from list
        mstat.gesamtAlignModel = self.gesamtModelList[0].modelName
        alignModelPDB = self.gesamtModelList[0].pdbFile

        # Set the sequence ID for the ensembles - it should be the seqID of the alignment base model
        mstat.gesamtEnsembleSeqID=mstat.chain_list[workingList[0]].seqID

        # Set the Gesamt log file, alignment output file and the script file
        mstat.gesamtAlnFile=os.path.join(init.sequences_dir, "gesamtMultModelAlign.seq")
        logfile=os.path.join(init.logs_dir, "gesamtMultModelAlign.log")
        gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlign.sh")

        mstat.gesamtNumModels=len(self.gesamtModelList)

        # Set the name of the multi-model output pdb from Gesamt (Base Ensemble)
        mstat.gesamtBaseEnsemble=os.path.join(outputEnsemblesDir, "gesamtBaseAlignment.pdb")
        #mstat.gesamtCSVFile=os.path.join(init.logs_dir, "gesamtCSVfile.csv")
        #mstat.gesamtCSVCOREfile=os.path.join(init.logs_dir, "gesamtCSVCOREfile.csv")
        # NOTE(review): "%d" requires an integer domain; a None domain will
        # raise TypeError here -- confirm callers always pass a domain on
        # this path.
        mstat.gesamtCSVFileDict[domain]=os.path.join(init.logs_dir, "gesamtCSVfile_%d.csv" % domain)
        mstat.gesamtCSVCOREfileDict[domain]=os.path.join(init.logs_dir, "gesamtCSVCOREfile_%d.csv" % domain)

        # Loop over the models and align them to the alignModel using Gesamt, then move them to the models directory
        modelCount=0
        if len(self.gesamtModelList) >= 2:
            pdbList=[]
            pdbDict=dict([])
            for gesamtModel in self.gesamtModelList[1:]:
                # Create the pdb list and dictionary for this alignment
                pdbList.append(gesamtModel.pdbFile)
                pdbDict[gesamtModel.pdbFile]=gesamtModel.chainID
            # The alignment base goes in last.
            pdbList.append(alignModelPDB)
            pdbDict[alignModelPDB]=self.gesamtModelList[0].chainID

            if mstat.EnsTemplate == "MOLREP":
                MODELTYPE="MOLREP"
            else:
                MODELTYPE=""

            # Run Gesamt to align to the master and copy to the models directory
            gesamtRun=MRBUMP_gesamt.Gesamt()
            gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, \
                                logfile=logfile, alnfile=mstat.gesamtAlnFile, MERGE_OUTPUT=False, \
                                TYPE=MODELTYPE, script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG']))

            # Run it again but this time output all models to one PDB file
            gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlignOnePDB.sh")
            logfile=os.path.join(init.logs_dir, "gesamtMultModelAlignOnePDB.log")
            gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, outputPDB=mstat.gesamtBaseEnsemble, \
                                logfile=logfile, alnfile=mstat.gesamtAlnFile, csvfile=mstat.gesamtCSVFileDict[domain], MERGE_OUTPUT=True, \
                                TYPE=MODELTYPE, script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG']))

            # File each aligned model: rename "<name>_<n>.pdb" back to
            # "<name>.pdb", copy it over the original, and move it into the
            # matching domain directory.
            for gesamtModel in self.gesamtModelList:
                gesamtAlignFile =os.path.splitext(os.path.basename(gesamtModel.pdbFile))[0] + "_" + str(gesamtModel.number) + ".pdb"
                gesamtOrigFile =os.path.splitext(os.path.basename(gesamtModel.pdbFile))[0] + ".pdb"
                if os.path.isfile(os.path.join(mstat.models_dir, gesamtAlignFile)):
                    shutil.move(os.path.join(mstat.models_dir, gesamtAlignFile), os.path.join(mstat.models_dir, gesamtOrigFile))
                    shutil.copyfile(os.path.join(mstat.models_dir, gesamtOrigFile), gesamtModel.pdbFile)
                    # modelName[0:7] is matched against the domain's match
                    # names -- presumably "<pdbID>_<chainID>"; verify.
                    for dom in target_info.targetDomainDict.keys():
                        if gesamtModel.modelName[0:7] in target_info.targetDomainDict[dom].matches:
                            tempPDBFilename=os.path.split(gesamtModel.pdbFile)[-1]
                            shutil.move(os.path.join(mstat.models_dir, gesamtOrigFile), os.path.join(mstat.models_dir, "domain_%s" % str(target_info.targetDomainDict[dom].ID), tempPDBFilename))
                            # rotate and tranlsate the unmodified version of the input pdb file
                            if init.keywords.RTUNMOD:
                                RTList=gesamtRun.extractCSVRTmatrix(csvFile=mstat.gesamtCSVFileDict[domain], alignedPDBFile=gesamtModel.pdbFile)
                                rtPDB.pdbRotateTranslate(pdbin=gesamtModel.unmodPDBFile, pdbout=os.path.join(mstat.models_dir, "domain_%s" % str(target_info.targetDomainDict[dom].ID), "unmodified", os.path.split(gesamtModel.unmodPDBFile)[-1]), chainID=gesamtModel.chainID, RTmatrix=[float(x) for x in RTList[-1]])
                    modelCount=modelCount+1
                else:
                    sys.stdout.write("Ensemble file move: file missing:\n %s\n" % os.path.join(mstat.models_dir, gesamtAlignFile))

        # If there was only one model found in the search then don't run gesamt and just copy the model to the models directory
        else:
            if os.path.isfile(alignModelPDB):
                shutil.copyfile(alignModelPDB, os.path.join(mstat.models_dir, "domain_%s" % str(self.gesamtModelList[0].domainID), os.path.split(alignModelPDB)[-1]))
                # copy in the unmodified version of the input pdb file. Here the position is the same as no alignment has been made
                if init.keywords.RTUNMOD:
                    shutil.copyfile(self.gesamtModelList[0].unmodPDBFile, os.path.join(mstat.models_dir, "domain_%s" % str(self.gesamtModelList[0].domainID), "unmodified", os.path.split(self.gesamtModelList[0].unmodPDBFile)[-1]))
                modelCount=modelCount+1
            else:
                sys.stdout.write("Warning: could not find model file:\n %s\n" % alignModelPDB)

        return modelCount

    def makeGESAMTEnsembles(self, init, mstat, domain=None, outputEnsemblesDir=""):
        """ Make Gesamt variance-based truncated ensembles.

            Builds one truncated ensemble PDB per truncation level, either a
            single level given by the GTLEVEL keyword or a ladder of levels in
            GTPERCENT steps up to 100.
        """

        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Side-chain treatment parameters baked into the output file names.
        slevel=10
        struncation="Cbeta"
        truncationList=[]
        if init.keywords.GTLEVEL != None:
            if isinstance(init.keywords.GTLEVEL, float) or isinstance(init.keywords.GTLEVEL, int):
                sys.stdout.write("GTLEVEL has been set to %s. Outputing single truncated Gesamt ensemble at this truncation level\n" % str(init.keywords.GTLEVEL))
                truncationList=[str(init.keywords.GTLEVEL)]
            else:
                sys.stdout.write("Warning: GTLEVEL keyword has been given but is neither a float nor an integer. Defaulting to 10.0 Anstroms truncation level for Gesamt Ensemble\n")
                truncationList=["10.0"]
        else:
            #truncationList=["20.0", "10.0", "8.0", "6.0", "5.0", "4.0", "3.0", "2.0", "1.0", "0.8", "0.6", "0.5", "0.4", "0.3", "0.2", "0.1"]
            # Ladder of levels: GTPERCENT, 2*GTPERCENT, ... up to 100.
            j=init.keywords.GTPERCENT
            while j <=100:
                truncationList.append(j)
                j=j+init.keywords.GTPERCENT

        gesamtRun=MRBUMP_gesamt.Gesamt()
        for i in truncationList:
            if domain == None:
                gesamtRun.makeGesTruncEnsemble(mstat.gesamtBaseEnsemble, os.path.join(outputEnsemblesDir, "gesamtEnsTrunc_%s_Side%s.pdb" % (i, struncation)), variancePercent=init.keywords.GTPERCENT, csvFile=mstat.gesamtCSVFileDict[domain], csvCOREfile=mstat.gesamtCSVCOREfileDict[domain], truncation_level=float(i), sidechain_level=slevel)
            else:
                gesamtRun.makeGesTruncEnsemble(mstat.gesamtBaseEnsemble, os.path.join(outputEnsemblesDir, "gesamtEnsTrunc_%s_%s_Side%s.pdb" % (domain, i, struncation)), variancePercent=init.keywords.GTPERCENT, csvFile=mstat.gesamtCSVFileDict[domain], csvCOREfile=mstat.gesamtCSVCOREfileDict[domain], truncation_level=float(i), sidechain_level=slevel)

    def makeAMPLEEnsembles(self, init, mstat):
        """ Make AMPLE style truncated ensembles (delegates to
            ample_generate_ensembles, feeding it the Gesamt alignment). """
        ensembles = self.ample_generate_ensembles(init, alignment_file=mstat.gesamtAlnFile + ".gesamt", logdir=os.path.join(init.search_dir,'logs'))

    def makeEnsembleDict(self, init, mstat, target_info, domain=None, outputEnsemblesDir=""):
        """ Make the dictionary entries for the Gesamt ensemble(s).

            Scans outputEnsemblesDir, derives a truncation level and PDB name
            for each ensemble file, and registers a Models_struct entry (with
            directory tree) for each one in mstat.
        """

        # Setup the output ensembles directory if not processing a domain
        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Set the list of chains to process, domain-specific or not
        if domain==None:
            workingList=mstat.sorted_MR_list
        else:
            workingList=target_info.targetDomainDict[domain].matches

        ensFileList=os.listdir(outputEnsemblesDir)

        ensembleTuple=collections.namedtuple('ensembleModel', ['modelName', 'pdbFile', 'truncationLevel', 'pdbName', 'fileName'])

        for pdbfile in ensFileList:
            # NOTE(review): if a file matches none of the patterns below,
            # 'truncationLevel'/'pdbName' are carried over from the previous
            # iteration (or raise NameError on the first) -- worth guarding.
            if init.keywords.AMPTRUNC:
                # AMPLE files are named "e<level>_..."; anything else gets
                # the sentinel level 110.
                if "_" in pdbfile and "gesamt" not in pdbfile.lower():
                    if string.split(pdbfile, "_")[0][0]=="e":
                        truncationLevel=string.split(pdbfile, "_")[0].replace("e", "")
                    else:
                        truncationLevel="110"
                    # Zero-pad to three digits: e007, e042, e110.
                    if int(truncationLevel) < 10:
                        pdbName="e00" + truncationLevel
                    elif int(truncationLevel) < 100:
                        pdbName="e0" + truncationLevel
                    else:
                        pdbName="e" + truncationLevel
            if init.keywords.GESTRUNC:
                # Gesamt truncation files: "gesamtEnsTrunc_<level>_Side...".
                if "_Side" in pdbfile and "gesamt" in pdbfile.lower():
                    truncationLevel=string.split(pdbfile, "_")[1]
                    pdbName=os.path.splitext(os.path.basename(pdbfile))[0]
                elif "gesamtBaseAlignment" in pdbfile:
                    truncationLevel="All"
                    pdbName=os.path.splitext(os.path.basename(pdbfile))[0]
            if not init.keywords.GESTRUNC and not init.keywords.AMPTRUNC:
                truncationLevel="All"
                pdbName=os.path.splitext(os.path.basename(pdbfile))[0]
                # We still produce the CSVCORE for ccp4mg
                if init.keywords.GESENSEM:
                    gesamtRun=MRBUMP_gesamt.Gesamt()
                    gesamtRun.makeGesTruncFile(variancePercent=init.keywords.GTPERCENT, csvFile=mstat.gesamtCSVFileDict[domain], csvCOREfile=mstat.gesamtCSVCOREfileDict[domain])

            eT=ensembleTuple(os.path.splitext(pdbfile)[0], os.path.join(outputEnsemblesDir, pdbfile), truncationLevel, pdbName, pdbfile)
            mstat.ampleEnsembleList.append(eT)

            # Set up the model list entry
            m=Model_struct.Models_struct()
            m.setModelChainSource('ensemble_model')
            m.targetDomain=domain
            m.myPHTargetDomain=domain
            m.setPDBName(eT.pdbName)
            m.setModelType('ENSMBL')
            if domain == None:
                m.setModelName('ensemble_%s' % eT.pdbName)
                m.setModel_directory(os.path.join(init.search_dir, 'data', 'ensemble_%s'% eT.pdbName))
            else:
                m.setModelName('ensemble_%s_%s' % (eT.pdbName, domain))
                m.setModel_directory(os.path.join(init.search_dir, 'data', 'ensemble_%s_%d'% (eT.pdbName, domain)))
            m.PDBfile=[]
            m.PDBfile.append(eT.pdbFile)
            m.seqID.append(mstat.gesamtEnsembleSeqID)
            m.rms.append(mstat.chain_list[workingList[0]].rms)
            m.num_per_ensem=1#mstat.gesamtNumModels

            # Create the directory structure
            os.mkdir(m.model_directory)
            os.mkdir(os.path.join(m.model_directory, "mr"))
            os.mkdir(os.path.join(m.model_directory, "mr", "pdbfiles"))
            os.mkdir(os.path.join(m.model_directory, "mr", "logs"))
            os.mkdir(os.path.join(m.model_directory, "mr", "tmp"))
            if "MOLREP" in init.keywords.MR_PROGRAM_LIST:
                os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep'))
                os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep', 'refine'))
            if "PHASER" in init.keywords.MR_PROGRAM_LIST:
                os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser'))
                os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser', 'refine'))

            shutil.copyfile(eT.pdbFile, os.path.join(m.model_directory, eT.fileName))

            # Add the ensemble to the model list and create an entry in the results_dict
            mstat.model_list[m.name]=m

            # Make the results dictionary
            import make_dictionary
            mstat.results_dict[m.name]=make_dictionary.makeDict(init.search_dir)

            # Insert the ensemble at the top of the sorted list
            mstat.sorted_model_list.insert(0,m.name)

        # Set the number of AMPLE/Gesamt ensembles
        mstat.ampleNumModels=len(mstat.ampleEnsembleList)

    def ample_generate_ensembles(self, init, alignment_file=None, logdir=None):
        """ Make the truncated ensembles using AMPLE method.

            Requires the optional AMPLE package; raises ImportError if it was
            not importable at module load.  Returns the list of ensembles
            produced by AMPLE's homolog ensembler.

            NOTE(review): this chdir()s into the work directory and does not
            restore the previous cwd.
        """
        # TODO: requires treatment for ab initio truncation method. Only homologs work right now
        if ensembler_util is None:
            raise ImportError("Cannot import ample ensembler_util")

        logger = logging.getLogger()
        logger.setLevel(logging.DEBUG)

        # create file handler for debug output
        assert logdir
        logfile = os.path.join(logdir,'ample.log')
        fl = logging.FileHandler(logfile)
        fl.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(name)s [%(lineno)d] - %(levelname)s - %(message)s')
        fl.setFormatter(formatter)
        logger.addHandler(fl)

        # get the module for homologs ensembling
        ensemble_module = ensembler_util.find_ensembler_module({"homologs" : True, "single_model_mode" : False})
        ensembler = ensemble_module.Ensembler()

        # External executables AMPLE needs (theseus/maxcluster from CCP4).
        ensembler.theseus_exe = os.path.join(os.environ['CCP4'], 'bin', 'theseus' + ample_util.EXE_EXT)
        ensembler.maxcluster_exe = ample_util.find_maxcluster({'maxcluster_exe' : None, 'rcdir' : os.path.join( os.path.expanduser("~"), ".ample") })
        ensembler.subcluster_exe = ensembler.maxcluster_exe
        ensembler.max_ensemble_models = 30
        # NOTE(review): cluster_exe is computed but never used below.
        cluster_exe = os.path.join(os.environ['CCP4'], 'bin', 'spicker' + ample_util.EXE_EXT)

        models_dir = os.path.join(init.search_dir, 'models')
        ensembles_directory = os.path.join(models_dir, 'ensembles')
        if not os.path.isdir(ensembles_directory):
            os.mkdir(ensembles_directory)

        work_dir = os.path.join(init.search_dir, 'models', 'ensemble_workdir')
        os.mkdir(work_dir)
        os.chdir(work_dir)

        models = glob.glob(os.path.join(models_dir, "*.pdb"))

        # Works specifically for homologs here. Kwargs would need to be changed to figure out otherwise
        ensembles = ensembler.generate_ensembles(models,
                                                 percent_truncation=5,
                                                 truncation_method='percent',
                                                 ensembles_directory=ensembles_directory,
                                                 alignment_file=alignment_file,
                                                 work_dir=work_dir,
                                                 nproc=1,
                                                 homolog_aligner='gesamt',
                                                 mustang_exe=None,
                                                 gesamt_exe=os.path.join(os.environ['CCP4'], 'bin', 'gesamt' + ample_util.EXE_EXT),
                                                 side_chain_treatments=None )
        return ensembles