#! /usr/bin/env ccp4-python # # Copyright (C) 2005 Ronan Keegan # # This code is distributed under the terms and conditions of the # CCP4 Program Suite Licence Agreement as a CCP4 Application. # A copy of the CCP4 licence can be obtained by writing to the # CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK. # # # Setup Ensemble for Phaser MR # Ronan Keegan 12/12/04 # import logging import os, sys, string import shutil import mol_weight import subprocess, shlex if not os.environ.has_key('CCP4'): raise RuntimeError, 'CCP4 must be defined' import Model_struct import MRBUMP_gesamt import glob import collections try: from ample.util.foo import ample_util from ample.ensembler import ensembler_util except ImportError: ample_util = ensembler_util = None class Ensemble: """ A class to construct an ensemble of the top match PDBs for Phaser. """ def __init__(self): if os.name == "nt": self.superposeEXE=os.path.join(os.environ['CBIN'], 'superpose.exe') self.gesamtEXE=os.path.join(os.environ['CBIN'], 'gesamt.exe') self.pdbsetEXE=os.path.join(os.environ['CBIN'], 'pdbset.exe') else: self.superposeEXE=os.path.join(os.environ['CBIN'], 'superpose') self.gesamtEXE=os.path.join(os.environ['CBIN'], 'gesamt') self.pdbsetEXE=os.path.join(os.environ['CBIN'], 'pdbset') self.SPxyz_template='' self.SPxyzin='' self.SPxyzout='' self.SP_logfile='' self.SP_logfile_last='' self.DB_fail=False self.num_per_ensem=0 self.base_MW=0 self.meanMW=0 self.total_MW_ensem=0 try: self.debug=eval(os.environ['MRBUMP_DEBUG']) except: self.debug=False def setDEBUG(self, flag): self.debug=flag def setSPXYZ_TEMPLATE(self, xyzin): self.SPxyz_template=xyzin def setSPXYZIN(self, xyzin): self.SPxyzin=xyzin def setSPXYZOUT(self, xyzout): self.SPxyzout=xyzout def setSPLOGFILE(self, filename): self.SP_logfile=filename def setSPLOGFILELAST(self, filename): self.SP_logfile_last=filename def setNumPerEnsem(self, num_per_ensem): self.num_per_ensem=num_per_ensem def setup_ensemble(self, mstat, init): """ A function 
to setup structure data for running Phaser with ensembles. Calls superpose/gesamt to line up each of the template structures with each other. """ # Assingment of ensemble model to Gesamt ensemble model if init.keywords.GESENSEM: if init.keywords.AMPTRUNC: sys.stdout.write("Making Gesamt/AMPLE Ensembles\n") else: sys.stdout.write("Making Gesamt Ensemble\n") sys.stdout.write("\n") self.makeGesamtEnsemble(init, mstat) if init.keywords.GESTRUNC and len(mstat.sorted_MR_list) >= 2: self.makeGESAMTEnsembles(init, mstat) # If we want to make Gesamt AMPLE ensembles if init.keywords.AMPTRUNC and len(mstat.sorted_MR_list) >= 2: self.makeAMPLEEnsembles(init, mstat) # Create the model dictionary entries for these ensembles self.makeEnsembleDict(init, mstat) sys.stdout.write("Number of Ensembles: %d\n" % mstat.ampleNumModels) sys.stdout.write("Number of search models in each Ensemble: %d\n" % mstat.gesamtNumModels) sys.stdout.write("\n") else: # ensemble count ecount=0 sys.stdout.write("Number of Ensembles: %d\n" % init.keywords.ENSNUM) sys.stdout.write("Number of search models in each Ensemble: %d\n" % init.keywords.ENSMODNUM) sys.stdout.write("\n") while ecount < init.keywords.ENSNUM: # model count mcount=0 # Set up the model list entry m=Model_struct.Models_struct() m.setNumPerEnsem(init.keywords.ENSMODNUM) m.setModelChainSource('ensemble_model') m.setPDBName('ensm') m.setModelName('ensemble_model%d'%ecount) m.setModelType('ENSMBL') m.setModel_directory(os.path.join(init.search_dir, 'data', 'ensemble_model%d'%ecount)) # Create the directory structure os.mkdir(m.model_directory) os.mkdir(os.path.join(m.model_directory, "mr")) os.mkdir(os.path.join(m.model_directory, "mr", "pdbfiles")) os.mkdir(os.path.join(m.model_directory, "mr", "logs")) os.mkdir(os.path.join(m.model_directory, "mr", "tmp")) if "MOLREP" in init.keywords.MR_PROGRAM_LIST: os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep')) os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep', 'refine')) if "PHASER" in 
init.keywords.MR_PROGRAM_LIST: os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser')) os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser', 'refine')) self.setSPLOGFILE(os.path.join(m.model_directory, 'mr', 'logs', 'superpose.log')) self.setSPLOGFILELAST(os.path.join(m.model_directory, 'mr', 'logs', 'superpose_last.log')) full_log=open(self.SP_logfile,'w') if ecount 10.0: sys.stdout.write("WARNING: Deviation of base alignment model MW (%.2f %%) from mean too large\n" % percent_diff) sys.stdout.write("WARNING: Skipping model\n") sys.stdout.write("\n") continue else: if self.debug: sys.stdout.write("Current average molecular weight = %.1f\n" % self.meanMW) sys.stdout.write("Deviation of base alignment model from mean = %.2f %%\n" % percent_diff) self.total_MW_ensem += current_MW self.meanMW=self.total_MW_ensem/(mcount+1) #self.total_MW_ensem += current_MW #mean_MW_ensem = self.total_MW_ensem / (mcount+1) #percent_diff = (self.base_MW - mean_MW_ensem) * 100.0 / mean_MW_ensem #print "Molecular weight of alignment model = %.1f" % current_MW #if self.debug: # print "Current average molecular weight = %.1f" % mean_MW_ensem # print "Deviation of base alignment model from mean = %.2f %%" % percent_diff #if abs(percent_diff) > 10.0: # print "WARNING: Deviation of base alignment model MW (%.2f %%) from mean too large" % percent_diff # print "WARNING: Stopping here with %d models in the ensemble" % mcount # print "" # break self.setSPXYZIN(input_PDBFile) self.setSPXYZOUT(os.path.join(m.model_directory, 'mr', 'pdbfiles', mstat.chain_list[chain].chainName + '_superpose.pdb')) # Set the command line #command_line = self.superposeEXE + " " + self.SPxyzin + " " + self.SPxyz_template + " -p " + self.SPxyzout command_line = self.gesamtEXE + " " + self.SPxyzin + " " + self.SPxyz_template + " " + self.SPxyzout # Launch superpose if os.name == "nt": process_args = shlex.split(command_line, posix=False) p = subprocess.Popen(process_args, shell="True", stdin = subprocess.PIPE, 
                                 # NOTE(review): the source text above is corrupted - "if ecount 10.0:" is
                                 # not valid Python.  The lost span almost certainly contained the selection
                                 # of the alignment base/template (setSPXYZ_TEMPLATE), the loop over candidate
                                 # chains (which binds input_PDBFile, chain and rotated_chain_list, all used
                                 # below), and the molecular-weight screen that computes current_MW and
                                 # percent_diff.  Recover this block from the upstream MRBUMP Ensemble.py.
                                 # NOTE(review): on Windows, Popen is called with shell="True" - a non-empty
                                 # STRING, which is merely truthy; the boolean True was presumably intended.
                                 stdout = subprocess.PIPE, stderr = subprocess.STDOUT) else: process_args = shlex.split(command_line) p = subprocess.Popen(process_args, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.STDOUT) (o, i) = (p.stdout, p.stdin) i.close() # Capture the output from superpose log=open(self.SP_logfile_last, "w") line=o.readline() while line: log.write(line) line=o.readline() o.close() log.close() last_log=open(self.SP_logfile_last, 'r') line = last_log.readline() while line: # copy to full log file, that is kept full_log.write(line) # extract info from current log file if 'RMSD' in line and "=" in line: rmsd_index = string.split(line).index('RMSD') rmsd = float(string.split(line)[rmsd_index+2].replace(",", "")) align_length = int(string.split(line)[-1]) print "which aligns to base model with RMSD = %.3f over %d residues" % (rmsd,align_length) line = last_log.readline() last_log.close() # Add this pdb code to the ensemble list (along with its seq ID and rms value) m.PDBfile.append(self.SPxyzout) m.seqID.append(mstat.chain_list[chain].seqID) m.rms.append(mstat.chain_list[chain].rms) # When we reach the number of ensemble structures to be used, break out. 
# The gesamt stdout is first written to a per-model "last" log, then copied
# into the cumulative full_log; the "RMSD ... = x, over N residues" line is
# parsed with the Python-2 string module (string.split) and reported via a
# Python-2 print statement - this module runs under ccp4-python (Python 2).
mcount=mcount+1 if mcount >= m.num_per_ensem: print "" break # If there are too many models in each ensemble, reduce if mcount >= len(rotated_chain_list)-1: print "Warning: number of models in each ensemble reduced to %d to ensure ensemble uniqueness\n"%mcount m.num_per_ensem = mcount break full_log.close() if mcount < m.num_per_ensem: m.num_per_ensem = mcount # Add the ensemble to the model list and create an entry in the results_dict mstat.model_list[m.name]=m # Make the results dictionary import make_dictionary mstat.results_dict[m.name]=make_dictionary.makeDict(init.search_dir) # Insert the ensemble at the top of the sorted list mstat.sorted_model_list.insert(0,m.name) ecount += 1 def makeGesamtEnsemble(self, init, mstat): """ Make the Gesamt ensemble """ ############################################################ # ensemblingSortedModels=[] # if init.keywords.DOPHMMER: # for chain in mstat.sorted_MR_list: # if mstat.PHresultsDict[chain].score >= init.keywords.PMRCUTOFF: # print chain, init.keywords.PMRCUTOFF, mstat.PHresultsDict[chain].localSEQID, mstat.PHresultsDict[chain].score # ensemblingSortedModels.append(chain) # elif init.keywords.DOHHPRED: # for chain in mstat.sorted_MR_list: # if mstat.HHresultsDict[chain].score >= init.keywords.HHPCUTOFF: # print chain, init.keywords.HHPCUTOFF, mstat.PHresultsDict[chain].localSEQID, mstat.PHresultsDict[chain].score # ensemblingSortedModels.append(chain) # else: # sys.stdout.write("Ensembling with no search\n\n") # print ensemblingSortedModels ############################################################ gesamtTuple=collections.namedtuple('GesamtModel', ['pdbID', 'chainID', 'modelName', 'pdbFile', 'number']) gesamtModelList=[] count=0 number=len(mstat.sorted_MR_list) if init.keywords.MDLSCULPTOR: for chain in mstat.sorted_MR_list: if os.path.isfile(mstat.chain_list[chain].sculptor_modelPDB): gT=gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, mstat.chain_list[chain].chainName + '_SCLPTR', 
# (continuation of makeGesamtEnsemble) Each branch below builds one GesamtModel
# namedtuple per prepared search model found on disk; the branches are
# identical apart from the keyword tested, the chainName suffix, and the
# *_modelPDB attribute read, tried in fixed priority order: SCULPTOR,
# CHAINSAW, MOLREP, UNMOD, PDBCLP, PLYALA, with unmodified models as fallback.
# mstat.EnsTemplate records which preparation style won.
mstat.chain_list[chain].sculptor_modelPDB, number) gesamtModelList.append(gT) count=count+1 number=count mstat.EnsTemplate="SCLPTR" elif init.keywords.MDLCHAINSAW: for chain in mstat.sorted_MR_list: if os.path.isfile(mstat.chain_list[chain].chainsaw_modelPDB): gT=gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, mstat.chain_list[chain].chainName + '_CHNSAW', mstat.chain_list[chain].chainsaw_modelPDB, number) gesamtModelList.append(gT) count=count+1 number=count mstat.EnsTemplate="CHNSAW" elif init.keywords.MDLMOLREP: for chain in mstat.sorted_MR_list: if os.path.isfile(mstat.chain_list[chain].molrep_modelPDB): gT=gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, mstat.chain_list[chain].chainName + '_MOLREP', mstat.chain_list[chain].molrep_modelPDB, number) gesamtModelList.append(gT) count=count+1 number=count mstat.EnsTemplate="MOLREP" elif init.keywords.MDLUNMOD: for chain in mstat.sorted_MR_list: if os.path.isfile(mstat.chain_list[chain].unmod_modelPDB): gT=gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, mstat.chain_list[chain].chainName + '_UNMOD', mstat.chain_list[chain].unmod_modelPDB, number) gesamtModelList.append(gT) count=count+1 number=count mstat.EnsTemplate="UNMOD" elif init.keywords.MDLDPDBCLP: for chain in mstat.sorted_MR_list: if os.path.isfile(mstat.chain_list[chain].pdbclp_modelPDB): gT=gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, mstat.chain_list[chain].chainName + '_PDBCLP', mstat.chain_list[chain].pdbclp_modelPDB, number) gesamtModelList.append(gT) count=count+1 number=count mstat.EnsTemplate="PDBCLP" elif init.keywords.MDLPLYALA: for chain in mstat.sorted_MR_list: if os.path.isfile(mstat.chain_list[chain].plyala_modelPDB): gT=gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, mstat.chain_list[chain].chainName + '_PLYALA', mstat.chain_list[chain].plyala_modelPDB, number) gesamtModelList.append(gT) 
# "number" appears to be len(sorted_MR_list) for the first accepted model and
# the running count thereafter (number=count after each append) - it is later
# used to map Gesamt's numbered "<basename>_<number>.pdb" outputs back to
# their original filenames; confirm against the un-mangled source.
count=count+1 number=count mstat.EnsTemplate="PLYALA" else: for chain in mstat.sorted_MR_list: if os.path.isfile(mstat.chain_list[chain].unmod_modelPDB): gT=gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, mstat.chain_list[chain].chainName + '_UNMOD', mstat.chain_list[chain].unmod_modelPDB, number) gesamtModelList.append(gT) count=count+1 number=count mstat.EnsTemplate="UNMOD" if gesamtModelList==[]: sys.stdout.write("Error: No pdb models found for Gesamt alignment\n") sys.stdout.write("\n") sys.exit() # take the first non-ensemble entry as alignment base and remove from list mstat.gesamtAlignModel = gesamtModelList[0].modelName alignModelPDB = gesamtModelList[0].pdbFile # Set the sequence ID for the ensembles - it should be the seqID of the alignment base model mstat.gesamtEnsembleSeqID=mstat.chain_list[mstat.sorted_MR_list[0]].seqID # Set the Gesamt log file, alignment output file and the script file mstat.gesamtAlnFile=os.path.join(init.sequences_dir, "gesamtMultModelAlign.seq") logfile=os.path.join(init.logs_dir, "gesamtMultModelAlign.log") gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlign.sh") mstat.gesamtNumModels=len(gesamtModelList) # Set the name of the multi-model output pdb from Gesamt (Base Ensemble) mstat.gesamtBaseEnsemble=os.path.join(mstat.models_dir, "ensembles", "gesamtBaseAlignment.pdb") mstat.gesamtCSVFile=os.path.join(init.logs_dir, "gesamtCSVfile.csv") mstat.gesamtCSVCOREfile=os.path.join(init.logs_dir, "gesamtCSVCOREfile.csv") # Loop over the models and align them to the alignModel using Gesamt, then move them to the models directory if len(gesamtModelList) >= 2: pdbList=[] pdbDict=dict([]) for gesamtModel in gesamtModelList[1:]: # Create the pdb list and dictionary for this alignment pdbList.append(gesamtModel.pdbFile) pdbDict[gesamtModel.pdbFile]=gesamtModel.chainID pdbList.append(alignModelPDB) pdbDict[alignModelPDB]=gesamtModelList[0].chainID # Run Gesamt to align to the master and copy to the 
# NOTE(review): the two runGesamt calls below pass
# debug=eval(os.environ['MRBUMP_DEBUG']) with no try/except guard (unlike
# __init__), so an unset MRBUMP_DEBUG raises KeyError here - confirm the
# variable is always exported before this method runs.
# After the merged (MERGE_OUTPUT=True) run, Gesamt's numbered per-model
# outputs are renamed back to their original basenames and copied over the
# source model files.
models directory gesamtRun=MRBUMP_gesamt.Gesamt() gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, \ logfile=logfile, alnfile=mstat.gesamtAlnFile, MERGE_OUTPUT=False, \ script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG'])) # Run it again but this time output all models to one PDB file gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlignOnePDB.sh") logfile=os.path.join(init.logs_dir, "gesamtMultModelAlignOnePDB.log") gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, outputPDB=mstat.gesamtBaseEnsemble, \ logfile=logfile, alnfile=mstat.gesamtAlnFile, csvfile=mstat.gesamtCSVFile, MERGE_OUTPUT=True, \ script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG'])) for gesamtModel in gesamtModelList: gesamtAlignFile =os.path.splitext(os.path.basename(gesamtModel.pdbFile))[0] + "_" + str(gesamtModel.number) + ".pdb" gesamtOrigFile =os.path.splitext(os.path.basename(gesamtModel.pdbFile))[0] + ".pdb" shutil.move(os.path.join(mstat.models_dir, gesamtAlignFile), os.path.join(mstat.models_dir, gesamtOrigFile)) shutil.copyfile(os.path.join(mstat.models_dir, gesamtOrigFile), gesamtModel.pdbFile) # If there was only one model found in the search then don't run gesamt and just copy the model to the models directory else: if os.path.isfile(alignModelPDB): shutil.copyfile(alignModelPDB, os.path.join(mstat.models_dir, os.path.basename(alignModelPDB))) else: sys.stdout.write("Warning: could not find model file:\n %s\n" % alignModelPDB) def makeGESAMTEnsembles(self, init, mstat): """ Make Gesamt variance-based truncated ensembles """ slevel=10 struncation="Cbeta" truncationList=[] if init.keywords.GTLEVEL != None: if isinstance(init.keywords.GTLEVEL, float) or isinstance(init.keywords.GTLEVEL, int): sys.stdout.write("GTLEVEL has been set to %s. 
Outputing single truncated Gesamt ensemble at this truncation level\n" % str(init.keywords.GTLEVEL)) truncationList=[str(init.keywords.GTLEVEL)] else: sys.stdout.write("Warning: GTLEVEL keyword has been given but is neither a float nor an integer. Defaulting to 10.0 Anstroms truncation level for Gesamt Ensemble\n") truncationList=["10.0"] else: #truncationList=["20.0", "10.0", "8.0", "6.0", "5.0", "4.0", "3.0", "2.0", "1.0", "0.8", "0.6", "0.5", "0.4", "0.3", "0.2", "0.1"] j=init.keywords.GTPERCENT while j <=100: truncationList.append(j) j=j+init.keywords.GTPERCENT gesamtRun=MRBUMP_gesamt.Gesamt() for i in truncationList: gesamtRun.makeGesTruncEnsemble(mstat.gesamtBaseEnsemble, os.path.join(mstat.models_dir, "ensembles", "gesamtEnsTrunc_%s_Side%s.pdb" % (i,struncation)), variancePercent=init.keywords.GTPERCENT, csvFile=mstat.gesamtCSVFile, csvCOREfile=mstat.gesamtCSVCOREfile, truncation_level=float(i), sidechain_level=slevel) def makeAMPLEEnsembles(self, init, mstat): """ Make AMPLE style truncated ensembles """ ensembles = self.ample_generate_ensembles(init, alignment_file=mstat.gesamtAlnFile + ".gesamt", logdir=os.path.join(init.search_dir,'logs')) def makeEnsembleDict(self, init, mstat): """ Make the dictionary entries for the Gesamt ensemble(s) """ ensFileList=os.listdir(os.path.join(mstat.models_dir, "ensembles")) ensembleTuple=collections.namedtuple('ensembleModel', ['modelName', 'pdbFile', 'truncationLevel', 'pdbName', 'fileName']) for pdbfile in ensFileList: if init.keywords.AMPTRUNC: if "_" in pdbfile and "gesamt" not in pdbfile.lower(): if string.split(pdbfile, "_")[0][0]=="e": truncationLevel=string.split(pdbfile, "_")[0].replace("e", "") else: truncationLevel="110" if int(truncationLevel) < 10: pdbName="e00" + truncationLevel elif int(truncationLevel) < 100: pdbName="e0" + truncationLevel else: pdbName="e" + truncationLevel if init.keywords.GESTRUNC: if "_Side" in pdbfile and "gesamt" in pdbfile.lower(): truncationLevel=string.split(pdbfile, "_")[1] 
# NOTE(review): in makeGESAMTEnsembles above, the GTLEVEL branch fills
# truncationList with STRINGS while the GTPERCENT loop appends NUMBERS; both
# survive only because entries pass through "%s" formatting and float(i)
# downstream - worth normalising to one type.  Also, the "while j <=100"
# accumulator never terminates if GTPERCENT <= 0, and makeAMPLEEnsembles
# discards the ensembles return value (the files are presumably picked up
# from models_dir/ensembles by makeEnsembleDict - confirm).
# In makeEnsembleDict, pdbName is zero-padded (e.g. e005/e050/e110) so
# ensembles sort by truncation level; "110" is the sentinel for AMPLE files
# with no parsable level.
pdbName=os.path.splitext(os.path.basename(pdbfile))[0] elif "gesamtBaseAlignment" in pdbfile: truncationLevel="All" pdbName=os.path.splitext(os.path.basename(pdbfile))[0] if not init.keywords.GESTRUNC and not init.keywords.AMPTRUNC: truncationLevel="All" pdbName=os.path.splitext(os.path.basename(pdbfile))[0] eT=ensembleTuple(os.path.splitext(pdbfile)[0], os.path.join(mstat.models_dir, "ensembles", pdbfile), truncationLevel, pdbName, pdbfile) mstat.ampleEnsembleList.append(eT) # Set up the model list entry m=Model_struct.Models_struct() m.setModelChainSource('ensemble_model') m.setPDBName(eT.pdbName) m.setModelName('ensemble_%s' % eT.pdbName) m.setModelType('ENSMBL') m.setModel_directory(os.path.join(init.search_dir, 'data', 'ensemble_%s'% eT.pdbName)) m.PDBfile=[] m.PDBfile.append(eT.pdbFile) m.seqID.append(mstat.gesamtEnsembleSeqID) m.rms.append(mstat.chain_list[mstat.sorted_MR_list[0]].rms) m.num_per_ensem=1#mstat.gesamtNumModels # Create the directory structure os.mkdir(m.model_directory) os.mkdir(os.path.join(m.model_directory, "mr")) os.mkdir(os.path.join(m.model_directory, "mr", "pdbfiles")) os.mkdir(os.path.join(m.model_directory, "mr", "logs")) os.mkdir(os.path.join(m.model_directory, "mr", "tmp")) if "MOLREP" in init.keywords.MR_PROGRAM_LIST: os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep')) os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep', 'refine')) if "PHASER" in init.keywords.MR_PROGRAM_LIST: os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser')) os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser', 'refine')) shutil.copyfile(eT.pdbFile, os.path.join(m.model_directory, eT.fileName)) # Add the ensemble to the model list and create an entry in the results_dict mstat.model_list[m.name]=m # Make the results dictionary import make_dictionary mstat.results_dict[m.name]=make_dictionary.makeDict(init.search_dir) # Insert the ensemble at the top of the sorted list mstat.sorted_model_list.insert(0,m.name) # Set the number of AMPLE/Gesamt 
# NOTE(review): ample_generate_ensembles (below) attaches a new DEBUG-level
# FileHandler to the ROOT logger on every call and never removes it -
# repeated calls duplicate log output (handler leak).  It also os.chdir()s
# into the work directory without restoring the previous cwd.
ensembles mstat.ampleNumModels=len(mstat.ampleEnsembleList) def ample_generate_ensembles(self, init, alignment_file=None, logdir=None): """ Make the truncated ensembles using AMPLE method """ # TODO: requires treatment for ab initio truncation method. Only homologs work right now if ensembler_util is None: raise ImportError("Cannot import ample ensembler_util") logger = logging.getLogger() logger.setLevel(logging.DEBUG) # create file handler for debug output assert logdir logfile = os.path.join(logdir,'ample.log') fl = logging.FileHandler(logfile) fl.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s - %(name)s [%(lineno)d] - %(levelname)s - %(message)s') fl.setFormatter(formatter) logger.addHandler(fl) # get the module for homologs ensembling ensemble_module = ensembler_util.find_ensembler_module({"homologs" : True, "single_model_mode" : False}) ensembler = ensemble_module.Ensembler() ensembler.theseus_exe = os.path.join(os.environ['CCP4'], 'bin', 'theseus' + ample_util.EXE_EXT) ensembler.maxcluster_exe = ample_util.find_maxcluster({'maxcluster_exe' : None, 'rcdir' : os.path.join( os.path.expanduser("~"), ".ample") }) ensembler.subcluster_exe = ensembler.maxcluster_exe ensembler.max_ensemble_models = 30 cluster_exe = os.path.join(os.environ['CCP4'], 'bin', 'spicker' + ample_util.EXE_EXT) models_dir = os.path.join(init.search_dir, 'models') ensembles_directory = os.path.join(models_dir, 'ensembles') if not os.path.isdir(ensembles_directory): os.mkdir(ensembles_directory) work_dir = os.path.join(init.search_dir, 'models', 'ensemble_workdir') os.mkdir(work_dir) os.chdir(work_dir) models = glob.glob(os.path.join(models_dir, "*.pdb")) # Works specifically for homologs here. 
# The call below runs AMPLE's homolog ensembler: 5% stepwise 'percent'
# truncation, gesamt as the homolog aligner, single process; the words
# "Kwargs would need to be changed..." on the next line are the tail of the
# comment split above by the text extraction.
Kwargs would need to be changed to figure out otherwise ensembles = ensembler.generate_ensembles(models, percent_truncation=5, truncation_method='percent', ensembles_directory=ensembles_directory, alignment_file=alignment_file, work_dir=work_dir, nproc=1, homolog_aligner='gesamt', mustang_exe=None, gesamt_exe=os.path.join(os.environ['CCP4'], 'bin', 'gesamt' + ample_util.EXE_EXT), side_chain_treatments=None ) return ensembles