#! /usr/bin/env ccp4-python
#
# Copyright (C) 2005 Ronan Keegan
#
# This code is distributed under the terms and conditions of the
# CCP4 Program Suite Licence Agreement as a CCP4 Application.
# A copy of the CCP4 licence can be obtained by writing to the
# CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK.
#
#
# Setup Ensemble for Phaser MR
# Ronan Keegan 12/12/04
#
# NOTE(review): this module is Python 2 ("print" statements, has_key,
# string.split).  It is run under ccp4-python; do not assume Python 3.

import logging
import os, sys, string
import shutil
import mol_weight
import subprocess, shlex
import rtPDB

# MRBUMP requires a CCP4 installation to be set up.
if not os.environ.has_key('CCP4'):
    raise RuntimeError, 'CCP4 must be defined'

import Model_struct
import MRBUMP_gesamt
import glob
import collections

# AMPLE is optional; the AMPLE-style truncation path checks for None below.
# NOTE(review): "ample.util.foo" looks suspect -- the usual AMPLE layout is
# "from ample.util import ample_util"; confirm against the AMPLE package.
try:
    from ample.util.foo import ample_util
    from ample.ensembler import ensembler_util
except ImportError:
    ample_util = ensembler_util = None


class Ensemble:
    """ A class to construct an ensemble of the top match PDBs for Phaser. """

    def __init__(self):
        # Executables used for structural superposition; CBIN must be set
        # by the CCP4 environment (this will KeyError otherwise).
        if os.name == "nt":
            self.superposeEXE=os.path.join(os.environ['CBIN'], 'superpose.exe')
            self.gesamtEXE=os.path.join(os.environ['CBIN'], 'gesamt.exe')
            self.pdbsetEXE=os.path.join(os.environ['CBIN'], 'pdbset.exe')
        else:
            self.superposeEXE=os.path.join(os.environ['CBIN'], 'superpose')
            self.gesamtEXE=os.path.join(os.environ['CBIN'], 'gesamt')
            self.pdbsetEXE=os.path.join(os.environ['CBIN'], 'pdbset')

        # Superpose/gesamt run parameters: template, input, output and logs.
        self.SPxyz_template=''
        self.SPxyzin=''
        self.SPxyzout=''
        self.SP_logfile=''
        self.SP_logfile_last=''

        self.DB_fail=False
        self.num_per_ensem=0

        # Running molecular-weight statistics used when filtering models
        # whose MW deviates too far from the ensemble mean.
        self.base_MW=0
        self.meanMW=0
        self.total_MW_ensem=0

        # Debug flag taken from the environment.
        # NOTE(review): bare except silently masks any error here (including
        # a malformed MRBUMP_DEBUG value); eval() of an env var is also
        # risky.  Consider "except (KeyError, SyntaxError, NameError)".
        try:
            self.debug=eval(os.environ['MRBUMP_DEBUG'])
        except:
            self.debug=False

        # One record per search model prepared for Gesamt alignment.
        #self.gesamtTuple=collections.namedtuple('GesamtModel', ['pdbID', 'chainID', 'domainID', 'modelName', 'pdbFile', 'number'])
        self.gesamtTuple=collections.namedtuple('GesamtModel', ['pdbID', 'chainID', 'domainID', 'modelName', 'pdbFile', 'unmodPDBFile', 'number'])
        self.gesamtModelList=[]

    # --- Simple setters -------------------------------------------------

    def setDEBUG(self, flag):
        self.debug=flag

    def setSPXYZ_TEMPLATE(self, xyzin):
        # Template (fixed) structure for superposition.
        self.SPxyz_template=xyzin

    def setSPXYZIN(self, xyzin):
        # Moving structure to be superposed onto the template.
        self.SPxyzin=xyzin

    def setSPXYZOUT(self, xyzout):
        self.SPxyzout=xyzout

    def setSPLOGFILE(self, filename):
        # Cumulative log for all superpositions in the current ensemble.
        self.SP_logfile=filename

    def setSPLOGFILELAST(self, filename):
        # Log of the most recent superposition only.
        self.SP_logfile_last=filename

    def setNumPerEnsem(self, num_per_ensem):
        self.num_per_ensem=num_per_ensem

    # --- Main entry point -----------------------------------------------

    def setup_ensemble(self, mstat, init, target_info, domain=None, outputEnsemblesDir=""):
        """ A function to setup structure data for running Phaser with ensembles.
            Calls superpose/gesamt to line up each of the template structures
            with each other.

            mstat       : MRBUMP master status object (model/chain bookkeeping)
            init        : MRBUMP initialisation object (keywords, directories)
            target_info : target description; holds targetDomainDict
            domain      : optional domain ID; restricts the working list to
                          that domain's matches
            outputEnsemblesDir : destination for ensemble PDB files; defaults
                          to <models_dir>/ensembles
        """

        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Chains to process: all sorted MR matches, or just this domain's.
        if domain==None:
            workingList=mstat.sorted_MR_list
        else:
            workingList=target_info.targetDomainDict[domain].matches

        # Assingment of ensemble model to Gesamt ensemble model
        if init.keywords.GESENSEM:
            if init.keywords.AMPTRUNC:
                sys.stdout.write("Making Gesamt/AMPLE Ensembles\n")
            else:
                sys.stdout.write("Making Gesamt Ensemble\n")
            sys.stdout.write("\n")

            # Align all models with Gesamt; returns how many were processed.
            modelCount=self.makeGesamtEnsemble(init, mstat, target_info, domain=domain, outputEnsemblesDir=outputEnsemblesDir)

            if modelCount > 0:
                if init.keywords.GESTRUNC and len(workingList) >= 2:
                    # Variance-truncated ensembles from the base alignment.
                    self.makeGESAMTEnsembles(init, mstat, domain=domain, outputEnsemblesDir=outputEnsemblesDir)

                    # Re-point each match's model PDB at the per-domain copy.
                    # NOTE(review): unlike the two branches below, this one
                    # only updates the paths -- presumably because
                    # makeGesamtEnsemble has already moved the aligned files
                    # into the domain directory; confirm.
                    for domainMatchName in target_info.targetDomainDict[domain].matches:
                        # for TYPE in [init.keywords.MDLSCULPTOR, init.keywords.MDLCHAINSAW, init.keywords.MDLMOLREP, init.keywords.MDLUNMOD, init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA]:
                        if init.keywords.MDLSCULPTOR:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].sculptor_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].sculptor_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLCHAINSAW:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].chainsaw_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].chainsaw_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLMOLREP:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].molrep_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].molrep_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLUNMOD:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].unmod_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].unmod_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLDPDBCLP:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].pdbclip_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].pdbclip_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                        elif init.keywords.MDLPLYALA:
                            tempFilename=os.path.split(mstat.chain_list[domainMatchName].plyala_modelPDB)[-1]
                            mstat.chain_list[domainMatchName].plyala_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)

                elif init.keywords.GESTRUNC and len(workingList) < 2:
                    # Only one match: no truncation possible.  Move the single
                    # model into the domain directory and update its path.
                    domainMatchName=target_info.targetDomainDict[domain].matches[0]
                    #for TYPE in [init.keywords.MDLSCULPTOR, init.keywords.MDLCHAINSAW, init.keywords.MDLMOLREP, init.keywords.MDLUNMOD, init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA]:
                    if init.keywords.MDLSCULPTOR:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].sculptor_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].sculptor_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].sculptor_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLCHAINSAW:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].chainsaw_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].chainsaw_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].chainsaw_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLMOLREP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].molrep_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].molrep_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].molrep_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLUNMOD:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].unmod_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].unmod_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].unmod_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLDPDBCLP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].pdbclip_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].pdbclip_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].pdbclip_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLPLYALA:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].plyala_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].plyala_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].plyala_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)

                elif init.keywords.GESTRUNC == False:
                    # No gesamt truncation requested.
                    # NOTE(review): this branch is byte-for-byte identical to
                    # the "len(workingList) < 2" branch above -- a candidate
                    # for a shared helper, and worth checking that handling
                    # only matches[0] here (rather than the whole list) is
                    # intentional.
                    domainMatchName=target_info.targetDomainDict[domain].matches[0]
                    #for TYPE in [init.keywords.MDLSCULPTOR, init.keywords.MDLCHAINSAW, init.keywords.MDLMOLREP, init.keywords.MDLUNMOD, init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA]:
                    if init.keywords.MDLSCULPTOR:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].sculptor_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].sculptor_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].sculptor_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLCHAINSAW:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].chainsaw_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].chainsaw_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].chainsaw_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLMOLREP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].molrep_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].molrep_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].molrep_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLUNMOD:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].unmod_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].unmod_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].unmod_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLDPDBCLP:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].pdbclip_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].pdbclip_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].pdbclip_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
                    elif init.keywords.MDLPLYALA:
                        tempFilename=os.path.split(mstat.chain_list[domainMatchName].plyala_modelPDB)[-1]
                        shutil.move(mstat.chain_list[domainMatchName].plyala_modelPDB, os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename))
                        mstat.chain_list[domainMatchName].plyala_modelPDB=os.path.join(mstat.models_dir, "domain_%s" % domain, "%s" % tempFilename)
            else:
                sys.stdout.write("Warning: no gesamt ensembles processed from this working list:\n %s\n" % (str(workingList)))

            # If we want to make Gesamt AMPLE ensembles
            if init.keywords.AMPTRUNC and len(mstat.sorted_MR_list) >= 2:
                self.makeAMPLEEnsembles(init, mstat)

            # Create the model dictionary entries for these ensembles
            self.makeEnsembleDict(init, mstat, target_info, domain=domain, outputEnsemblesDir=outputEnsemblesDir)

            sys.stdout.write("Number of Ensembles: %d\n" % mstat.ampleNumModels)
            sys.stdout.write("Number of search models in each Ensemble: %d\n" % mstat.gesamtNumModels)
            sys.stdout.write("\n")

        else:
            # Legacy (non-Gesamt-ensemble) path: build ENSNUM ensembles of
            # ENSMODNUM models each by superposing models with gesamt.

            # ensemble count
            ecount=0

            sys.stdout.write("Number of Ensembles: %d\n" % init.keywords.ENSNUM)
            sys.stdout.write("Number of search models in each Ensemble: %d\n" % init.keywords.ENSMODNUM)
            sys.stdout.write("\n")

            while ecount < init.keywords.ENSNUM:
                # model count
                mcount=0

                # Set up the model list entry
                m=Model_struct.Models_struct()
                m.setNumPerEnsem(init.keywords.ENSMODNUM)
                m.setModelChainSource('ensemble_model')
                m.setPDBName('ensm')
                m.setModelName('ensemble_model%d'%ecount)
                m.setModelType('ENSMBL')
                if os.path.isdir(os.path.join(init.search_dir, 'data', 'domain_%s' % domain)) == False:
                    os.mkdir(os.path.join(init.search_dir, 'data', 'domain_%s' % domain))
                m.setModel_directory(os.path.join(init.search_dir, 'data', 'domain_%s' % domain, 'ensemble_model%d'%ecount))

                # Create the directory structure
                os.mkdir(m.model_directory)
                os.mkdir(os.path.join(m.model_directory, "mr"))
                os.mkdir(os.path.join(m.model_directory, "mr", "pdbfiles"))
                os.mkdir(os.path.join(m.model_directory, "mr", "logs"))
                os.mkdir(os.path.join(m.model_directory, "mr", "tmp"))
                if "MOLREP" in init.keywords.MR_PROGRAM_LIST:
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep'))
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep', 'refine'))
                if "PHASER" in init.keywords.MR_PROGRAM_LIST:
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser'))
                    os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser', 'refine'))

                self.setSPLOGFILE(os.path.join(m.model_directory, 'mr', 'logs', 'superpose.log'))
                self.setSPLOGFILELAST(os.path.join(m.model_directory, 'mr', 'logs', 'superpose_last.log'))

                full_log=open(self.SP_logfile,'w')

                # NOTE(review): THE SOURCE TEXT IS CORRUPTED HERE.  Code has
                # been lost between "if ecount" and "10.0:".  Judging from the
                # surviving statements below, the missing code (a) selected the
                # superposition template for this ensemble (setSPXYZ_TEMPLATE /
                # self.base_MW), (b) built 'rotated_chain_list' from the sorted
                # MR list, and (c) opened a loop over its chains which set
                # 'chain', 'input_PDBFile' and 'current_MW' (presumably via
                # mol_weight) and computed 'percent_diff' against the running
                # mean MW.  The fragment below is indented as if inside that
                # lost loop.  RECOVER THE ORIGINAL FROM VERSION CONTROL before
                # relying on this path.
                if ecount 10.0:
                        sys.stdout.write("WARNING: Deviation of base alignment model MW (%.2f %%) from mean too large\n" % percent_diff)
                        sys.stdout.write("WARNING: Skipping model\n")
                        sys.stdout.write("\n")
                        continue
                    else:
                        if self.debug:
                            sys.stdout.write("Current average molecular weight = %.1f\n" % self.meanMW)
                            sys.stdout.write("Deviation of base alignment model from mean = %.2f %%\n" % percent_diff)

                    # Fold the accepted model into the running MW statistics.
                    self.total_MW_ensem += current_MW
                    self.meanMW=self.total_MW_ensem/(mcount+1)

                    #self.total_MW_ensem += current_MW
                    #mean_MW_ensem = self.total_MW_ensem / (mcount+1)
                    #percent_diff = (self.base_MW - mean_MW_ensem) * 100.0 / mean_MW_ensem
                    #print "Molecular weight of alignment model = %.1f" % current_MW
                    #if self.debug:
                    #   print "Current average molecular weight = %.1f" % mean_MW_ensem
                    #   print "Deviation of base alignment model from mean = %.2f %%" % percent_diff
                    #if abs(percent_diff) > 10.0:
                    #   print "WARNING: Deviation of base alignment model MW (%.2f %%) from mean too large" % percent_diff
                    #   print "WARNING: Stopping here with %d models in the ensemble" % mcount
                    #   print ""
                    #   break

                    self.setSPXYZIN(input_PDBFile)
                    self.setSPXYZOUT(os.path.join(m.model_directory, 'mr', 'pdbfiles', mstat.chain_list[chain].chainName + '_superpose.pdb'))

                    # Set the command line
                    #command_line = self.superposeEXE + " " + self.SPxyzin + " " + self.SPxyz_template + " -p " + self.SPxyzout
                    command_line = self.gesamtEXE + " " + self.SPxyzin + " " + self.SPxyz_template + " " + self.SPxyzout

                    # Launch superpose
                    # NOTE(review): shell="True" is a *string*, which Python
                    # treats as truthy -- so this behaves as shell=True even
                    # though it reads like a typo for the boolean; combined
                    # with an argument *list* on Windows the semantics are
                    # dubious.  Verify the intended invocation.
                    if os.name == "nt":
                        process_args = shlex.split(command_line, posix=False)
                        p = subprocess.Popen(process_args, shell="True", stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
                    else:
                        process_args = shlex.split(command_line)
                        p = subprocess.Popen(process_args, stdin = subprocess.PIPE, stdout = subprocess.PIPE, stderr = subprocess.STDOUT)

                    (o, i) = (p.stdout, p.stdin)
                    i.close()

                    # Capture the output from superpose
                    log=open(self.SP_logfile_last, "w")
                    line=o.readline()
                    while line:
                        log.write(line)
                        line=o.readline()
                    o.close()
                    log.close()

                    # Parse the per-run log: accumulate it into the full log
                    # and pull out the RMSD / alignment-length line.
                    last_log=open(self.SP_logfile_last, 'r')
                    line = last_log.readline()
                    while line:
                        # copy to full log file, that is kept
                        full_log.write(line)
                        # extract info from current log file
                        if 'RMSD' in line and "=" in line:
                            rmsd_index = string.split(line).index('RMSD')
                            rmsd = float(string.split(line)[rmsd_index+2].replace(",", ""))
                            align_length = int(string.split(line)[-1])
                            print "which aligns to base model with RMSD = %.3f over %d residues" % (rmsd,align_length)
                        line = last_log.readline()
                    last_log.close()

                    # Add this pdb code to the ensemble list (along with its seq ID and rms value)
                    m.PDBfile.append(self.SPxyzout)
                    m.seqID.append(mstat.chain_list[chain].seqID)
                    m.rms.append(mstat.chain_list[chain].rms)

                    # When we reach the number of ensemble structures to be used, break out.
                    mcount=mcount+1
                    if mcount >= m.num_per_ensem:
                        print ""
                        break

                    # If there are too many models in each ensemble, reduce
                    if mcount >= len(rotated_chain_list)-1:
                        print "Warning: number of models in each ensemble reduced to %d to ensure ensemble uniqueness\n"%mcount
                        m.num_per_ensem = mcount
                        break

                full_log.close()

                # Fewer models were accepted than requested: shrink the target.
                if mcount < m.num_per_ensem:
                    m.num_per_ensem = mcount

                # Add the ensemble to the model list and create an entry in the results_dict
                mstat.model_list[m.name]=m

                # Make the results dictionary
                import make_dictionary
                mstat.results_dict[m.name]=make_dictionary.makeDict(init.search_dir)

                # Insert the ensemble at the top of the sorted list
                mstat.sorted_model_list.insert(0,m.name)

                ecount += 1

    def gesamtPrep(self, MODELTYPE, modelList, mstat, count, domain=None):
        """ Prepare the Gesamt ensemble.

            Collects the model PDB file of type MODELTYPE (PDBCLP/PLYALA/
            UNMOD/MOLREP/CHNSAW/SCLPTR) for every chain in modelList, skipping
            chains whose file is missing, and fills self.gesamtModelList with
            GesamtModel named tuples.  Returns the updated running count.
        """

        self.gesamtModelList=[]
        tempDict=dict([])
        for chain in modelList:
            # Pick the prepared-model flavour requested by MODELTYPE.
            # NOTE(review): if MODELTYPE matches none of these, 'pdbFile'
            # from the previous iteration (or a NameError on the first) is
            # used below.
            if MODELTYPE=="PDBCLP":
                pdbFile=mstat.chain_list[chain].pdbclip_modelPDB
            elif MODELTYPE=="PLYALA":
                pdbFile=mstat.chain_list[chain].plyala_modelPDB
            elif MODELTYPE=="UNMOD":
                pdbFile=mstat.chain_list[chain].unmod_modelPDB
            elif MODELTYPE=="MOLREP":
                pdbFile=mstat.chain_list[chain].molrep_modelPDB
            elif MODELTYPE=="CHNSAW":
                pdbFile=mstat.chain_list[chain].chainsaw_modelPDB
            elif MODELTYPE=="SCLPTR":
                pdbFile=mstat.chain_list[chain].sculptor_modelPDB
            if os.path.isfile(pdbFile):
                tempDict[chain]=pdbFile
            else:
                sys.stdout.write("Warning: Missing %s output file! Missing file:\n %s\n" % (MODELTYPE, pdbFile))

        # 'number' seeds the first tuple with the total model count, then
        # tracks the running 'count' for subsequent tuples.
        number=len(tempDict.keys())
        for chain in tempDict.keys():
            gT=self.gesamtTuple(mstat.chain_list[chain].PDBName, mstat.chain_list[chain].chainID, domain, \
                                mstat.chain_list[chain].chainName + '_%s' % MODELTYPE, tempDict[chain], mstat.chain_list[chain].PDBFile, number)
            self.gesamtModelList.append(gT)
            count=count+1
            number=count
        #mstat.EnsTemplate=MODELTYPE

        return count

    def makeGesamtEnsemble(self, init, mstat, target_info, domain=None, outputEnsemblesDir=""):
        """ Make the Gesamt ensemble.

            Aligns all prepared models onto the first one with Gesamt (twice:
            once producing per-model aligned files, once merging everything
            into a single multi-model PDB), then files the results into the
            per-domain models directories.  Returns the number of models
            successfully processed.
        """

        # Setup the output ensembles directory if not processing a domain
        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Set the list of chains to process, domain-specific or not
        if domain==None:
            workingList=mstat.sorted_MR_list
        else:
            workingList=target_info.targetDomainDict[domain].matches

        # Work out which template to use: Sculptor is top preference etc.
        # The list runs lowest-to-highest preference, so the last truthy
        # keyword is the one whose model type string wins.
        modelTCount=0
        for TYPE in [init.keywords.MDLDPDBCLP, init.keywords.MDLPLYALA, init.keywords.MDLUNMOD, init.keywords.MDLMOLREP, init.keywords.MDLCHAINSAW, init.keywords.MDLSCULPTOR]:
            if TYPE:
                mstat.EnsTemplate=mstat.modelTypeStringList[modelTCount]
            modelTCount=modelTCount+1

        # Prepare the models for Gesamt alignment
        count=self.gesamtPrep(mstat.EnsTemplate, workingList, mstat, 0, domain=domain)

        if self.gesamtModelList==[]:
            sys.stdout.write("Error: No pdb models found for Gesamt alignment\n")
            sys.stdout.write("\n")
            sys.exit()

        # take the first non-ensemble entry as alignment base and remove from list
        mstat.gesamtAlignModel = self.gesamtModelList[0].modelName
        alignModelPDB = self.gesamtModelList[0].pdbFile

        # Set the sequence ID for the ensembles - it should be the seqID of the alignment base model
        mstat.gesamtEnsembleSeqID=mstat.chain_list[workingList[0]].seqID

        # Set the Gesamt log file, alignment output file and the script file
        mstat.gesamtAlnFile=os.path.join(init.sequences_dir, "gesamtMultModelAlign.seq")
        logfile=os.path.join(init.logs_dir, "gesamtMultModelAlign.log")
        gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlign.sh")

        mstat.gesamtNumModels=len(self.gesamtModelList)

        # Set the name of the multi-model output pdb from Gesamt (Base Ensemble)
        mstat.gesamtBaseEnsemble=os.path.join(outputEnsemblesDir, "gesamtBaseAlignment.pdb")
        #mstat.gesamtCSVFile=os.path.join(init.logs_dir, "gesamtCSVfile.csv")
        #mstat.gesamtCSVCOREfile=os.path.join(init.logs_dir, "gesamtCSVCOREfile.csv")
        # NOTE(review): "%d" requires an integer domain; a None domain will
        # raise TypeError here -- confirm callers always pass a domain on
        # this path.
        mstat.gesamtCSVFileDict[domain]=os.path.join(init.logs_dir, "gesamtCSVfile_%d.csv" % domain)
        mstat.gesamtCSVCOREfileDict[domain]=os.path.join(init.logs_dir, "gesamtCSVCOREfile_%d.csv" % domain)

        # Loop over the models and align them to the alignModel using Gesamt, then move them to the models directory
        modelCount=0
        if len(self.gesamtModelList) >= 2:
            pdbList=[]
            pdbDict=dict([])
            for gesamtModel in self.gesamtModelList[1:]:
                # Create the pdb list and dictionary for this alignment
                pdbList.append(gesamtModel.pdbFile)
                pdbDict[gesamtModel.pdbFile]=gesamtModel.chainID
            # The alignment base goes in last.
            pdbList.append(alignModelPDB)
            pdbDict[alignModelPDB]=self.gesamtModelList[0].chainID

            if mstat.EnsTemplate == "MOLREP":
                MODELTYPE="MOLREP"
            else:
                MODELTYPE=""

            # Run Gesamt to align to the master and copy to the models directory
            gesamtRun=MRBUMP_gesamt.Gesamt()
            gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, \
                                logfile=logfile, alnfile=mstat.gesamtAlnFile, MERGE_OUTPUT=False, \
                                TYPE=MODELTYPE, script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG']))

            # Run it again but this time output all models to one PDB file
            gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlignOnePDB.sh")
            logfile=os.path.join(init.logs_dir, "gesamtMultModelAlignOnePDB.log")
            gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, outputPDB=mstat.gesamtBaseEnsemble, \
                                logfile=logfile, alnfile=mstat.gesamtAlnFile, csvfile=mstat.gesamtCSVFileDict[domain], MERGE_OUTPUT=True, \
                                TYPE=MODELTYPE, script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG']))

            # File each aligned model: rename "<name>_<n>.pdb" back to
            # "<name>.pdb", copy it over the original, and move it into the
            # matching domain directory.
            for gesamtModel in self.gesamtModelList:
                gesamtAlignFile =os.path.splitext(os.path.basename(gesamtModel.pdbFile))[0] + "_" + str(gesamtModel.number) + ".pdb"
                gesamtOrigFile =os.path.splitext(os.path.basename(gesamtModel.pdbFile))[0] + ".pdb"
                if os.path.isfile(os.path.join(mstat.models_dir, gesamtAlignFile)):
                    shutil.move(os.path.join(mstat.models_dir, gesamtAlignFile), os.path.join(mstat.models_dir, gesamtOrigFile))
                    shutil.copyfile(os.path.join(mstat.models_dir, gesamtOrigFile), gesamtModel.pdbFile)
                    # modelName[0:7] is matched against the domain's match
                    # names -- presumably "<pdbID>_<chainID>"; verify.
                    for dom in target_info.targetDomainDict.keys():
                        if gesamtModel.modelName[0:7] in target_info.targetDomainDict[dom].matches:
                            tempPDBFilename=os.path.split(gesamtModel.pdbFile)[-1]
                            shutil.move(os.path.join(mstat.models_dir, gesamtOrigFile), os.path.join(mstat.models_dir, "domain_%s" % str(target_info.targetDomainDict[dom].ID), tempPDBFilename))
                            # rotate and tranlsate the unmodified version of the input pdb file
                            if init.keywords.RTUNMOD:
                                RTList=gesamtRun.extractCSVRTmatrix(csvFile=mstat.gesamtCSVFileDict[domain], alignedPDBFile=gesamtModel.pdbFile)
                                rtPDB.pdbRotateTranslate(pdbin=gesamtModel.unmodPDBFile, pdbout=os.path.join(mstat.models_dir, "domain_%s" % str(target_info.targetDomainDict[dom].ID), "unmodified", os.path.split(gesamtModel.unmodPDBFile)[-1]), chainID=gesamtModel.chainID, RTmatrix=[float(x) for x in RTList[-1]])
                    modelCount=modelCount+1
                else:
                    sys.stdout.write("Ensemble file move: file missing:\n %s\n" % os.path.join(mstat.models_dir, gesamtAlignFile))

        # If there was only one model found in the search then don't run gesamt and just copy the model to the models directory
        else:
            if os.path.isfile(alignModelPDB):
                shutil.copyfile(alignModelPDB, os.path.join(mstat.models_dir, "domain_%s" % str(self.gesamtModelList[0].domainID), os.path.split(alignModelPDB)[-1]))
                # copy in the unmodified version of the input pdb file. Here the position is the same as no alignment has been made
                if init.keywords.RTUNMOD:
                    shutil.copyfile(self.gesamtModelList[0].unmodPDBFile, os.path.join(mstat.models_dir, "domain_%s" % str(self.gesamtModelList[0].domainID), "unmodified", os.path.split(self.gesamtModelList[0].unmodPDBFile)[-1]))
                modelCount=modelCount+1
            else:
                sys.stdout.write("Warning: could not find model file:\n %s\n" % alignModelPDB)

        return modelCount

    def makeGESAMTEnsembles(self, init, mstat, domain=None, outputEnsemblesDir=""):
        """ Make Gesamt variance-based truncated ensembles.

            Builds one truncated ensemble PDB per truncation level, either a
            single level given by the GTLEVEL keyword or a ladder of levels in
            GTPERCENT steps up to 100.
        """

        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Side-chain treatment parameters baked into the output file names.
        slevel=10
        struncation="Cbeta"
        truncationList=[]
        if init.keywords.GTLEVEL != None:
            if isinstance(init.keywords.GTLEVEL, float) or isinstance(init.keywords.GTLEVEL, int):
                sys.stdout.write("GTLEVEL has been set to %s. Outputing single truncated Gesamt ensemble at this truncation level\n" % str(init.keywords.GTLEVEL))
                truncationList=[str(init.keywords.GTLEVEL)]
            else:
                sys.stdout.write("Warning: GTLEVEL keyword has been given but is neither a float nor an integer. Defaulting to 10.0 Anstroms truncation level for Gesamt Ensemble\n")
                truncationList=["10.0"]
        else:
            #truncationList=["20.0", "10.0", "8.0", "6.0", "5.0", "4.0", "3.0", "2.0", "1.0", "0.8", "0.6", "0.5", "0.4", "0.3", "0.2", "0.1"]
            # Ladder of levels: GTPERCENT, 2*GTPERCENT, ... up to 100.
            j=init.keywords.GTPERCENT
            while j <=100:
                truncationList.append(j)
                j=j+init.keywords.GTPERCENT

        gesamtRun=MRBUMP_gesamt.Gesamt()
        for i in truncationList:
            if domain == None:
                gesamtRun.makeGesTruncEnsemble(mstat.gesamtBaseEnsemble, os.path.join(outputEnsemblesDir, "gesamtEnsTrunc_%s_Side%s.pdb" % (i, struncation)), variancePercent=init.keywords.GTPERCENT, csvFile=mstat.gesamtCSVFileDict[domain], csvCOREfile=mstat.gesamtCSVCOREfileDict[domain], truncation_level=float(i), sidechain_level=slevel)
            else:
                gesamtRun.makeGesTruncEnsemble(mstat.gesamtBaseEnsemble, os.path.join(outputEnsemblesDir, "gesamtEnsTrunc_%s_%s_Side%s.pdb" % (domain, i, struncation)), variancePercent=init.keywords.GTPERCENT, csvFile=mstat.gesamtCSVFileDict[domain], csvCOREfile=mstat.gesamtCSVCOREfileDict[domain], truncation_level=float(i), sidechain_level=slevel)

    def makeAMPLEEnsembles(self, init, mstat):
        """ Make AMPLE style truncated ensembles (delegates to
            ample_generate_ensembles, feeding it the Gesamt alignment). """
        ensembles = self.ample_generate_ensembles(init, alignment_file=mstat.gesamtAlnFile + ".gesamt", logdir=os.path.join(init.search_dir,'logs'))

    def makeEnsembleDict(self, init, mstat, target_info, domain=None, outputEnsemblesDir=""):
        """ Make the dictionary entries for the Gesamt ensemble(s).

            Scans outputEnsemblesDir, derives a truncation level and PDB name
            for each ensemble file, and registers a Models_struct entry (with
            directory tree) for each one in mstat.
        """

        # Setup the output ensembles directory if not processing a domain
        if outputEnsemblesDir=="":
            outputEnsemblesDir=os.path.join(mstat.models_dir, "ensembles")

        # Set the list of chains to process, domain-specific or not
        if domain==None:
            workingList=mstat.sorted_MR_list
        else:
            workingList=target_info.targetDomainDict[domain].matches

        ensFileList=os.listdir(outputEnsemblesDir)

        ensembleTuple=collections.namedtuple('ensembleModel', ['modelName', 'pdbFile', 'truncationLevel', 'pdbName', 'fileName'])

        for pdbfile in ensFileList:
            # NOTE(review): if a file matches none of the patterns below,
            # 'truncationLevel'/'pdbName' are carried over from the previous
            # iteration (or raise NameError on the first) -- worth guarding.
            if init.keywords.AMPTRUNC:
                # AMPLE files are named "e<level>_..."; anything else gets
                # the sentinel level 110.
                if "_" in pdbfile and "gesamt" not in pdbfile.lower():
                    if string.split(pdbfile, "_")[0][0]=="e":
                        truncationLevel=string.split(pdbfile, "_")[0].replace("e", "")
                    else:
                        truncationLevel="110"
                    # Zero-pad to three digits: e007, e042, e110.
                    if int(truncationLevel) < 10:
                        pdbName="e00" + truncationLevel
                    elif int(truncationLevel) < 100:
                        pdbName="e0" + truncationLevel
                    else:
                        pdbName="e" + truncationLevel
            if init.keywords.GESTRUNC:
                # Gesamt truncation files: "gesamtEnsTrunc_<level>_Side...".
                if "_Side" in pdbfile and "gesamt" in pdbfile.lower():
                    truncationLevel=string.split(pdbfile, "_")[1]
                    pdbName=os.path.splitext(os.path.basename(pdbfile))[0]
                elif "gesamtBaseAlignment" in pdbfile:
                    truncationLevel="All"
                    pdbName=os.path.splitext(os.path.basename(pdbfile))[0]
            if not init.keywords.GESTRUNC and not init.keywords.AMPTRUNC:
                truncationLevel="All"
                pdbName=os.path.splitext(os.path.basename(pdbfile))[0]
                # We still produce the CSVCORE for ccp4mg
                if init.keywords.GESENSEM:
                    gesamtRun=MRBUMP_gesamt.Gesamt()
                    gesamtRun.makeGesTruncFile(variancePercent=init.keywords.GTPERCENT, csvFile=mstat.gesamtCSVFileDict[domain], csvCOREfile=mstat.gesamtCSVCOREfileDict[domain])

            eT=ensembleTuple(os.path.splitext(pdbfile)[0], os.path.join(outputEnsemblesDir, pdbfile), truncationLevel, pdbName, pdbfile)
            mstat.ampleEnsembleList.append(eT)

            # Set up the model list entry
            m=Model_struct.Models_struct()
            m.setModelChainSource('ensemble_model')
            m.targetDomain=domain
            m.myPHTargetDomain=domain
            m.setPDBName(eT.pdbName)
            m.setModelType('ENSMBL')
            if domain == None:
                m.setModelName('ensemble_%s' % eT.pdbName)
                m.setModel_directory(os.path.join(init.search_dir, 'data', 'ensemble_%s'% eT.pdbName))
            else:
                m.setModelName('ensemble_%s_%s' % (eT.pdbName, domain))
                m.setModel_directory(os.path.join(init.search_dir, 'data', 'ensemble_%s_%d'% (eT.pdbName, domain)))
            m.PDBfile=[]
            m.PDBfile.append(eT.pdbFile)
            m.seqID.append(mstat.gesamtEnsembleSeqID)
            m.rms.append(mstat.chain_list[workingList[0]].rms)
            m.num_per_ensem=1#mstat.gesamtNumModels

            # Create the directory structure
            os.mkdir(m.model_directory)
            os.mkdir(os.path.join(m.model_directory, "mr"))
            os.mkdir(os.path.join(m.model_directory, "mr", "pdbfiles"))
            os.mkdir(os.path.join(m.model_directory, "mr", "logs"))
            os.mkdir(os.path.join(m.model_directory, "mr", "tmp"))
            if "MOLREP" in init.keywords.MR_PROGRAM_LIST:
                os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep'))
                os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep', 'refine'))
            if "PHASER" in init.keywords.MR_PROGRAM_LIST:
                os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser'))
                os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser', 'refine'))

            shutil.copyfile(eT.pdbFile, os.path.join(m.model_directory, eT.fileName))

            # Add the ensemble to the model list and create an entry in the results_dict
            mstat.model_list[m.name]=m

            # Make the results dictionary
            import make_dictionary
            mstat.results_dict[m.name]=make_dictionary.makeDict(init.search_dir)

            # Insert the ensemble at the top of the sorted list
            mstat.sorted_model_list.insert(0,m.name)

        # Set the number of AMPLE/Gesamt ensembles
        mstat.ampleNumModels=len(mstat.ampleEnsembleList)

    def ample_generate_ensembles(self, init, alignment_file=None, logdir=None):
        """ Make the truncated ensembles using AMPLE method.

            Requires the optional AMPLE package; raises ImportError if it was
            not importable at module load.  Returns the list of ensembles
            produced by AMPLE's homolog ensembler.

            NOTE(review): this chdir()s into the work directory and does not
            restore the previous cwd.
        """
        # TODO: requires treatment for ab initio truncation method. Only homologs work right now
        if ensembler_util is None:
            raise ImportError("Cannot import ample ensembler_util")

        logger = logging.getLogger()
        logger.setLevel(logging.DEBUG)

        # create file handler for debug output
        assert logdir
        logfile = os.path.join(logdir,'ample.log')
        fl = logging.FileHandler(logfile)
        fl.setLevel(logging.DEBUG)
        formatter = logging.Formatter('%(asctime)s - %(name)s [%(lineno)d] - %(levelname)s - %(message)s')
        fl.setFormatter(formatter)
        logger.addHandler(fl)

        # get the module for homologs ensembling
        ensemble_module = ensembler_util.find_ensembler_module({"homologs" : True, "single_model_mode" : False})
        ensembler = ensemble_module.Ensembler()

        # External executables AMPLE needs (theseus/maxcluster from CCP4).
        ensembler.theseus_exe = os.path.join(os.environ['CCP4'], 'bin', 'theseus' + ample_util.EXE_EXT)
        ensembler.maxcluster_exe = ample_util.find_maxcluster({'maxcluster_exe' : None, 'rcdir' : os.path.join( os.path.expanduser("~"), ".ample") })
        ensembler.subcluster_exe = ensembler.maxcluster_exe
        ensembler.max_ensemble_models = 30
        # NOTE(review): cluster_exe is computed but never used below.
        cluster_exe = os.path.join(os.environ['CCP4'], 'bin', 'spicker' + ample_util.EXE_EXT)

        models_dir = os.path.join(init.search_dir, 'models')
        ensembles_directory = os.path.join(models_dir, 'ensembles')
        if not os.path.isdir(ensembles_directory):
            os.mkdir(ensembles_directory)

        work_dir = os.path.join(init.search_dir, 'models', 'ensemble_workdir')
        os.mkdir(work_dir)
        os.chdir(work_dir)

        models = glob.glob(os.path.join(models_dir, "*.pdb"))

        # Works specifically for homologs here. Kwargs would need to be changed to figure out otherwise
        ensembles = ensembler.generate_ensembles(models,
                                                 percent_truncation=5,
                                                 truncation_method='percent',
                                                 ensembles_directory=ensembles_directory,
                                                 alignment_file=alignment_file,
                                                 work_dir=work_dir,
                                                 nproc=1,
                                                 homolog_aligner='gesamt',
                                                 mustang_exe=None,
                                                 gesamt_exe=os.path.join(os.environ['CCP4'], 'bin', 'gesamt' + ample_util.EXE_EXT),
                                                 side_chain_treatments=None )
        return ensembles