#! /usr/bin/env ccp4-python
#
#     Copyright (C) 2005 Ronan Keegan
#
#     This code is distributed under the terms and conditions of the
#     CCP4 Program Suite Licence Agreement as a CCP4 Application.
#     A copy of the CCP4 licence can be obtained by writing to the
#     CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK.
#
#
# Setup scripts for the MR pipeline
# Ronan Keegan 21/12/04

import ast
import os
import shutil
import string
import sys

import smartie


class Models_struct:
    """
    A class structure to hold model details.

    Every attribute is created with a default value in __init__.  The
    set*/is* methods below are plain accessors: each setter stores its
    argument on one attribute and performs no validation.
    """

    def __init__(self):
        self.chain_source = ''
        self.source = ''
        self.PDBName = ''
        self.seqID = []
        self.rms = []
        self.COPIES_SET = False
        self.no_copies = 1
        self.num_mols_found = 0
        self.num_per_ensem = 1
        self.number_mols = 1
        self.number_domains = 1
        self.targetDomain = 1
        self.myPHTargetDomain = 0
        self.myHHTargetDomain = 0
        self.resolution = None

        self.name = ''
        self.type = ''
        self.model_info_string = ''
        self.model_directory = ''
        self.type_extension = 'pdb'
        self.multi_type = 'HOMO'
        self.PDBfile = []
        self.domain_PDBfile = ""
        self.PHscore = 0.0
        self.HHscore = 0.0
        self.resolution_high = 0.0
        self.resolution_low = 0.0

        self.MR_jobID = 0
        self.MR_job_number = 1
        self.MRPROGRAM = ''
        self.Refinement_program = ''

        self.marginal_solution_MOLREP = False
        self.good_solution_MOLREP = False
        self.poor_solution_MOLREP = False
        self.solution_type_MOLREP = "NA"

        self.marginal_solution_PHASER = False
        self.good_solution_PHASER = False
        self.poor_solution_PHASER = False
        self.solution_type_PHASER = "NA"

        self.refinement_HKLIN = ""
        self.enant_solution = False

        # phaser_* attributes
        self.phaser_smartie_log = None
        #self.phaser_keywords=dict([])
        self.phaser_submitted = False
        self.phaser_keywords = []
        self.phaser_PDBfile = ''
        self.phaser_MTZfile = ''
        self.phaser_jobname = ''
        self.phaser_keyfile = ''
        self.phaser_scriptfile = ''
        self.phaser_jobID = 0
        self.phaser_logfile = ''
        self.phaser_solnfile = ''
        self.phaser_LLG_score = 0.0
        self.phaser_Zscore = 0.0
        self.phaser_summary = ''
        self.phaser_soln_spacegroup = ""

        # molrep_* attributes
        self.molrep_smartie_log = None
        self.molrep_keywords = {}
        self.molrep_submitted = False
        self.molrep_PDBfile = ''
        self.molrep_MTZfile = ''
        self.molrep_jobname = ''
        self.molrep_keyfile = ''
        self.molrep_scriptfile1 = ''
        self.molrep_scriptfile2 = ''
        self.molrep_RFfile = ''
        self.molrep_jobID = 0
        self.molrep_logfile = ''
        self.molrep_solnfile = ''
        self.molrep_Corr = 0.0
        self.molrep_Rfac = 0.0
        self.molrep_summary = ''
        self.molrep_contrast = 0.0
        self.molrep_time = 0

        ############ Refmac Rigid Body run parameters ############
        self.refmacRB_molrep_smartie_log = None
        self.refmacRB_phaser_smartie_log = None
        self.refmacRB_molrep_keywords = {}
        self.refmacRB_phaser_keywords = {}

        self.refmacRB_molrep_PDBfile = ''
        self.refmacRB_molrep_MTZINfile = ''
        self.refmacRB_molrep_MTZOUTfile = ''
        self.refmacRB_molrep_jobname = ''
        self.refmacRB_molrep_keyfile = ''
        self.refmacRB_molrep_jobID = 0
        self.refmacRB_molrep_logfile = ''
        self.refmacRB_molrep_initRfact = 1.0
        self.refmacRB_molrep_finlRfact = 1.0
        self.refmacRB_molrep_initRfree = 1.0
        self.refmacRB_molrep_finlRfree = 1.0
        self.refmacRB_molrep_summary = ''

        self.refmacRB_phaser_PDBfile = ''
        self.refmacRB_phaser_MTZINfile = ''
        self.refmacRB_phaser_MTZOUTfile = ''
        self.refmacRB_phaser_jobname = ''
        self.refmacRB_phaser_keyfile = ''
        self.refmacRB_phaser_jobID = 0
        self.refmacRB_phaser_logfile = ''
        self.refmacRB_phaser_initRfact = 1.0
        self.refmacRB_phaser_finlRfact = 1.0
        self.refmacRB_phaser_initRfree = 1.0
        self.refmacRB_phaser_finlRfree = 1.0
        self.refmacRB_phaser_summary = ''

        ############ Refmac Restrained run parameters ############
        self.refmac_molrep_smartie_log = None
        self.refmac_phaser_smartie_log = None
        self.refmac_molrep_keywords = {}
        self.refmac_phaser_keywords = {}

        self.refmac_molrep_PDBfile = ''
        self.refmac_molrep_MTZINfile = ''
        self.refmac_molrep_MTZOUTfile = ''
        self.refmac_molrep_jobname = ''
        self.refmac_molrep_keyfile = ''
        self.refmac_molrep_jobID = 0
        self.refmac_molrep_logfile = ''
        self.refmac_molrep_initRfact = 1.0
        self.refmac_molrep_finlRfact = 1.0
        self.refmac_molrep_initRfree = 1.0
        self.refmac_molrep_finlRfree = 1.0
        self.refmac_molrep_summary = ''

        self.refmac_phaser_PDBfile = ''
        self.refmac_phaser_MTZINfile = ''
        self.refmac_phaser_MTZOUTfile = ''
        self.refmac_phaser_jobname = ''
        self.refmac_phaser_keyfile = ''
        self.refmac_phaser_jobID = 0
        self.refmac_phaser_logfile = ''
        self.refmac_phaser_initRfact = 1.0
        self.refmac_phaser_finlRfact = 1.0
        self.refmac_phaser_initRfree = 1.0
        self.refmac_phaser_finlRfree = 1.0
        self.refmac_phaser_summary = ''

        # buccaneer_* attributes
        self.buccaneer_molrep_directory = ''
        self.buccaneer_molrep_logfile = ''
        self.buccaneer_molrep_PDBOUTfile = ''
        self.buccaneer_molrep_PDBrefinedfile = ''
        self.buccaneer_molrep_MTZrefinedfile = ''
        self.buccaneer_molrep_res_built = 0
        self.buccaneer_molrep_completeness = 0.0
        self.buccaneer_molrep_initRfact = 1.0
        self.buccaneer_molrep_finalRfact = 1.0
        self.buccaneer_molrep_initRfree = 1.0
        self.buccaneer_molrep_finalRfree = 1.0

        self.buccaneer_phaser_directory = ''
        self.buccaneer_phaser_logfile = ''
        self.buccaneer_phaser_PDBOUTfile = ''
        self.buccaneer_phaser_PDBrefinedfile = ''
        self.buccaneer_phaser_MTZrefinedfile = ''
        self.buccaneer_phaser_res_built = 0
        self.buccaneer_phaser_completeness = 0.0
        self.buccaneer_phaser_initRfact = 1.0
        self.buccaneer_phaser_finalRfact = 1.0
        self.buccaneer_phaser_initRfree = 1.0
        self.buccaneer_phaser_finalRfree = 1.0

        # arpwarp_* attributes
        self.arpwarp_molrep_directory = ''
        self.arpwarp_molrep_logfile = ''
        self.arpwarp_molrep_PDBOUTfile = ''
        self.arpwarp_molrep_MTZOUTfile = ''
        self.arpwarp_molrep_res_built = 0
        self.arpwarp_molrep_completeness = 0.0
        self.arpwarp_molrep_initRfact = 1.0
        self.arpwarp_molrep_finalRfact = 1.0
        self.arpwarp_molrep_initRfree = 1.0
        self.arpwarp_molrep_finalRfree = 1.0

        self.arpwarp_phaser_directory = ''
        self.arpwarp_phaser_logfile = ''
        self.arpwarp_phaser_PDBOUTfile = ''
        self.arpwarp_phaser_MTZOUTfile = ''
        self.arpwarp_phaser_res_built = 0
        self.arpwarp_phaser_completeness = 0.0
        self.arpwarp_phaser_initRfact = 1.0
        self.arpwarp_phaser_finalRfact = 1.0
        self.arpwarp_phaser_initRfree = 1.0
        self.arpwarp_phaser_finalRfree = 1.0

        # shelxe_* / buccSHELXE_* attributes
        self.shelxe_molrep_PDBfile = ''
        self.shelxe_molrep_PHSfile = ''
        self.shelxe_molrep_MTZfile = ''
        self.shelxe_molrep_logfile = ''
        self.shelxe_molrep_CCscore = 0.0
        self.shelxe_molrep_AvgChainLen = 0.0
        self.buccSHELXE_molrep_PDBfile = ''
        self.buccSHELXE_molrep_MTZfile = ''

        self.shelxe_phaser_PDBfile = ''
        self.shelxe_phaser_PHSfile = ''
        self.shelxe_phaser_MTZfile = ''
        self.shelxe_phaser_logfile = ''
        self.shelxe_phaser_CCscore = 0.0
        self.shelxe_phaser_AvgChainLen = 0.0
        self.buccSHELXE_phaser_PDBfile = ''
        self.buccSHELXE_phaser_MTZfile = ''

        # ecalc_* attributes.  These are written by setEcalcLogfile() and
        # setEcalcMTZOUTfile(); they are initialised here so that reading
        # them before the setter is called does not raise AttributeError.
        self.ecalc_logfile = ""
        self.ecalc_MTZOUTfile = ""

        # acorn_* attributes
        self.acorn_logfile = ""
        self.acorn_MTZOUTfile = ""
        self.acorn_XYZINfile = ""
        self.acorn_cc_values = 0.0
        self.acorn_cycles = 0

        # cpirate_* attributes
        self.cpirate_MTZfile = ""
        self.cpirate_logfile = ""
        self.cpirate_cmdfile = ""

        # bucc_* attributes
        self.bucc_ref_logfile = ""
        self.bucc_ref_cmdfile = ""
        self.bucc_MTZINfile = ""
        self.bucc_MTZOUTfile = ""
        self.bucc_refmac_PDBOUTfile = ""

        self.mr_pdb_sum_logfile = ''
        self.pdbcur_sum_logfile = ''

    ##############################
    # General model details:
    ##############################

    def setModelChainSource(self, chain_source):
        self.chain_source = chain_source

    def setModelSource(self, source):
        self.source = source

    def setPDBName(self, PDBName):
        self.PDBName = PDBName

    def setNumPerEnsem(self, num_per_ensem):
        self.num_per_ensem = num_per_ensem

    def setNumberMols(self, number_mols):
        self.number_mols = number_mols

    def setNumberDomains(self, number):
        self.number_domains = number

    def setModelName(self, name):
        self.name = name

    def setModelType(self, type):
        self.type = type

    def setModel_directory(self, model_directory):
        self.model_directory = model_directory

    def setModelTypeExtension(self, type_extension):
        self.type_extension = type_extension

    def setMultiType(self, multi_type):
        self.multi_type = multi_type

    def setMR_JobID(self, MR_jobID):
        self.MR_jobID = MR_jobID

    def setMR_JobNumber(self, MR_job_number):
        self.MR_job_number = MR_job_number

    def setMRPROGRAM(self, MRPROGRAM):
        self.MRPROGRAM = MRPROGRAM

    def setRefinement_Program(self, Refinement_program):
        self.Refinement_program = Refinement_program

    ##############################
    # Phaser:
    ##############################

    def setPhaserPDBfile(self, phaser_pdbfile):
        self.phaser_PDBfile = phaser_pdbfile

    def setPhaserMTZfile(self, phaser_mtzfile):
        self.phaser_MTZfile = phaser_mtzfile

    def setPhaserJobName(self, phaser_jobname):
        self.phaser_jobname = phaser_jobname

    def setPhaserKeyFile(self, phaser_keyfile):
        self.phaser_keyfile = phaser_keyfile

    def setPhaserJobID(self, phaser_jobID):
        self.phaser_jobID = phaser_jobID

    def setPhaserLogFile(self, phaser_logfile):
        self.phaser_logfile = phaser_logfile

    def setPhaserScriptFile(self, filename):
        self.phaser_scriptfile = filename

    def setPhaserSolnFile(self, phaser_solnfile):
        self.phaser_solnfile = phaser_solnfile

    def setPhaserLLGscore(self, phaser_LLG_score):
        self.phaser_LLG_score = phaser_LLG_score

    def setPhaserZscore(self, phaser_Zscore):
        self.phaser_Zscore = phaser_Zscore

    def setPhaserSummary(self, summary):
        self.phaser_summary = summary

    def setPhaserSolnSG(self, SG):
        self.phaser_soln_spacegroup = SG

    def setPhaserSubmitted(self, bool):
        self.phaser_submitted = bool

    def isPhaserSubmitted(self):
        return self.phaser_submitted

    ##############################
    # Molrep:
    ##############################

    def setMolrepPDBfile(self, molrep_pdbfile):
        self.molrep_PDBfile = molrep_pdbfile

    def setMolrepMTZfile(self, molrep_mtzfile):
        self.molrep_MTZfile = molrep_mtzfile

    def setMolrepJobName(self, molrep_jobname):
        self.molrep_jobname = molrep_jobname

    def setMolrepKeyFile(self, molrep_keyfile):
        self.molrep_keyfile = molrep_keyfile

    def setMolrepJobID(self, molrep_jobID):
        self.molrep_jobID = molrep_jobID

    def setMolrepLogFile(self, molrep_logfile):
        self.molrep_logfile = molrep_logfile

    def setMolrepScriptFile1(self, filename):
        self.molrep_scriptfile1 = filename

    def setMolrepScriptFile2(self, filename):
        self.molrep_scriptfile2 = filename

    def setMolrepSolnFile(self, molrep_solnfile):
        self.molrep_solnfile = molrep_solnfile

    def setMolrepCorr(self, molrep_Corr):
        self.molrep_Corr = molrep_Corr

    def setMolrepRfac(self, molrep_Rfac):
        self.molrep_Rfac = molrep_Rfac

    def setMolrepSummary(self, summary):
        self.molrep_summary = summary

    def setMolrepSubmitted(self, bool):
        self.molrep_submitted = bool

    def isMolrepSubmitted(self):
        return self.molrep_submitted

    ##############################
    # Refmac Rigid Body:
    ##############################

    # RefmacRB variables for Molrep

    def setRefmacRBMolrepPDBfile(self, filename):
        self.refmacRB_molrep_PDBfile = filename

    def setRefmacRBMolrepMTZINfile(self, filename):
        self.refmacRB_molrep_MTZINfile = filename

    def setRefmacRBMolrepMTZOUTfile(self, filename):
        self.refmacRB_molrep_MTZOUTfile = filename

    def setRefmacRBMolrepJobName(self, name):
        self.refmacRB_molrep_jobname = name

    def setRefmacRBMolrepKeyFile(self, filename):
        self.refmacRB_molrep_keyfile = filename

    def setRefmacRBMolrepJobID(self, ID):
        self.refmacRB_molrep_jobID = ID

    def setRefmacRBMolrepLogFile(self, filename):
        self.refmacRB_molrep_logfile = filename

    def setRefmacRBMolrepSummary(self, summary):
        self.refmacRB_molrep_summary = summary

    # RefmacRB variables for Phaser

    def setRefmacRBPhaserPDBfile(self, filename):
        self.refmacRB_phaser_PDBfile = filename

    def setRefmacRBPhaserMTZINfile(self, filename):
        self.refmacRB_phaser_MTZINfile = filename

    def setRefmacRBPhaserMTZOUTfile(self, filename):
        self.refmacRB_phaser_MTZOUTfile = filename

    def setRefmacRBPhaserJobName(self, name):
        self.refmacRB_phaser_jobname = name

    def setRefmacRBPhaserKeyFile(self, filename):
        self.refmacRB_phaser_keyfile = filename

    def setRefmacRBPhaserJobID(self, ID):
        self.refmacRB_phaser_jobID = ID

    def setRefmacRBPhaserLogFile(self, filename):
        self.refmacRB_phaser_logfile = filename

    def setRefmacRBPhaserSummary(self, summary):
        self.refmacRB_phaser_summary = summary

    ##############################
    # Refmac Restrained:
    ##############################

    # Refmac variables for Molrep

    def setRefmacMolrepPDBfile(self, filename):
        self.refmac_molrep_PDBfile = filename

    def setRefmacMolrepMTZINfile(self, filename):
        self.refmac_molrep_MTZINfile = filename

    def setRefmacMolrepMTZOUTfile(self, filename):
        self.refmac_molrep_MTZOUTfile = filename

    def setRefmacMolrepJobName(self, name):
        self.refmac_molrep_jobname = name

    def setRefmacMolrepKeyFile(self, filename):
        self.refmac_molrep_keyfile = filename

    def setRefmacMolrepJobID(self, ID):
        self.refmac_molrep_jobID = ID

    def setRefmacMolrepLogFile(self, filename):
        self.refmac_molrep_logfile = filename

    def setRefmacMolrepSummary(self, summary):
        self.refmac_molrep_summary = summary

    # Refmac variables for Phaser

    def setRefmacPhaserPDBfile(self, filename):
        self.refmac_phaser_PDBfile = filename

    def setRefmacPhaserMTZINfile(self, filename):
        self.refmac_phaser_MTZINfile = filename

    def setRefmacPhaserMTZOUTfile(self, filename):
        self.refmac_phaser_MTZOUTfile = filename

    def setRefmacPhaserJobName(self, name):
        self.refmac_phaser_jobname = name

    def setRefmacPhaserKeyFile(self, filename):
        self.refmac_phaser_keyfile = filename

    def setRefmacPhaserJobID(self, ID):
        self.refmac_phaser_jobID = ID

    def setRefmacPhaserLogFile(self, filename):
        self.refmac_phaser_logfile = filename

    def setRefmacPhaserSummary(self, summary):
        self.refmac_phaser_summary = summary

    ##############################
    # Buccaneer:
    ##############################

    def setBuccaneerMolrepWorkingDIR(self, dirname):
        self.buccaneer_molrep_directory = dirname

    def setBuccaneerMolrepPDBOUTfile(self, filename):
        self.buccaneer_molrep_PDBOUTfile = filename

    def setBuccaneerMolrepPDBrefinedfile(self, filename):
        self.buccaneer_molrep_PDBrefinedfile = filename

    def setBuccaneerMolrepMTZrefinedfile(self, filename):
        self.buccaneer_molrep_MTZrefinedfile = filename

    def setBuccaneerPhaserWorkingDIR(self, dirname):
        self.buccaneer_phaser_directory = dirname

    def setBuccaneerPhaserPDBOUTfile(self, filename):
        self.buccaneer_phaser_PDBOUTfile = filename

    def setBuccaneerPhaserPDBrefinedfile(self, filename):
        self.buccaneer_phaser_PDBrefinedfile = filename

    def setBuccaneerPhaserMTZrefinedfile(self, filename):
        self.buccaneer_phaser_MTZrefinedfile = filename

    ##############################
    # ARP/wARP:
    ##############################

    def setArpwarpMolrepWorkingDIR(self, dirname):
        self.arpwarp_molrep_directory = dirname

    def setArpwarpMolrepPDBOUTfile(self, filename):
        self.arpwarp_molrep_PDBOUTfile = filename

    def setArpwarpMolrepMTZOUTfile(self, filename):
        self.arpwarp_molrep_MTZOUTfile = filename

    def setArpwarpPhaserWorkingDIR(self, dirname):
        self.arpwarp_phaser_directory = dirname

    def setArpwarpPhaserPDBOUTfile(self, filename):
        self.arpwarp_phaser_PDBOUTfile = filename

    def setArpwarpPhaserMTZOUTfile(self, filename):
        self.arpwarp_phaser_MTZOUTfile = filename

    ##############################
    # SHELXE:
    ##############################

    def setShelxeMolrepPDBfile(self, filename):
        self.shelxe_molrep_PDBfile = filename

    def setShelxeMolrepPHSfile(self, filename):
        self.shelxe_molrep_PHSfile = filename

    def setShelxeMolrepMTZfile(self, filename):
        self.shelxe_molrep_MTZfile = filename

    def setShelxePhaserPDBfile(self, filename):
        self.shelxe_phaser_PDBfile = filename

    def setShelxePhaserPHSfile(self, filename):
        self.shelxe_phaser_PHSfile = filename

    def setShelxePhaserMTZfile(self, filename):
        self.shelxe_phaser_MTZfile = filename

    ##############################
    # Ecalc / Acorn / Cpirate / Buccaneer refinement / summaries:
    ##############################

    def setEcalcLogfile(self, filename):
        self.ecalc_logfile = filename

    def setEcalcMTZOUTfile(self, filename):
        self.ecalc_MTZOUTfile = filename

    def setAcornLogfile(self, filename):
        self.acorn_logfile = filename

    def setAcornMTZOUTfile(self, filename):
        self.acorn_MTZOUTfile = filename

    def setCpirateMTZfile(self, filename):
        self.cpirate_MTZfile = filename

    def setCpirateLogfile(self, filename):
        self.cpirate_logfile = filename

    def setCpirateCmdfile(self, filename):
        self.cpirate_cmdfile = filename

    def setbucc_ref_logfile(self, filename):
        self.bucc_ref_logfile = filename

    def setbucc_ref_cmdfile(self, filename):
        self.bucc_ref_cmdfile = filename

    def setbucc_MTZINfile(self, filename):
        self.bucc_MTZINfile = filename

    def setbucc_MTZOUTfile(self, filename):
        self.bucc_MTZOUTfile = filename

    def setbucc_refmac_PDBOUTfile(self, filename):
        self.bucc_refmac_PDBOUTfile = filename

    def setMRPDBSummaryFile(self, filename):
        self.mr_pdb_sum_logfile = filename

    def setPDBcurSummaryFile(self, filename):
        self.pdbcur_sum_logfile = filename


class MR_setup:
    """
    A class to setup various structures and lists for running the MR/Refmac pipeline.
    """

    # Model type -> name of the chain_list entry attribute that holds the
    # prepared PDB file for that model type (used by setTypes).
    _MODEL_PDB_ATTRS = {
        'UNMOD': 'unmod_modelPDB',
        'PDBCLP': 'pdbclip_modelPDB',
        'MOLREP': 'molrep_modelPDB',
        'CHNSAW': 'chainsaw_modelPDB',
        'SCLPTR': 'sculptor_modelPDB',
        'PLYALA': 'plyala_modelPDB',
    }

    # Model type -> template for the one-line model description; filled with
    # (base type, identifier, PDB name).  A variant of the Chainsaw message
    # that appended "-- domain %d" was already disabled in the original code.
    _MODEL_INFO_TEMPLATES = {
        'UNMOD': "%s %s of structure %s (unmodified)",
        'PDBCLP': "%s %s of structure %s prepared using the PDBclip method",
        'MOLREP': "%s %s of structure %s prepared using the Molrep method",
        'CHNSAW': "%s %s of structure %s prepared using the Chainsaw method",
        'SCLPTR': "%s %s of structure %s prepared using the Sculptor method",
        'PLYALA': "%s %s of structure %s prepared using the Polyalanine method",
    }

    def __init__(self):
        """
        Initialise the debug flag from the MRBUMP_DEBUG environment variable.

        The variable is parsed as a Python literal (e.g. "True", "False",
        "1").  If it is unset or is not a valid literal, debug is False.
        """
        # NOTE: this used to be eval(os.environ[...]) under a bare except,
        # which executes arbitrary expressions taken from the environment.
        # ast.literal_eval yields the same value for literal settings
        # without evaluating code.
        try:
            self.debug = ast.literal_eval(os.environ['MRBUMP_DEBUG'])
        except (KeyError, ValueError, SyntaxError):
            self.debug = False

    # ------------------------------------------------------------------
    # Disabled (commented-out) routine, retained from the original file.
    # ------------------------------------------------------------------
#    def ample_generate_ensembles(self, init, alignment_file=None):
#        ensembler = ample_ensemble.Ensembler()
#
#        ensembler.theseus_exe = os.path.join(os.environ['CCP4'],'bin','theseus')
#        ensembler.maxcluster_exe = ample_util.find_maxcluster({'maxcluster_exe' : None,
#                                                                'rcdir' : os.path.join( os.path.expanduser("~"), ".ample") })
#        ensembler.subcluster_exe = ensembler.maxcluster_exe
#        ensembler.max_ensemble_models = 30
#        cluster_exe = os.path.join(os.environ['CCP4'],'bin','spicker')
#
#        models_dir = os.path.join(init.search_dir, 'models')
#        ensembles_directory = os.path.join(models_dir, 'ensembles')
#        if not os.path.isdir(ensembles_directory): os.mkdir(ensembles_directory)
#        work_dir = os.path.join(init.search_dir, 'models', 'ensemble_workdir')
#        os.mkdir(work_dir)
#        os.chdir(work_dir)
#        models = glob.glob(os.path.join(models_dir, "*.pdb"))
#        if True:
#            ensembles = ensembler.generate_ensembles_homologs(models,
#                                                  percent_truncation=5,
#                                                  truncation_method='percent',
#                                                  ensembles_directory=ensembles_directory,
#                                                  alignment_file=alignment_file,
#                                                  work_dir=work_dir,
#                                                  nproc=1,
#                                                  homolog_aligner='gesamt',
#                                                  mustang_exe=None,
#                                                  gesamt_exe=os.path.join(os.environ['CCP4'],'bin','gesamt')
#                                                  )
#        else:
#            ensembles = ensembler.generate_ensembles(models,
#                                                     cluster_method=amoptd['cluster_method'],
#                                                     cluster_exe=cluster_exe,
#                                                     num_clusters=amoptd['num_clusters'],
#                                                     import_cluster=amoptd['import_cluster'],
#                                                     cluster_dir=amoptd['cluster_dir'],
#                                                     percent_truncation=amoptd['percent'],
#                                                     truncation_method=amoptd['truncation_method'],
#                                                     truncation_pruning=amoptd['truncation_pruning'],
#                                                     ensembles_directory=ensembles_directory,
#                                                     work_dir=work_dir,
#                                                     nproc=amoptd['nproc'])
#        return ensembles

    def setDEBUG(self, flag):
        """ Override the debug flag chosen in __init__ """
        self.debug = flag

    def prepModelPDB(self, init, mstat, chain, modelStruct, MODELTYPE, prepPDBFile):
        """
        Prepare the placing of pdb file models

        If prepPDBFile exists it is appended to modelStruct.PDBfile and the
        model directory (<search_dir>/data/<chainName>/<type string>) is set
        on modelStruct.  Unless MODELTYPE is the ensemble template type
        (mstat.EnsTemplate), the file is also copied into mstat.models_dir,
        or into its "domain_<myPHTargetDomain>" sub-folder when
        init.keywords.DOPHMMER is set.

        Returns True if the model file exists, False otherwise.
        """
        # An empty/None path means the model was never produced; treat it the
        # same as a missing file rather than letting os.path.isfile raise.
        if not prepPDBFile or not os.path.isfile(prepPDBFile):
            return False

        chain_info = mstat.chain_list[chain]
        modelStruct.PDBfile.append(prepPDBFile)

        if mstat.EnsTemplate != MODELTYPE:
            pdb_basename = os.path.basename(prepPDBFile)
            if init.keywords.DOPHMMER:
                destination = os.path.join(mstat.models_dir,
                                           "domain_%s" % chain_info.myPHTargetDomain,
                                           pdb_basename)
            else:
                destination = os.path.join(mstat.models_dir, pdb_basename)
            shutil.copyfile(prepPDBFile, destination)

        modelStruct.setModel_directory(os.path.join(init.search_dir, 'data',
                                                    chain_info.chainName,
                                                    mstat.modelTypeStringDict[MODELTYPE]))
        return True

    def setTypes(self, init, mstat, target_info, mrsearchdir):
        """
        A function to define the model types if the models are not part of an ensemble
        or a PQS multimer. PQS-based multimers are constructed later, but PISA-based
        multimers should be included here.

        Steps performed:
          1. build a Models_struct for every chain in mstat.sorted_MR_list and
             every type in init.model_types whose PDB file exists, storing it
             in mstat.model_list together with a results dictionary;
          2. write the model list to stdout and to the report file
             mstat.MODEL_report (via mstat.makeI2report);
          3. when init.keywords.DOPHMMER is set, list the models per target domain;
          4. fill mstat.sorted_model_list via sort_model_list().

        mrsearchdir is not used here; it is kept so existing callers keep working.
        Exits the program (status 1) on an unrecognised model type.
        """
        self._build_model_list(init, mstat)

        # Tag this up as SUMMARY content
        sys.stdout.write('\n')

        self._report_model_list(init, mstat, target_info)

        sys.stdout.write("\n")
        sys.stdout.write("These models will be placed in the folder:\n" + mstat.models_dir[init.WEB_PATH_START:] + "\n")
        sys.stdout.write("\n")

        # Output domain-specific model details
        if init.keywords.DOPHMMER:
            self._report_domain_models(init, mstat, target_info)

        # Close the SUMMARY tag for the input details
        sys.stdout.write('\n')

        # Sort the model list so that jobs with a higher sequence identity are started first
        self.sort_model_list(mstat)

    def _build_model_list(self, init, mstat):
        """ Create the Models_struct entries and results dictionaries for setTypes """
        mstat.num_MR_models = 0

        # Pre-set so the check after the loops is well defined even when
        # there are no chains or no model types to iterate over.
        model_exists = True

        # (an earlier version looped over mstat.sorted_MODEL_list)
        for chain in mstat.sorted_MR_list:
            chain_info = mstat.chain_list[chain]
            for model_type in init.model_types:
                model_key = chain_info.chainName + '_' + model_type

                m = Models_struct()
                m.setModelChainSource(chain_info.chainName)
                m.setModelSource(chain_info.source)
                m.setPDBName(chain_info.PDBName)
                m.seqID.append(chain_info.seqID)
                if chain_info.source == "LOC":
                    m.rms.append(chain_info.rms)
                if chain_info.COPIES_SET:
                    m.COPIES_SET = True
                    m.no_copies = chain_info.no_copies
                m.setModelType(model_type)
                m.setModelName(model_key)
                if chain_info.multimer_type == 'MULTIMER':
                    m.setNumberMols(chain_info.number_monomers)
                m.resolution_high = chain_info.resolution_high
                m.resolution_low = chain_info.resolution_low

                pdb_attr = self._MODEL_PDB_ATTRS.get(model_type)
                if pdb_attr is None:
                    sys.stdout.write("Error: Unrecognised model type in Model_Struct.setTypes : %s, quitting..." % model_type)
                    # This is an error exit, so report a non-zero status.
                    sys.exit(1)

                model_exists = self.prepModelPDB(init, mstat, chain, m, model_type,
                                                 getattr(chain_info, pdb_attr))

                # Check that the models were created and add the model to the model_list
                if model_exists:
                    mstat.model_list[model_key] = m

                    # Make the results dictionary (project module, imported
                    # only once a model has actually been prepared)
                    import make_dictionary
                    mstat.results_dict[model_key] = make_dictionary.makeDict(init.search_dir)

                    mstat.num_MR_models = mstat.num_MR_models + 1

                # Otherwise report an error with the model creation
                elif self.debug:
                    sys.stdout.write("Model preparation log: Excluding %s model for chain %s as its preparation failed\n" % (model_type, chain))

        # Blank separator line if the last message written was an exclusion
        if self.debug and not model_exists:
            sys.stdout.write("\n")

    def _classify_model_source(self, chain_source):
        """
        Return (base type, identifier) for a model's chain_source.

        The identifier is the text between the first and second underscore of
        chain_source, e.g. 'A' (Chain), 'z23' (Domain) or '120' (Multimer).
        The literal chain_source "ensemble_model" is reported as an Ensemble.
        An unrecognised identifier produces a warning on stdout.
        """
        # The ensemble must be tested first: splitting "ensemble_model" gives
        # the identifier "model", which would otherwise be taken for a domain.
        if chain_source == "ensemble_model":
            return "Ensemble", "ensm"

        # Catch the chain/domain/multimer identifier; tolerate a missing '_'
        parts = chain_source.split("_")
        identifier = parts[1] if len(parts) > 1 else ""

        # Select and mark the full 'chain' models
        if len(identifier) == 1 and identifier.isalpha():
            return "Chain", identifier
        # Select and mark the 'domain' models
        if len(identifier) > 1 and identifier[0].isalpha():
            return "Domain", identifier
        # Select and mark the 'multimer' models (isdigit() is False for "")
        if identifier.isdigit():
            return "Multimer", identifier

        # Send a message if an unknown id is found
        sys.stdout.write("Model Setup Warning: unidentified model identifier '%s' found\n" % identifier)
        sys.stdout.write("\n")
        return "Unknown_type", identifier

    def _report_model_list(self, init, mstat, target_info):
        """ Write the list of prepared models to stdout and to mstat.MODEL_report """
        report_lines = []
        mstat.MODEL_report = os.path.join(init.search_dir, "logs", "model_report.log")

        line = "Model Preparation log: A total of %d models have been created for processing in Molecular Replacement\n" \
               % mstat.num_MR_models
        sys.stdout.write(line)
        report_lines.append(line)

        if init.keywords.USEENSEM:
            line = "Model Preparation log: An ensemble model has also been prepared\n"
            sys.stdout.write(line)
            report_lines.append(line)

        line = "Model Preparation log: The following list of models has been created for MR:\n"
        sys.stdout.write(line)
        # The report file carries one extra blank line after this heading
        report_lines.append(line + "\n")

        sys.stdout.write("Number of domains found: %d\n" % len(target_info.targetDomainDict))
        sys.stdout.write("\n")

        for count, model in enumerate(mstat.model_list, 1):
            model_struct = mstat.model_list[model]

            # Add the model file information to the results dictionary
            for program in ("MOLREP", "PHASER"):
                if program in init.keywords.MR_PROGRAM_LIST:
                    results = mstat.results_dict[model][program]
                    results["SearchModel_name"] = model_struct.name
                    results["SearchModel_filename"] = model_struct.PDBfile
                    results["Job_directory"] = os.path.join(model_struct.model_directory, "mr", program.lower())
                    results["STATUS"] = "Queued"

            base_type, ident = self._classify_model_source(model_struct.chain_source)

            prefix = "%d %s -- " % (count, model)
            sys.stdout.write(prefix)
            report_lines.append(prefix)

            if model_struct.type == 'ENSMBL':
                info = "Structure %s prepared as an Ensemble" % (model_struct.PDBName)
            elif model_struct.type in self._MODEL_INFO_TEMPLATES:
                info = self._MODEL_INFO_TEMPLATES[model_struct.type] \
                       % (base_type, ident, model_struct.PDBName)
            else:
                # Unknown type: only the "<n> <model> -- " prefix is written
                info = None

            if info is not None:
                model_struct.model_info_string = info
                sys.stdout.write(info + "\n")
                report_lines.append(info + "\n")

        mstat.makeI2report("".join(report_lines), mstat.MODEL_report)

    def _report_domain_models(self, init, mstat, target_info):
        """ Write, for every target domain, the single and ensemble models assigned to it """
        for domain in target_info.targetDomainDict:
            extent = target_info.targetDomainDict[domain].extent
            midpoint = target_info.targetDomainDict[domain].midpoint
            sys.stdout.write("Target Domain: %s -- Residue range: %d-%d -- %s\n"
                             % (domain, midpoint-(extent/2), midpoint+(extent/2),
                                os.path.join(mstat.models_dir[init.WEB_PATH_START:], "domain_%s" % domain)))

            # Single models are written as they are found; ensemble models
            # are collected and written afterwards under their own heading.
            ensemble_lines = []
            sys.stdout.write("\n\tSingle models for domain %d:\n" % domain)
            for model in mstat.model_list:
                chain_source = mstat.model_list[model].chain_source
                if chain_source == "ensemble_model":
                    if mstat.model_list[model].targetDomain == domain:
                        ensemble_lines.append("\tEnsemble model: %s\n" % (model))
                elif mstat.chain_list[chain_source].myPHTargetDomain == domain:
                    sys.stdout.write("\tModel: %s\n" % model)

            if ensemble_lines:
                sys.stdout.write("\n\tEnsemble models for domain %d:\n" % domain)
                sys.stdout.write("%s\n" % "".join(ensemble_lines))
            else:
                sys.stdout.write("\n")

    # ------------------------------------------------------------------
    # Disabled (commented-out) routines, retained from the original file.
    # ------------------------------------------------------------------
#    def makeGesamtEnsemble(self, init, mstat):
#
#        # Set the best model as the alignment template for Gesamt (ignore ensemble models)
#        numberGesamtModels=len(mstat.sorted_model_list)
#        nonEnsembleSortedList=[]
#        for model in mstat.sorted_model_list:
#            if "ensemble_model" not in model:
#                nonEnsembleSortedList.append(model)
#            else:
#                numberGesamtModels-=1
#
#        # take the first non-ensemble entry as alignment base and remove from list
#        mstat.alignModel=nonEnsembleSortedList[0]
#        alignModelPDB=mstat.model_list[mstat.alignModel].PDBfile[0]
#        nonEnsembleSortedList.pop(0)
#        numberGesamtModels-=1
#
#        # Set the Gesamt log file, alignment output file and the script file
#        mstat.gesamtAlnFile=os.path.join(init.sequences_dir, "gesamtMultModelAlign.seq")
#        logfile=os.path.join(init.logs_dir, "gesamtMultModelAlign.log")
#        gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlign.sh")
#
#        # Set the name of the multi-model output pdb from Gesamt (Base Ensemble)
#        mstat.gesamtBaseEnsemble=os.path.join(mstat.models_dir, "ensembles", "gesamtBaseAlignment.pdb")
#
#        # Loop over the models and align them to the alignModel using Gesamt, then move them to the models directory
#        if numberGesamtModels >= 1:
#            pdbList=[]
#            pdbDict=dict([])
#            #for model in mstat.model_list.keys():
#            for model in nonEnsembleSortedList:
#                # Create the pdb list and dictionary for this alignment
#                pdbList.append(mstat.model_list[model].PDBfile[0])
#                pdbDict[mstat.model_list[model].PDBfile[0]]=mstat.model_list[model].chain_source[5]
#
#            pdbList.append(alignModelPDB)
#            pdbDict[alignModelPDB]=mstat.model_list[mstat.alignModel].chain_source[5]
#
#            # Run Gesamt to align to the master and copy to the models directory
#            gesamtRun=MRBUMP_gesamt.Gesamt()
#            gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, \
#                                logfile=logfile, alnfile=mstat.gesamtAlnFile, MERGE_OUTPUT=False, \
#                                script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG']))
#            # Run it again but this time output all models to one PDB file
#            gesamtScript=os.path.join(init.scripts_dir, "gesamtMultModelAlignOnePDB.sh")
#            logfile=os.path.join(init.logs_dir, "gesamtMultModelAlignOnePDB.log")
#            gesamtRun.runGesamt(pdbList=pdbList, pdbDict=pdbDict, outputDIR=mstat.models_dir, outputPDB=mstat.gesamtBaseEnsemble, \
#                                logfile=logfile, alnfile=mstat.gesamtAlnFile, MERGE_OUTPUT=True, \
#                                script=gesamtScript, debug=eval(os.environ['MRBUMP_DEBUG']))
#
#
#            # Rename files back to model names from gesamt split and copy to model directories
#            # Put the align model into the list first
#            pdbBaseNameDict=dict([])
#            pdbBaseNameDict[mstat.alignModel]=os.path.splitext(os.path.basename(mstat.model_list[mstat.alignModel].PDBfile[0]))[0]
#            # Now add all of the others...
#            for model in nonEnsembleSortedList:
#                pdbBaseNameDict[model]=os.path.splitext(os.path.basename(mstat.model_list[model].PDBfile[0]))[0]
#            # Loop over the gesamt pdbs and rename and copy
#            for gesamtPDBFile in os.listdir(mstat.models_dir):
#                if os.path.splitext(gesamtPDBFile)[-1].lower() == ".pdb":
#                    for model in pdbBaseNameDict.keys():
#                        if pdbBaseNameDict[model] in os.path.splitext(gesamtPDBFile)[0]:
#                            shutil.move(os.path.join(mstat.models_dir, gesamtPDBFile), os.path.join(mstat.models_dir, pdbBaseNameDict[model] + ".pdb"))
#                            shutil.copyfile(os.path.join(mstat.models_dir, pdbBaseNameDict[model] + ".pdb"), mstat.model_list[model].PDBfile[0])
#
#        # If there was only one model found in the search then don't run gesamt and just copy the model to the models directory
#        else:
#            if os.path.isfile(alignModelPDB):
#                shutil.copyfile(alignModelPDB, os.path.join(mstat.models_dir, os.path.basename(alignModelPDB)))
#            else:
#                sys.stdout.write("Warning: could not find model file:\n %s\n" % alignModelPDB)
#
#    def makeAMPLEEnsembles(self, init, mstat):
#        """ """
#
#        ensembles = self.ample_generate_ensembles(init, alignment_file=mstat.gesamtAlnFile)
#        for i in os.listdir(os.path.join(mstat.models_dir, 'ensembles')):
#            if os.path.splitext(i)[-1].lower() == ".pdb":
#                m=Models_struct()
#                m.chain_source=mstat.model_list[mstat.alignModel].chain_source
#                m.source='ENSM'
#                m.PDBName=mstat.model_list[mstat.alignModel].PDBName
#                m.seqID.append(mstat.model_list[mstat.alignModel].seqID[0])
#                #m.rms.append(mstat.model_list[mstat.alignModel].rms[0])
#
#                m.name=os.path.splitext(os.path.basename(i))[0]
#                m.type='ENSMBL'
#                m.model_info_string='Ensemble model %s' % m.name
#                m.model_directory=os.path.join(init.search_dir, 'data', m.name)
#                m.type_extension='pdb'
#                m.multi_type='HOMO'
#                m.PDBfile.append(os.path.join(mstat.models_dir, i))
#
#                mstat.model_list[m.name]=m
#
#                # Make the results dictionary
#                import make_dictionary
#                mstat.results_dict[m.name]=make_dictionary.makeDict(init.search_dir)
#
#                # Insert the ensemble at end top of the sorted list
#                mstat.sorted_model_list.insert(-1,m.name)

    def sort_model_list(self, mstat):
        """
        Sort the models in the list according to sequence identity with the target structure.

        Walks mstat.sorted_MR_list in order and appends to
        mstat.sorted_model_list every key of mstat.model_list whose
        chain_source equals the current name and which is not already listed.
        """
        for name in mstat.sorted_MR_list:
            for model in mstat.model_list:
                if mstat.model_list[model].chain_source == name and model not in mstat.sorted_model_list:
                    mstat.sorted_model_list.append(model)

    def makeModelsReport(self, init, mstat):
        """
        Create and output the models report

        Sets domain_PDBfile on every model in mstat.model_list, finds the
        model with the highest score in mstat.PHresultsDict (ensemble models
        have no score and are skipped for this purpose), and writes the
        winning domain ID, score and related paths to
        <search_dir>/logs/topModel.txt.
        """
        topPHScore = 0
        topPHDomainID = 0

        for model in mstat.model_list:
            x = mstat.model_list[model]
            if x.chain_source == "ensemble_model":
                myPHTargetDomain = x.myPHTargetDomain
                PHscore = None
            else:
                myPHTargetDomain = mstat.chain_list[x.chain_source].myPHTargetDomain
                PHscore = mstat.PHresultsDict[x.chain_source].score

            # None (ensemble) never wins; the explicit test avoids comparing
            # None with a number, which is a TypeError on Python 3.
            if PHscore is not None and PHscore > topPHScore:
                topPHScore = PHscore
                topPHDomainID = myPHTargetDomain

            x.domain_PDBfile = os.path.join(mstat.models_dir,
                                            "domain_%s" % myPHTargetDomain,
                                            os.path.basename(x.PDBfile[0]))

        # Temporary output for MG finding of best domain
        topPHDomainCSVCoreFile = os.path.join(init.search_dir, "logs", "gesamtCSVCOREfile_%d.csv" % topPHDomainID)
        topPHDomainModelsFolder = os.path.join(init.search_dir, "models", "domain_%d" % topPHDomainID)

        # 'with' guarantees the log is closed even if a write fails
        with open(os.path.join(init.search_dir, "logs", "topModel.txt"), "w") as topPHDomainLog:
            topPHDomainLog.write("topPHDomainID; topPHScore; topPHDomainCSVCoreFile; topPHDomainModelsFolder\n")
            topPHDomainLog.write("%d; %.2lf; %s; %s\n" % (topPHDomainID, topPHScore, topPHDomainCSVCoreFile, topPHDomainModelsFolder))