#! /usr/bin/env ccp4-python # # Copyright (C) 2005 Ronan Keegan # # This code is distributed under the terms and conditions of the # CCP4 Program Suite Licence Agreement as a CCP4 Application. # A copy of the CCP4 licence can be obtained by writing to the # CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK. # # import os, string, sys, time #import sets import Get_MR_sum import Solution_struct import MRBUMP_writebest import subprocess, shlex import Cleanup import parse_phaser import parse_refmac import parse_shelxe import parse_buccaneer from subprocess import PIPE, STDOUT try: import pyrvapi except: #sys.stdout.write("Import warning: No pyrvapi\n") sys.stdout.write("") class Soln_Model: """ A class used for holding solution model details """ def __init__(self): self.model_name="" class Queue_info: """ A class to handle batch queue system interactions. """ def __init__(self): self.queue_job_array=set([]) self.job_completion=False self.latest_jobs_count=1 # Jobs that have completed, and have been processed self.finished_jobs=[] # Jobs that have completed, but have not been processed self.latest_jobs=[] self.RESULT=False try: self.debug=eval(os.environ['MRBUMP_DEBUG']) except: self.debug=False def setDEBUG(self, flag): self.debug=flag def get_job_queue(self, init): """ Start a process to find out which jobs are still running """ """ Returns a set() of job ids or None if job queue command failed """ # Set the command line if init.keywords.QTYPE == 'CONDOR': command_line = 'condor_q -format "%d," ClusterId -format "%s\n" JobStatus' else: command_line = "qstat" # Launch if os.name == "nt": process_args = shlex.split(command_line, posix=False) pipe = subprocess.Popen(process_args, shell=True, stdout=PIPE, stderr=STDOUT).stdout else: process_args = shlex.split(command_line) pipe = subprocess.Popen(process_args, stdout=PIPE, stderr=STDOUT).stdout if init.keywords.QTYPE != 'CONDOR': header1=pipe.readline() header2=pipe.readline() line=pipe.readline() log_file = open(os.path.join(init.search_dir, 'bulkMR.log'),'w') queue_job_array = set() # Running jobs are put in 'queue_job_array' while line: line=string.strip(line) if init.keywords.QTYPE == "SGE": queue_job_array.add((int)((string.split(line))[0])) elif init.keywords.QTYPE == "PBS": # So it won't get tripped up by ids of the form "NNNNNN-1" and "NNNNNN-2" id=string.split(line,".")[0] # Sometimes even the numbers have trailing brackets id=string.split(id,'-')[0].rstrip('[]') queue_job_array.add(int(id)) elif init.keywords.QTYPE == "CONDOR": # Return 'None' if queue fetch failed if line.find('-- Failed to fetch ads from')>=0: pipe.close() return None split_list=string.split(line,',') if len(split_list)==2: job_status=split_list[1] job_id=split_list[0] # If job_status is 'Unexpanded'(0), 'Idle' (1), 'Running'(2) if job_status in ['0','1','2']: queue_job_array.add(job_id) # If job_status is 'Held' (5), 'Submission Error' (6) if job_status in ['5', '6']: sys.stdout.write("ERROR: Job %s is being held or in error state\n"%job_id) sys.stdout.write("Deleting job %s using 'condor_rm' command.\n"%job_id) sys.stdout.write("Please re-adjust parameters to correct error.\n\n") os.system('condor_q -analyse %s'%job_id) os.system('condor_rm %s'%job_id) line=pipe.readline() pipe.close() return queue_job_array def qstat_check(self, init, mstat, target_info, JOB_TYPE, MR=None): """ A function to test the SGE/PBS/CONDOR clustering for completion of a list of jobs denoted by their job ID's. Needs a "set" of job id's as input. """ # Continuously check the queue to see if our jobs have finished while self.job_completion == False: self.queue_job_array = set() cntr = 0 while cntr < 5: time.sleep(10) self.queue_job_array = self.get_job_queue(init) # If cannot read the jobs queue, sleep and try again if self.queue_job_array == None: time.sleep(20) cntr += 1 else: break # If failed to read jobs queue too many times, then exit if cntr >= 5: sys.stdout.write("ERROR: Cannot see the jobs in queue\n") sys.stdout.write(" Cannot process any more jobs\n") break if self.debug: sys.stdout.write("len(job_queue_array)=%d\n" % len(self.queue_job_array)) sys.stdout.write("len(mstat.jobid_array)=%d\n" % len(mstat.jobid_array)) sys.stdout.flush() # Check the cluster queue to see if there are any jobs still running. # If there are jobs still running find out which ones have just completed. mstat.parallel_jobs_running = len(mstat.jobid_array.intersection(self.queue_job_array)) if mstat.parallel_jobs_running > 0: if JOB_TYPE=='MOL_REPLACE' and (mstat.solution_found==False or init.keywords.TRYALL==True): # Move the processed jobs from the previous iteration into 'finished_jobs' for job_name in self.latest_jobs: self.finished_jobs.append(job_name) # Find jobs that have completed but have not been processed # Limit the number of jobs otherwise will be too busy to service the cluster queue self.latest_jobs=[] num_to_be_processed=0 for jobID in mstat.jobid_array.difference(self.queue_job_array): if mstat.jobid_dict[jobID] not in self.finished_jobs: self.latest_jobs.append(mstat.jobid_dict[jobID]) num_to_be_processed+=1 if num_to_be_processed>=10: break sys.stdout.flush() if self.debug: num_MR_jobs = 0 if "PHASER" in init.keywords.MR_PROGRAM_LIST: num_MR_jobs = len(mstat.sorted_model_list) if "MOLREP" in init.keywords.MR_PROGRAM_LIST: num_MR_jobs += len(mstat.sorted_model_list) #if init.keywords.USEENSEM: # num_MR_jobs -= 1 sys.stdout.write("Number of MR jobs: %d\n" % num_MR_jobs) if hasattr(mstat, 'parallel_jobs_running'): sys.stdout.write("Number of parallel jobs running: %d\n" % mstat.parallel_jobs_running) if init.keywords.QSIZE == sys.maxint: sys.stdout.write("Maximum jobs at any one time (QSIZE): Unlimited\n") else: sys.stdout.write("Maximum jobs at any one time (QSIZE): %d\n" % init.keywords.QSIZE) sys.stdout.flush() # Process recently completed jobs mstat.parallel_jobs_finished = len(self.finished_jobs) if self.debug: sys.stdout.write("Number of finished jobs: %d\n" % mstat.parallel_jobs_finished) sys.stdout.flush() self.check_result_parallel(init, mstat, target_info) # If doing MR - do we need to run more jobs? if JOB_TYPE=='MOL_REPLACE' and (mstat.solution_found==False or init.keywords.TRYALL==True): if self.debug: sys.stdout.write("%d jobs still running/pending at %s\n" % (mstat.parallel_jobs_running, time.ctime())) sys.stdout.flush() # If under threshold, then submit more jobs if mstat.parallel_jobs_running < init.keywords.QSIZE and MR!=None and self.debug: sys.stdout.write("Submitting more jobs... %d %d\n" % (mstat.parallel_jobs_running, init.keywords.QSIZE)) if init.keywords.JSRVIEW and sys.modules.has_key('pyrvapi'): MR.MR_start(target_info, init, mstat, init.keywords.QSIZE - mstat.parallel_jobs_running, pyrvapi) else: MR.MR_start(target_info, init, mstat, init.keywords.QSIZE - mstat.parallel_jobs_running, None) # All jobs have finished running, so process the jobs that have completed and exit loop. if mstat.parallel_jobs_running==0 and ((MR!=None and MR.MR_isdone(init, mstat)) or (JOB_TYPE!='MOL_REPLACE')): # Setting job completion flag will exit loop self.job_completion=True if JOB_TYPE=='MOL_REPLACE' and (mstat.solution_found==False or init.keywords.TRYALL==True): # Move the processed jobs from the previous iteration into 'finished_jobs' for job_name in self.latest_jobs: self.finished_jobs.append(job_name) # Find jobs that have completed but have not been processed self.latest_jobs=[] for jobID in mstat.jobid_array.difference(self.queue_job_array): if mstat.jobid_dict[jobID] not in self.finished_jobs: self.latest_jobs.append(mstat.jobid_dict[jobID]) # Process the last batch of recently completed jobs mstat.parallel_jobs_finished = len(self.finished_jobs) self.check_result_parallel(init, mstat, target_info) # For the final time, move 'latest_jobs' into 'finished_jobs' # and update 'parallel_jobs_finished' for job_name in self.latest_jobs: self.finished_jobs.append(job_name) mstat.parallel_jobs_finished = len(self.finished_jobs) self.latest_jobs=[] if mstat.solution_found == False and JOB_TYPE == 'MOL_REPLACE' and mstat.MRPROGRAM == 'MOLREP' and 'PHASER' in init.keywords.MR_PROGRAM_LIST: sys.stdout.write("No solution found using Molrep, trying Phaser.\n") sys.stdout.write("\n") elif mstat.solution_found == False and JOB_TYPE == 'MOL_REPLACE' and mstat.MRPROGRAM == 'PHASER': sys.stdout.write("No solution found using Phaser.\n") sys.stdout.write("All jobs complete\n") sys.stdout.write("\n") else: sys.stdout.write("All jobs complete\n") sys.stdout.write("\n") # Reset the 'job_completion' flag and the 'finished_jobs' and 'latest_jobs' arrays. self.job_completion=False if self.queue_job_array!=None: self.queue_job_array.clear() else: self.queue_job_array = set() self.finished_jobs=[] self.latest_jobs=[] def check_result(self, mstat, init, model, target_info, parallel_job=None): """ A function to check the refinement results for the finished job""" if parallel_job is not None: #job_info=mstat.jobname_dict[parallel_job] job_info=mstat.model_list[model] job_info.MRPROGRAM=mstat.jobname_dict[parallel_job].MRPROGRAM else: job_info=mstat.model_list[model] sys.stdout.write("\n") sys.stdout.write("#########################################################################\n") if parallel_job is not None: sys.stdout.write("Job '%s' has completed for model '%s'\n" % (parallel_job, model)) else: sys.stdout.write("Job has completed for model '%s'\n" % model) sys.stdout.write("#########################################################################\n") sys.stdout.write("\n") #if self.debug: # print "Checking the PDB summary log file for model: " + model + "...." # print "File name: " + job_info.mr_pdb_sum_logfile # print "" #f os.path.isfile(job_info.mr_pdb_sum_logfile): # if self.debug: # print "File exists" # print "" # Extract the number of chains from the summary log file. # We assume the number of chains equals the number of monomers that # the MR program has managed to place. There should be no waters, ligands, etc. # in the search model. # This may have to be adjusted for complexes. # chains_found=self.getPdbChains(model, init, mstat, job_info.mr_pdb_sum_logfile) # if chains_found: # print "Molecular Replacement found %d out of %d chains." % (chains_found, target_info.no_of_mols) # print "" # else: # print "Warning: Molecular Replacement found no chains." # print "" #lse: # print "Bad error: no Pdb summary log file!" # print "" #jmht Get the phaser/molrep scores regardless of success (required by AMPLE) success=False if job_info.MRPROGRAM=="MOLREP": success = self.getMolrepResults(init, mstat, model) elif job_info.MRPROGRAM=="PHASER": # This sets a temporary Solution_type_PHASER of 'COMPLETED' if a result is found success = self.getPhaserResults(init, mstat, model) else: raise RuntimeError,"Unrecognised MRPROGRAM: {0}".format(job_info.MRPROGRAM) mr_sum = self.getMrSum(job_info, init, mstat, model) # Only check further if MOLREP or PHASER worked if success: # Set the refmac details depending on the MR program used if job_info.MRPROGRAM=="MOLREP": refmac_logfile=job_info.refmac_molrep_logfile elif job_info.MRPROGRAM=="PHASER": refmac_logfile=job_info.refmac_phaser_logfile # If the score file exists then we can assume that the refinement completed successfully. # Will change this to look at terminal signals later. if self.debug: print "Checking to see if Refmac produced a log file for model: {0}\nFile name: {0}\n".format(model,refmac_logfile) score_found=False if os.path.isfile(refmac_logfile): if self.debug: print "REFMAC logfile exists: {0}".format(refmac_logfile) # Set the model parameters associated with refinement job_info.setRefinement_Program('Refmac') score_found = self.getRefmacScores(model, init, mstat, job_info.MRPROGRAM, refmac_logfile) if score_found: # Set some temporary variables to hold the refmac scores if job_info.MRPROGRAM=="MOLREP": init_rfree = job_info.refmac_molrep_initRfree final_rfree = job_info.refmac_molrep_finlRfree if job_info.MRPROGRAM=="PHASER": init_rfree = job_info.refmac_phaser_initRfree final_rfree = job_info.refmac_phaser_finlRfree diff = final_rfree/init_rfree solution="poor" # If the final Rfree is less than 0.35 then we can asssume that a solution has been found. if final_rfree <= 0.35: solution="good" print "Template Model: %s has produced a solution, final Rfree = %.3f" % (model, final_rfree) if parallel_job: self.rm_par_jobs(init,mstat) self.setSolutionDetails(init, mstat, model, job_info.MRPROGRAM, solution) # If the Rfree score falls by more than 20 % and the final value is less than .5 we can # assume a successful refinement elif final_rfree <= 0.5 and diff <= 0.8: solution="marginal" print "Template Model: %s has produced a solution, final Rfree = %.3f and is %.3f of initial Rfree" % (model, final_rfree, diff) if parallel_job: self.rm_par_jobs(init,mstat) self.setSolutionDetails(init, mstat, model, job_info.MRPROGRAM, solution) elif final_rfree <= 0.48: solution="marginal" # Write the details to the results HTML if parallel_job: mstat.mr_results.write_marginal_solnParallel(init, mstat, model, parallel_job, final_rfree, diff) self.setSolutionDetails(init, mstat, model, job_info.MRPROGRAM, solution) self.printFileDetails(init, job_info, model, "marginal", MARG_TYPE=1) elif final_rfree <= 0.55 and diff <=0.95: solution="marginal" # Write the details to the results HTML if parallel_job: mstat.mr_results.write_marginal_solnParallel(init, mstat, model, parallel_job, final_rfree, diff) self.setSolutionDetails(init, mstat, model, job_info.MRPROGRAM, solution) self.printFileDetails(init, job_info, model, solution, MARG_TYPE=2) else: self.setSolutionDetails(init, mstat, model, job_info.MRPROGRAM, solution) self.printFileDetails(init, job_info, model, solution) # If a solution has been found determine the Molecular Replacement details. # Test to see if a PDB file was created and set program variable. if solution=="good" or solution=="marginal": # Output the refinement scores from the refmac log file # Set the title of the loggraph table to be specific to this model mr_sum.setRefmacRfacTitle("Rfactor analysis, stats vs cycle for model %s" % model) # Grab the Rfactor table from the Refmac output mr_sum.get_refmac_summary(refmac_logfile) # Set the refmac smartie log if job_info.MRPROGRAM=="MOLREP": job_info.refmac_molrep_smartie_log=mr_sum.log job_info.setRefmacMolrepSummary(mr_sum.refmac_summary) elif job_info.MRPROGRAM=="PHASER": job_info.refmac_phaser_smartie_log=mr_sum.log job_info.setRefmacPhaserSummary(mr_sum.refmac_summary) print "" print "#########################################################################" + len(model)*"#" print "##### Refinement results from Refmac for " + model + ": #####" print "#########################################################################" + len(model)*"#" print "" print mr_sum.refmac_summary print "" # If we now have a solution we can break out of the model list loop (when in parallel) if mstat.solution_found: if parallel_job is not None and not init.keywords.TRYALL: mstat.mr_results.copy_solution_files(mstat, os.path.join(mstat.results_dir, 'solution')) mstat.mr_results.display_results(mstat, init, target_info) return True mstat.mr_results.display_results(mstat, init, target_info) if job_info.marginal_solution_MOLREP: sys.stdout.write("Output MTZ file from Refmac for model " + model + " (marginal solution search model):\n") sys.stdout.write(" " + job_info.refmac_molrep_MTZOUTfile[init.WEB_PATH_START:] + "\n") sys.stdout.write("Output PDB file from Refmac for model " + model + ":\n") sys.stdout.write(" " + job_info.refmac_molrep_PDBfile[init.WEB_PATH_START:] + "\n") sys.stdout.write("\n") elif job_info.marginal_solution_PHASER: sys.stdout.write("Output MTZ file from Refmac for model " + model + " (marginal solution search model):\n") sys.stdout.write(" " + job_info.refmac_phaser_MTZOUTfile[init.WEB_PATH_START:] + "\n") sys.stdout.write("Output PDB file from Refmac for model " + model + ":\n") sys.stdout.write(" " + job_info.refmac_phaser_PDBfile[init.WEB_PATH_START:] + "\n") sys.stdout.write("\n") if init.keywords.TRYALL: sys.stdout.write("(Running in TRYALL mode) \n") sys.stdout.write("\n") sys.stdout.write("*******************************************************************************************\n") sys.stdout.write("\n") # Add SHELXE results to the results dictionary if init.keywords.SHELXE: if job_info.MRPROGRAM=="MOLREP": logfile=job_info.shelxe_molrep_logfile elif job_info.MRPROGRAM=="PHASER": logfile=job_info.shelxe_phaser_logfile self.setResultsDict(init, mstat, model, logfile=logfile, mrprogram=job_info.MRPROGRAM, program="SHELXE") # If no score file exists then we can assume that Refinement/MR failed and continue to the next model. else: if not os.path.isfile(refmac_logfile): sys.stdout.write("Refmac log file not found for job number: %d, model ID: %s\n" \ % (job_info.MR_job_number, model)) if job_info.MRPROGRAM=="MOLREP": job_info.solution_type_MOLREP = "REFMAC_FAIL" if job_info.MRPROGRAM=="PHASER": job_info.solution_type_PHASER = "REFMAC_FAIL" # JMHT BELOW WAS ONLY IN SERIAL CASE - NOT SURE IF WANTED/NEEDED # sys.stdout.write("Refmac log file not found for: %s\n" % model) # sys.stdout.write("\n") # # # Set the solution type to a Refmac failure (if not already set) # if job_info.MRPROGRAM=="MOLREP": # if job_info.solution_type_MOLREP == "": # job_info.solution_type_MOLREP = "REFMAC_FAIL" # if job_info.MRPROGRAM=="PHASER": # if job_info.solution_type_PHASER == "": # job_info.solution_type_PHASER = "REFMAC_FAIL" # # sys.stdout.write("Checking MR output...\n") # # if job_info.MRPROGRAM=="MOLREP": # if os.path.isfile(job_info.molrep_PDBfile): # sys.stdout.write("Molrep produced a PDB, error was in Refmac\n") # sys.stdout.write("Check the refmac logfile:\n %s\n" % job_info.refmac_molrep_logfile[init.WEB_PATH_START:]) # sys.stdout.write("\n") # else: # sys.stdout.write("Molrep failed to produce a PDB\n") # sys.stdout.write("Check the molrep logfile:\n %s\n" % job_info.molrep_logfile[init.WEB_PATH_START:]) # sys.stdout.write("\n") # job_info.solution_type_MOLREP = "MOLREP_FAIL" # # if job_info.MRPROGRAM=="PHASER": # if os.path.isfile(job_info.phaser_PDBfile): # sys.stdout.write("Phaser produced a PDB, error was in Refmac\n") # sys.stdout.write("Check the refmac logfile:\n %s\n" % job_info.refmac_phaser_logfile[init.WEB_PATH_START:]) # sys.stdout.write("\n") # else: # sys.stdout.write("Phaser failed to produce a PDB\n") # sys.stdout.write("Check the phaser logfile:\n %s\n" % job_info.phaser_logfile[init.WEB_PATH_START:]) # sys.stdout.write("\n") # job_info.solution_type_PHASER = "PHASER_FAIL" # Create a solution structure for this completed job sm=Solution_struct.Soln_Model() sm.soln_model_name=model + "_" + job_info.MRPROGRAM sm.model_source=model sm.mrprogram=job_info.MRPROGRAM if job_info.MRPROGRAM=="MOLREP": sm.solution_type=job_info.solution_type_MOLREP if job_info.MRPROGRAM=="PHASER": sm.solution_type=job_info.solution_type_PHASER # Add this model to the list of successfully completed jobs mstat.soln_list[sm.soln_model_name]=sm # If running in LITE mode clean out the MR directories if init.keywords.LITE: if job_info.MRPROGRAM=="PHASER": purge=Cleanup.Clean() purge.purgeDirectory(os.path.join(job_info.model_directory, "mr", "phaser"), Except=["sh", "pdb", "mtz"]) if job_info.MRPROGRAM=="MOLREP": purge=Cleanup.Clean() purge.purgeDirectory(os.path.join(job_info.model_directory, "mr", "molrep"), Except=["sh"]) if parallel_job is not None: # Print out the summary of results so far best=MRBUMP_writebest.BestLog() best.write_results_log(init, mstat, self.latest_jobs_count) self.latest_jobs_count+=1 mstat.sorted_soln_list=best.sorted_soln_list sys.stdout.write(best.report) return False def check_result_parallel(self, init, mstat, target_info): # Loop over the list of models that have just completed refinement. for parallel_job in self.latest_jobs: model = mstat.jobname_dict[parallel_job].model_name # Get the model name ok=self.check_result(mstat, init, model, target_info, parallel_job=parallel_job) if ok: break return def getMolrepResults(self, init, mstat, model): """ Get the results of a molrep job """ success=True # Set the solution type to a Molrep failure mstat.model_list[model].solution_type_MOLREP = "MOLREP_FAIL" # If there is a PDB file if not os.path.isfile(mstat.model_list[model].molrep_PDBfile): if self.debug: sys.stdout.write("Molrep PDB file not found for: %s\n" % model) sys.stdout.write("PDB File name: %s\n\n" % mstat.model_list[model].molrep_PDBfile[init.WEB_PATH_START:]) return False if self.debug: sys.stdout.write("Molrep PDB file exists\nPDB file name: {0}\n\n".format(mstat.model_list[model].molrep_PDBfile[init.WEB_PATH_START:])) # Read and set the Molrep variables from the solution file. if not os.path.isfile(mstat.model_list[model].molrep_solnfile): if self.debug: sys.stdout.write("Molrep solution file not found for: {0}\nFile name: {1}\n\n" % model,mstat.model_list[model].molrep_solnfile[init.WEB_PATH_START:]) return False if self.debug: sys.stdout.write("Molrep solution file exists\nFile name: {0}\n\n".format(mstat.model_list[model].molrep_solnfile[init.WEB_PATH_START:])) sys.stdout.write("\n") sys.stdout.write("######################################################################" + len(model)*"#" + "\n") sys.stdout.write("##### Summary of solutions from Molrep for " + model + ": #####\n") sys.stdout.write("######################################################################" + len(model)*"#" + "\n") sys.stdout.write("\n") sys.stdout.write(mstat.model_list[model].molrep_summary + "\n") # Open the log file and extract the scores (move this to a separate class at some stage) with open(mstat.model_list[model].molrep_solnfile) as log: line=log.readline() while line: if 'Contrast =' in line: mstat.model_list[model].molrep_contrast=float(string.split(line)[-1]) mstat.results_dict[model]["MOLREP"]["MOLREP_score"] = mstat.model_list[model].molrep_contrast if 'Time: ' in line and 'Elapsed:' in line: moltime=string.split(line, "Elapsed:")[-1] sectime=int(string.split(moltime)[0].replace("h",""))*3600 + int(string.split(moltime)[1].replace("m",""))*60 + int(string.split(moltime)[2].replace("s","")) mstat.model_list[model].molrep_time=sectime mstat.results_dict[model]["MOLREP"]["MOLREP_score"] = mstat.model_list[model].molrep_contrast mstat.results_dict[model]["MOLREP"]["MOLREP_time"] = mstat.model_list[model].molrep_time break line=log.readline() # Get the Buccaneer results if init.keywords.BUCCANEER and os.path.isfile(mstat.model_list[model].buccaneer_molrep_logfile): bp = parse_buccaneer.BuccaneerLogParser(mstat.model_list[model].buccaneer_molrep_logfile) mstat.results_dict[model]["MOLREP"]["BUCC_version"] = bp.version mstat.results_dict[model]["MOLREP"]["BUCC_final_Rfact"] = bp.finalRfact mstat.results_dict[model]["MOLREP"]["BUCC_final_Rfree"] = bp.finalRfree if init.keywords.ARPWARP: mstat.results_dict[model]["MOLREP"]["ARP_final_Rfact"] = mstat.model_list[model].arpwarp_molrep_finalRfact mstat.results_dict[model]["MOLREP"]["ARP_final_Rfree"] = mstat.model_list[model].arpwarp_molrep_finalRfree # Set this Molrep job as finished mstat.results_dict[model]["MOLREP"]["STATUS"]="Finished" return success def getPdbChains(self, model, init, mstat, pdb_sum_logfile): """ Extract the number of chains listed in the log file of Pdb summary utility. """ chains_found = 0 log=open(pdb_sum_logfile,'r') # Loop over the file and extract the number of chains found line=log.readline() while line: if 'Number' in line and 'molecules' in line: chains_found = float(string.split(line)[4]) line=log.readline() log.close() # Check that the number of chains were found if chains_found == 0: sys.stdout.write("mrbump_get_pdb_details Error: No chains listed for model: " + model + "\n") sys.stdout.write("mrbump_get_pdb_details Error: Logfile: \n") sys.stdout.write(" " + pdb_sum_logfile[init.WEB_PATH_START:] + "\n") sys.stdout.write("\n") return chains_found def getMrSum(self, job_info, init, mstat, model): if job_info.MRPROGRAM=="MOLREP": # Get the solution summary: mr_sum=Get_MR_sum.MR_Summary() mr_sum.get_molrep_summary(mstat.model_list[model].molrep_solnfile, init.molrepVersion) # Set the molrep smartie log mstat.model_list[model].molrep_smartie_log=mr_sum.log mstat.model_list[model].setMolrepSummary(mr_sum.molrep_summary) elif job_info.MRPROGRAM=="PHASER": # Get the solution summary: mr_sum=Get_MR_sum.MR_Summary() mr_sum.get_phaser_summary(mstat.model_list[model].phaser_logfile) else: raise RuntimeError,"Unrecoginsed MRPROGRAM: {0}".format(job_info.MRPROGRAM) return mr_sum def getPhaserResults(self, init, mstat, model): """ Get the results of a phaser job """ # Assume failure and then update based on what we find success=False phaser_result = "PHASER_FAIL" # Check PDB file - this is where we get the TFZ/LLG values from as they may not be present in the log if the job was killed pdb = mstat.model_list[model].phaser_PDBfile if os.path.isfile(pdb): if self.debug: sys.stdout.write("Phaser PDB file exists\nPDB file name: {0}\n'n".format(pdb)) # Get the values from the pdb pdbp=parse_phaser.PhaserPdbParser(pdb) mstat.results_dict[model]["PHASER"]["PHASER_LLG"] = pdbp.LLG mstat.results_dict[model]["PHASER"]["PHASER_TFZ"] = pdbp.TFZ mstat.results_dict[model]["PHASER"]["PHASER_RFZ"] = pdbp.RFZ phaser_result = 'PHASER_COMPLETED' # If we get a TFZ score then phaser did something - LLG scores may not be produced under all circumstances if not mstat.results_dict[model]["PHASER"]["PHASER_TFZ"] is None: success=True else: sys.stdout.write("Phaser PDB file not found for: %s\n" % model) sys.stdout.write("PDB File name: %s\n\n" % mstat.model_list[model].phaser_PDBfile) phaser_result='NO_PHASER_PDB' # We parse the Phaser logfile for additional information on how the job progressed phaser_logfile=mstat.model_list[model].phaser_logfile if os.path.isfile(phaser_logfile): # Parse the log file for various bits we didn't get from the pdb pp = parse_phaser.PhaserLogParser(phaser_logfile) mstat.results_dict[model]["PHASER"]["PHASER_version"] = pp.version mstat.results_dict[model]["PHASER"]["PHASER_time"] = pp.time mstat.results_dict[model]["PHASER"]["PHASER_killed"] = pp.killed mstat.results_dict[model]["PHASER"]["PHASER_error"] = pp.errorStr if pp.error: phaser_result='PHASER_ERROR' # We can get errors with PHASER even when we get a valid PDB produced if success: mstat.model_list[model].setPhaserLLGscore(mstat.results_dict[model]["PHASER"]["PHASER_LLG"]) sys.stdout.write("\n") sys.stdout.write("\n###########################################" + len(model)*"#") sys.stdout.write("\n##### Results from Phaser for " + model + ": #####\n") sys.stdout.write("###########################################" + len(model)*"#") sys.stdout.write("\n\n") #sys.stdout.write("Final TFZ: %.2lf LLG: %.2lf\n" % (pp.TFZ, pp.LLG)) else: sys.stdout.write("Phaser Log file not found for: %s\n" % model) sys.stdout.write("Phaser Log file name: %s\n\n" % phaser_logfile) phaser_result='PHASER_LOGFILE_MISSING' # Check the solution file. - not sure if this is really necessary? if success and not os.path.isfile(mstat.model_list[model].phaser_solnfile): sys.stdout.write("Phaser solution file not found for: %s\n" % model) sys.stdout.write("File name: %s\n\n" % mstat.model_list[model].phaser_solnfile) phaser_result='PHASER_SOLUTION_FILE_MISSING' success=False else: if self.debug: sys.stdout.write("Phaser solution file exists\nFile name: {0}\n'n".format(mstat.model_list[model].phaser_solnfile)) # Set the result mstat.model_list[model].solution_type_PHASER = phaser_result mstat.results_dict[model]["PHASER"]["Solution_Type"] = phaser_result # Set this Phaser job as finished mstat.results_dict[model]["PHASER"]["STATUS"]="Finished" return success def getRefmacScores(self, model, init, mstat, MRPROGRAM, refmac_logfile): """ Extract the relevant scores from the refmac log file. """ try: rp=parse_refmac.RefmacLogParser(refmac_logfile) except Exception,e: sys.stdout.write("Error processing refmac logfile {0}\n{1}\n".format(refmac_logfile,e)) return False score_found = False if rp.finalRfact is not None and rp.finalRfree is not None: score_found = True if MRPROGRAM=="MOLREP": mstat.results_dict[model]["MOLREP"]["REFMAC_version"]=rp.version mstat.model_list[model].refmac_molrep_initRfact=rp.initRfact mstat.model_list[model].refmac_molrep_initRfree=rp.initRfree mstat.model_list[model].refmac_molrep_finlRfact=rp.finalRfact mstat.model_list[model].refmac_molrep_finlRfree=rp.finalRfree mstat.results_dict[model]["MOLREP"]["final_Rfact"] = rp.finalRfact mstat.results_dict[model]["MOLREP"]["final_Rfree"] = rp.finalRfree #mstat.results_dict[model]["MOLREP"]["final_Rfact"] = mstat.model_list[model].refmac_phaser_finlRfact #mstat.results_dict[model]["MOLREP"]["final_Rfree"] = mstat.model_list[model].refmac_phaser_finlRfree elif MRPROGRAM=="PHASER": mstat.results_dict[model]["PHASER"]["REFMAC_version"]=rp.version mstat.model_list[model].refmac_phaser_initRfact=rp.initRfact mstat.model_list[model].refmac_phaser_initRfree=rp.initRfree mstat.model_list[model].refmac_phaser_finlRfact=rp.finalRfact mstat.model_list[model].refmac_phaser_finlRfree=rp.finalRfree mstat.results_dict[model]["PHASER"]["final_Rfact"] = rp.finalRfact mstat.results_dict[model]["PHASER"]["final_Rfree"] = rp.finalRfree #mstat.results_dict[model]["PHASER"]["final_Rfact"] = mstat.model_list[model].refmac_phaser_finlRfact #mstat.results_dict[model]["PHASER"]["final_Rfree"] = mstat.model_list[model].refmac_phaser_finlRfree else: sys.stdout.write("Refmac Scoring Error: Initial and Final Rfree values were not found for model: " + model + "\n") sys.stdout.write("Refmac Scoring Error: Logfile: \n") sys.stdout.write(" " + refmac_logfile[init.WEB_PATH_START:] + "\n") sys.stdout.write("\n") return score_found def printFileDetails(self, init, job_info, model, SOLN_TYPE, MARG_TYPE=0): """ Output the file details to the stdout """ # If it is a marginal solution output the log and PDB file details if SOLN_TYPE == "marginal": # Output according to the type of marginal solution if MARG_TYPE==1: if job_info.MRPROGRAM=="MOLREP": sys.stdout.write("Template Model: %s has produced a marginal solution, final Rfree = %.3f\n" \ % (model, job_info.refmac_molrep_finlRfree)) if job_info.MRPROGRAM=="PHASER": sys.stdout.write("Template Model: %s has produced a marginal solution, final Rfree = %.3f\n" \ % (model, job_info.refmac_phaser_finlRfree)) if MARG_TYPE==2: if job_info.MRPROGRAM=="MOLREP": diff = job_info.refmac_molrep_finlRfree/job_info.refmac_molrep_initRfree sys.stdout.write("Template Model: %s has produced a marginal solution, final Rfree = %.3f and is %.3f of initial Rfree\n" \ % (model, job_info.refmac_molrep_finlRfree, diff)) if job_info.MRPROGRAM=="PHASER": diff = job_info.refmac_phaser_finlRfree/job_info.refmac_phaser_initRfree sys.stdout.write("Template Model: %s has produced a marginal solution, final Rfree = %.3f and is %.3f of initial Rfree\n" \ % (model, job_info.refmac_phaser_finlRfree, diff)) sys.stdout.write("\n") if job_info.MRPROGRAM=="MOLREP": if init.keywords.LITE: sys.stdout.write("Running in 'LITE' mode. Output files from Molrep " + \ "have been removed but can be re-generated by running:\n %s\n" % job_info.molrep_scriptfile1[init.WEB_PATH_START:]) else: sys.stdout.write("Molrep log file:\n %s\n" % job_info.molrep_logfile[init.WEB_PATH_START:]) sys.stdout.write("Molrep PDB file:\n %s\n" % job_info.molrep_PDBfile[init.WEB_PATH_START:]) sys.stdout.write("Refmac log file:\n %s\n" % job_info.refmac_molrep_logfile[init.WEB_PATH_START:]) sys.stdout.write("Refmac PDB file:\n %s\n" % job_info.refmac_molrep_PDBfile[init.WEB_PATH_START:]) elif job_info.MRPROGRAM=="PHASER": if init.keywords.LITE: sys.stdout.write("Running in 'LITE' mode. Output files from Phaser " + \ "have been removed but can be re-generated by running:\n %s\n" % job_info.phaser_scriptfile[init.WEB_PATH_START:]) else: sys.stdout.write("Phaser log file:\n %s\n" % job_info.phaser_logfile[init.WEB_PATH_START:]) sys.stdout.write("Phaser PDB file:\n %s\n" % job_info.phaser_PDBfile[init.WEB_PATH_START:]) sys.stdout.write("Refmac log file:\n %s\n" % job_info.refmac_phaser_logfile[init.WEB_PATH_START:]) sys.stdout.write("Refmac PDB file:\n %s\n" % job_info.refmac_phaser_PDBfile[init.WEB_PATH_START:]) # If it's a failure then just report the log file details elif SOLN_TYPE == "poor": if job_info.MRPROGRAM=="MOLREP": sys.stdout.write("Template Model: %s produced a poor solution (Final Rfree = %.3f)\n" \ % (model, job_info.refmac_molrep_finlRfree)) sys.stdout.write("\n") if init.keywords.LITE: sys.stdout.write("Running in 'LITE' mode. Output files from Molrep " + \ "have been removed but can be re-generated by running:\n %s\n" % job_info.molrep_scriptfile1[init.WEB_PATH_START:]) else: sys.stdout.write("Molrep log file:\n %s\n" % job_info.molrep_logfile[init.WEB_PATH_START:]) sys.stdout.write("Refmac log file:\n %s\n" % job_info.refmac_molrep_logfile[init.WEB_PATH_START:]) elif job_info.MRPROGRAM=="PHASER": sys.stdout.write("Template Model: %s produced a poor solution (Final Rfree = %.3f)\n" \ % (model, job_info.refmac_phaser_finlRfree)) sys.stdout.write("\n") if init.keywords.LITE: sys.stdout.write("Running in 'LITE' mode. Output files from Phaser " + \ "have been removed but can be re-generated by running:\n %s\n" % job_info.phaser_scriptfile[init.WEB_PATH_START:]) else: sys.stdout.write("Phaser log file:\n %s\n" % job_info.phaser_logfile[init.WEB_PATH_START:]) sys.stdout.write("Refmac log file:\n %s\n" % job_info.refmac_phaser_logfile[init.WEB_PATH_START:]) else: sys.stdout.write("Job checking error: unrecognised value for SOLN_TYPE '%s' in printFileDetails()\n" % SOLN_TYPE) sys.stdout.write("\n") sys.stdout.write("\n") def rm_par_jobs(self,init,mstat): if not init.keywords.TRYALL: sys.stdout.write("removing all other jobs....\n") for jobID in mstat.jobid_array: if init.keywords.QTYPE == 'CONDOR': os.system("condor_rm " + `jobID` + " > /dev/null") else: os.system("qdel " + `jobID` + " > /dev/null") return def setSolutionDetails(self, init, mstat, model, mr_program, SOLN_TYPE): """ Set the solution details """ if mr_program == "MOLREP": if SOLN_TYPE=="good": # Set the general solution found flag mstat.setSolutionFound(True) # Set the solution model to the current one mstat.setSolutionModel(model) # Set the MR program that was used to give the solution mstat.setSolutionMRProgram(mr_program) # Set this model to be a good solution mstat.model_list[model].good_solution_MOLREP = True mstat.model_list[model].solution_type_MOLREP = "GOOD" elif SOLN_TYPE=="marginal": # Set the marginal solution flag to true mstat.model_list[model].marginal_solution_MOLREP = True mstat.model_list[model].solution_type_MOLREP = "MARGINAL" elif SOLN_TYPE=="poor": # Set the marginal solution flag to true mstat.model_list[model].poor_solution_MOLREP = True mstat.model_list[model].solution_type_MOLREP = "POOR" else: sys.stdout.write("Job checking error: unrecognised value for SOLN_TYPE '%s' in setSolutionDetailsParallel()\n" % SOLN_TYPE) sys.stdout.write("\n") # Update the result dict with the found solution mstat.results_dict[model]["MOLREP"]["Solution_Type"] = mstat.model_list[model].solution_type_MOLREP if mr_program == "PHASER": if SOLN_TYPE=="good": # Set the general solution found flag mstat.setSolutionFound(True) # Set the solution model to the current one mstat.setSolutionModel(model) # Set the MR program that was used to give the solution mstat.setSolutionMRProgram(mr_program) # Set this model to be a good solution mstat.model_list[model].good_solution_PHASER = True mstat.model_list[model].solution_type_PHASER = "GOOD" elif SOLN_TYPE=="marginal": # Set the marginal solution flag to true mstat.model_list[model].marginal_solution_PHASER = True mstat.model_list[model].solution_type_PHASER = "MARGINAL" elif SOLN_TYPE=="poor": # Set the marginal solution flag to true mstat.model_list[model].poor_solution_PHASER = True mstat.model_list[model].solution_type_PHASER = "POOR" else: sys.stdout.write("Job checking error: unrecognised value for SOLN_TYPE '%s' in setSolutionDetailsParallel()\n" % SOLN_TYPE) sys.stdout.write("\n") # Update the result dict with the found solution mstat.results_dict[model]["PHASER"]["Solution_Type"]=mstat.model_list[model].solution_type_PHASER # Get the Buccaneer results if init.keywords.BUCCANEER: blog=mstat.model_list[model].buccaneer_phaser_logfile if os.path.isfile(blog): bp = parse_buccaneer.BuccaneerLogParser(blog) mstat.results_dict[model]["PHASER"]["BUCC_version"] = bp.version mstat.results_dict[model]["PHASER"]["BUCC_final_Rfact"] = bp.finalRfact mstat.results_dict[model]["PHASER"]["BUCC_final_Rfree"] = bp.finalRfree if init.keywords.ARPWARP: mstat.results_dict[model]["PHASER"]["ARP_final_Rfact"] = mstat.model_list[model].arpwarp_phaser_finalRfact mstat.results_dict[model]["PHASER"]["ARP_final_Rfree"] = mstat.model_list[model].arpwarp_phaser_finalRfree def setResultsDict(self, init, mstat, model, logfile="", mrprogram="", program=""): """ Add the results to the results dictionary """ if program == "SHELXE": # Set the SHELXE CC value and average chain length for partial model if os.path.isfile(logfile): sp=parse_shelxe.ShelxeLogParser(logfile) if sp.CC and sp.avgChainLength: # Only set if found as are initialised to 0.0 if mrprogram == "PHASER": mstat.model_list[model].shelxe_phaser_CCscore = sp.CC mstat.model_list[model].shelxe_phaser_AvgChainLen = sp.avgChainLength elif mrprogram == "MOLREP": mstat.model_list[model].shelxe_molrep_CCscore = sp.CC mstat.model_list[model].shelxe_molrep_AvgChainLen = sp.avgChainLength mstat.results_dict[model][mrprogram]["SHELXE_CC"] = sp.CC mstat.results_dict[model][mrprogram]["SHELXE_ACL"] = sp.avgChainLength mstat.results_dict[model][mrprogram]["SHELXE_MCL"] = sp.maxChainLength mstat.results_dict[model][mrprogram]["SHELXE_NC"] = sp.numChains mstat.results_dict[model][mrprogram]["SHELXE_time"] = sp.cputime mstat.results_dict[model][mrprogram]["SHELXE_wMPE"] = sp.wMPE mstat.results_dict[model][mrprogram]["SHELXE_os"] = sp.originShift else: sys.stdout.write("%s: warning: no log file found for SHELXE (after %s):\n %s" %(model, mrprogram.lower(), logfile))