#!/usr/bin/python # Python script # # # # Copyright (C) 2007 --- 2012 Fei Long, A. Vagin, G. Murshudov # # This code is distributed under the terms and conditions of the # CCP4 Program Suite Licence Agreement as a CCP4 Library. # A copy of the CCP4 licence can be obtained by writing to the # CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK. # #==================================================================== ## The date of last modification: 29/03/2007 # Report any problem to fei@ysbl.york.ac.uk import os import sys import glob import re import math import string import shutil import time import random # modules (XML DOM) related to handle XML style files xml_path1= os.path.join(os.getenv("BALBES_ROOT"),"PyXML-0.8.4","xml","dom") xml_path2= os.path.join(os.getenv("BALBES_ROOT"),"PyXML-0.8.4","xml","dom","ext") xml_path3= os.path.join(os.getenv("BALBES_ROOT"),"PyXML-0.8.4","xml","dom","ext","reader") sys.path.append(xml_path1) sys.path.append(xml_path2) sys.path.append(xml_path3) from StripXml import StripXml import PyExpat from xml.parsers.expat import ExpatError # modules defined for structural hierachy py_path= os.path.join(os.getenv("BALBES_ROOT"),"bin_py") sys.path.append(py_path) from StructHierachy import Model from StructHierachy import Structure from StructHierachy import Sequence from StructHierachy import Assembly from StructHierachy import SF_info from StructHierachy import BOStruc # the abstract base class to be inheritted by any class # that wraps an executable code ################################################### class CPathDict(dict): """ A class that manages input, output, and executable pathes, etc, inheritted from the build-in datatype 'dict' """ def __init__( self, t_argvs): self['user'] = os.environ['USER'] self['cur_path'] = os.getcwd() self['pid'] = str(os.getpid()) # use local disk at the node for # (1) scratch dir (molrep, refmac both write frequently to scratch) # (2) maybe database searching and MR template model building (that needs local database) ? try: self['local'] = os.environ['CCP4_SCR'] except: try: self['local'] = os.environ['TMPDIR'] except: self['local'] = '/tmp' self['local_pdb_path'] = "/data2/fei/Database_BALBES/All_pdb/All" self['start_time'] = time.ctime(time.time() ) # system path should be put into environmment varibles when installing system self.setSystem() # Match all key words entered by the user keyWordList = ["OUT_ROOTDIR", "NAME_ROOT", "HKLIN", "SEQIN", "HKLOUT", "XYZOUT", "SOLIN"] i_argv = 0 for a_argv in t_argvs : if a_argv[0] == "-" or a_argv in keyWordList : self.matchKW(a_argv, t_argvs, len(t_argvs), i_argv) i_argv = i_argv + 1 # Check if all required pathes have been set, if not, set them to default if not self.has_key("out_root_path"): self['out_root_path'] = os.path.join(self['cur_path'], "output_"+ self['user']) if not glob.glob(self['out_root_path']): os.mkdir(self['out_root_path']) tmp = self['out_root_path'].strip().split("/") local_name_root = tmp[-2] + "_" + tmp[-1] print local_name_root self['loc_root_path'] = os.path.join(self['local'], local_name_root) if not glob.glob(self['loc_root_path']): os.mkdir(self['loc_root_path']) # The subdirctionary under root_output path # 1 scratch path (in cluster version the scratch path on a local disk) self.setSubpath('scr_path', 'out_root_path', 'scratch') # 2 results path self.setSubpath('result_path', 'out_root_path', 'results') # 3 the path that stores doc and log files self.setSubpath('doc_path', 'out_root_path', 'process_details') # 4 the path that stores template MR models self.setSubpath('ser_path', 'out_root_path', 'template_models') # 5 the path that stores xml files self.setSubpath('xml_path', 'out_root_path', 'xml' ) # 6 the path that stores template solution self.setSubpath('tem_path', 'doc_path', 'template_solutions') # MR search model if self.has_key('ccp4i_root'): self['mr_search_mod'] = self['ccp4i_root'] + "_MR_SEARCH_MOD.pdb" # Get rid of all white space and print all key words and the related values i_done =0 for a_key in self.keys(): self[a_key] = self[a_key].rstrip() self[a_key] = self[a_key].lstrip() if a_key in ["infile_seq", "infile_pdb", "infile_mtz"] \ and i_done == 0: self['name_root'] = self[a_key].split("/")[-1].split(".")[0] i_done = 1 # log file and brief summary for the whole process self['process_log_name'] = os.path.join(self['result_path'],"Process_information.txt") self['process_log_content'] = "" self['process_sum_name'] = os.path.join(self['result_path'],"Summary.txt") print "BALBES Version is %s " % self['version'] print "JOB STARTS AT ", self['start_time'] # print "Output path is %s " % self['out_root_path'] self['process_log_content'] += ("BALBES Version is %s \n" % self['version']) self['process_log_content'] += ("JOB FOR STARTS AT %s \n" %self['start_time']) # self['process_log_content'] += ("Output directory is %s \n" % self['out_root_path']) #-----------------------------------------------------------# print "\n#############################################################" print "| %s |" %"Please cite the following paper in any publication".ljust(57) print "| %s |" %"arising from use of BALBES:".ljust(57) print "| %s |" %"F.Long, A.Vagin, P.Young and G.N.Murshudov".ljust(57) print "| %s |" %"\"BALBES: a Molecular Replacement Pipeline\" ".ljust(57) print "| %s |"%"Acta Cryst. D64 125-132(2008)".ljust(57) print "#############################################################" self['process_log_content'] +=("\n#############################################################\n") self['process_log_content'] +=("| %s |\n" %"Please cite the following paper in any publication".ljust(57)) self['process_log_content'] +=("| %s |\n" %"arising from use of BALBES:".ljust(57)) self['process_log_content'] +=("| %s |\n" %"F.Long, A.Vagin, P.Young and G.N.Murshudov".ljust(57)) self['process_log_content'] +=("| %s |\n" %"\"BALBES: a Molecular Replacement Pipeline\" ".ljust(57)) self['process_log_content'] +=("| %s |\n"%"Acta Cryst. D64 125-132(2008)".ljust(57)) self['process_log_content'] +=("#############################################################\n") try: file_log = open(self['process_log_name'], "w") except IOError : print "can not open file %s for writing" %file_log sys.exit(1) file_log.write(self['process_log_content']) file_log.close() self['process_log_content'] = "" # solution validation self['chain_expected'] = 0 self['chain_found'] = 0 # solution checke, internal usage self['best_model'] = Model() def setSystem( self ): # set up the system path if os.environ.has_key("BALBES_ROOT"): self['sys_path'] = os.getenv("BALBES_ROOT") else: print "WARN: You have not setup the key environment variable, BALBES_ROOT ." print "execute the following command line depending on the shell you use, " print "under csh or tcsh, execute 'setenv BALBES_ROOT (Dir)/BALBES_0.0.1', " print "under bash, execute 'export BALBES_ROOT=(Dir)/BALBES_0.0.1', " print "where (Dir) is the directory you put BALBES_0.0.1 " print "We strongly suggest that you write the above command line into " print "your .cshrc(or .tcshrc), or .bashrc file. " sys.exit() self['seqDB_path'] = os.path.join(self['sys_path'], "seq_DB") self['pdbDB_path'] = os.path.join(self['sys_path'], "pdb_DB") self['list_path'] = os.path.join(self['sys_path'], "list") self['data_path'] = os.path.join(self['sys_path'], "data") self['loc_sys_path'] = self['local'] # version info about BALBES v_file_name = os.path.join(self['list_path'], "version.txt") if glob.glob(v_file_name): v_file = open(v_file_name, "r") self['version'] = v_file.readline().strip() else : self['version'] = "unknown" # CCP4 related if os.environ.has_key('CCP4'): self['ccp4'] = os.getenv("CCP4") self['bin_path'] = os.path.join(self['ccp4'], "bin") self['lig_path'] = os.path.join(self['ccp4'], "lib", "data", "monomers") else : raise RuntimeError, "You need to install 'ccp4' suite first" def matchKW( self, t_word, t_argvs, t_len_argvs, t_argv ): t_key = "" if t_word == "-pout" or t_word == "OUT_ROOTDIR" or t_word == "-o" : t_key = "out_root_path" elif t_word == "NAME_ROOT" : t_key = "ccp4i_root" elif t_word == "-fseq" or t_word == "SEQIN" or t_word == "-s": t_key = "infile_seq" elif t_word == "-fpdb" or t_word == "-m": t_key = "infile_pdb" elif t_word == "-lpdb" or t_word == "-l": t_key = "infile_pdb_lib" elif t_word == "-frsf" or t_word == "HKLIN" or t_word == "-f": t_key = "infile_mtz" elif t_word == "-x" or t_word == "XYZFX" : t_key = "fixed_mod" elif t_word == "HKLOUT" : t_key = "outfile_hkl" elif t_word == "XYZOUT" : t_key = "outfile_xyz" elif t_word == "-fsol" or t_word == "SOLIN": t_key = "infile_sol" messege1 = "You enter key word %s, but forget entering the associated file or path " %t_word messege2 = "You enter key word %s, but the associated file does not exist!" %t_word messege3 = "Wrong key word %s !" %t_word # output path if t_len_argvs <= t_argv + 1 : print messege1 sys.exit() elif not t_key : print messege3 sys.exit() else : if t_argvs[t_argv+1][0] == "-": print messege1 sys.exit() if t_argvs[t_argv+1][0] == "/" or t_argvs[t_argv+1][0] == ".": # user entered abs output path self[t_key] = t_argvs[t_argv+1] elif t_key == "infile_sol" : self[t_key] = os.path.join(self['local_pdb_path'], t_argvs[t_argv+1]) print self[t_key] else : self[t_key] = os.path.join(self['cur_path'], t_argvs[t_argv+1]) # check all input file and dir if not glob.glob(self[t_key]) and not t_key in ["outfile_xyz", "outfile_hkl", "ccp4i_root"]: if t_key == "out_root_path": os.mkdir(self['out_root_path']) else: print messege2 sys.exit() def setSubpath( self, t_key, t_parent, t_tag ): self[t_key] = os.path.join(self[t_parent], t_tag) if not glob.glob(self[t_key]): os.mkdir(self[t_key]) def CMDHelp(): print "BALBES USAGE " print "1: To run BALBES for molecular replacement and refinement: " print " balbes -o your_output_directory -f your_sf_file(cif or mtz) -s your_sequence (FASTA format)" print " e.g balbes -o job_xxxxx -f abcdef.mtz -s my_seq.seq \n" print "2: To run BALBES for molecular replacement and refinement using your own model PDB: " print " balbes -o your_output_dir -f your_sf_file(cif or mtz) -m your_own_pdb (optional : -s your_sequence)" print " e.g. balbes -o my_job_x -f xxxx.mtz -m xx.pdb (optional : -s xx.seq)\n" print "3: To run BALBES for molecular replacement and refinement using your own database consisting of " print "a group of pdb files: " print " balbes -o your_output_dir -f your_sf_file -l your_own_database -s your_sequence" print " e.g. balbes -o a_job -f a_sf.mtz -l myOwnDb/ -s a_seq.seq \n" print "4: To run BALBES for search models only: " print " balbes -o your_output_dirctory -s your_sequence_file (FASTA format)" print " e.g. balbes -o Mod_aaaaaa -s aaaaaaa.seq \n" def MoveFile(file1_ini, file_fin): """ make a check before mv a file to a file or directory""" if glob.glob(file1_ini): execmd = "mv " + file1_ini + " " + file_fin os.system(execmd) return file_fin def CopyFile(file1_ini, file_fin): """ make a check before cp a file to a file or directory""" if not glob.glob(file1_ini): return None else: execmd = "cp " + file1_ini + " " + file_fin os.system(execmd) return file_fin def WriteFile(file_name, line_grp) : """ write output to a file""" try: file_out = open(file_name, "a") except IOError : print "can not open file %s for writing" %file_name sys.exit(1) else : file_out.write(line_grp) file_out.close() line_grp = "" def ShowNameConv(file_name): try: file_out = open(file_name, "a") except IOError : print "can not open file %s for writing" %file_name sys.exit(1) else: print "\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" print "|------------------------------------------------------------------------------------|" print "| Name convention for models during BALBES session |" print "| |" print "|------------------------------------------------------------------------------------|" print "In the follow sessions, BALBES will find template models for MR and then try to solve them." print "Here is the name convention used for these template and solution models:" print "(1) if the results from model searching show " print "|-------------|----------------------------------------------------------------------|" print "| Name | ASSEM__2(2a74A+2a74B+2a74C) |" print "|-------------|----------------------------------------------------------------------|" print "| NO. of MODEL| 1 |" print "|-------------|----------------------------------------------------------------------|" print "| Model | Seq used | Similarity | Residues | Multimer? | Domain? | Monomers(expected)|" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "| 1 | 1,2,3, | 0.54 | 1109 | Trimer | No | 1 |" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "The name-tag for this model and the solution associated with it will contain: as2m1" print "and (a) this is a complex of models " print " (b) it contains input sequences 1, 2, 3 " print " (c) it orignates from PDB 2a74 " print " (d) it is a trimer " print "(2) if the results from model searching show " print "For the structure 2 in sequence 1 " print "|-------------|----------------------------------------------------------------------|" print "| PDB_CODE | 2qkiD |" print "|-------------|----------------------------------------------------------------------|" print "| NO. of MODEL| 6 |" print "|-------------|----------------------------------------------------------------------|" print "| Model | Chain ID | Similarity | Residues | Multimer? | Domain? | Monomers(expected)|" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "| 1 | D | 0.513(ENS) | 600 | Monomer | No | 2 |" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "| 2 | D | 0.513(ENS) | 92 | Monomer | Yes | 2 |" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "| 3 | D | 0.513 | 86 | Monomer | Yes | 2 |" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "| 4 | D | 0.513(ENS) | 115 | Monomer | Yes | 2 |" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "| 5 | D | 0.513(ENS) | 95 | Monomer | Yes | 2 |" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "| 6 | D | 0.513(ENS) | 122 | Monomer | Yes | 2 |" print "|-------|----------|------------|----------|-----------|---------|-------------------|" print "The name-tag for these 6 models and their solutions will contain (in turn):" print "sq1st2m1, sq1st2m2, sq1st2m3, sq1st2m4, sq1st2m5, sq1st2m6" print "where (a) sq1st2m2, sq1st2m3, sq1st2m4, sq1st2m5, sq1st2m6 are domains," print " (b) (ENS) means the model coordinate file contains an ensemble of models" print "(3) if during the sessions, you see the name-tag of a model contains " print " sq1st2m2_sq3st1m4_sq2st3m1 " print "it means this model is built during the MR sessions from solving and then fixing, in turn," print " (a) sequence 1 structure(PDB) 2 model 2 " print " (b) sequence 3 structure(PDB) 1 model 4 " print " (c) sequence 2 structure(PDB) 3 model 1 " print "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n" file_out.write("\n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n") file_out.write("|------------------------------------------------------------------------------------|\n") file_out.write("| Name convention for models during BALBES session |\n") file_out.write("| |\n") file_out.write("|------------------------------------------------------------------------------------|\n") file_out.write("In the follow sessions, BALBES will find template models for MR and then try to solve them.\n") file_out.write("Here is the name convention used for these template and solution models:\n") file_out.write("(1) if the results from model searching show \n") file_out.write("|-------------|----------------------------------------------------------------------|\n") file_out.write("| Name | ASSEM__2(2a74A+2a74B+2a74C) |\n") file_out.write("|-------------|----------------------------------------------------------------------|\n") file_out.write("| NO. of MODEL| 1 |\n") file_out.write("|-------------|----------------------------------------------------------------------|\n") file_out.write("| Model | Seq used | Similarity | Residues | Multimer? | Domain? | Monomers(expected)|\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("| 1 | 1,2,3, | 0.54 | 1109 | Trimer | No | 1 |\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("The name-tag for this model and the solution associated with it will contain: as2m1\n") file_out.write("and (a) this is a complex of models \n") file_out.write(" (b) it contains input sequences 1, 2, 3 \n") file_out.write(" (c) it orignates from PDB 2a74 \n") file_out.write(" (d) it is a trimer \n") file_out.write("(2) if the results from model searching show \n") file_out.write("For the structure 2 in sequence 1 \n") file_out.write("|-------------|----------------------------------------------------------------------|\n") file_out.write("| PDB_CODE | 2qkiD |\n") file_out.write("|-------------|----------------------------------------------------------------------|\n") file_out.write("| NO. of MODEL| 6 |\n") file_out.write("|-------------|----------------------------------------------------------------------|\n") file_out.write("| Model | Chain ID | Similarity | Residues | Multimer? | Domain? | Monomers(expected)|\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("| 1 | D | 0.513(ENS) | 600 | Monomer | No | 2 |\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("| 2 | D | 0.513(ENS) | 92 | Monomer | Yes | 2 |\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("| 3 | D | 0.513 | 86 | Monomer | Yes | 2 |\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("| 4 | D | 0.513(ENS) | 115 | Monomer | Yes | 2 |\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("| 5 | D | 0.513(ENS) | 95 | Monomer | Yes | 2 |\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("| 6 | D | 0.513(ENS) | 122 | Monomer | Yes | 2 |\n") file_out.write("|-------|----------|------------|----------|-----------|---------|-------------------|\n") file_out.write("The name-tag for these 6 models and their solutions will contain (in turn):\n") file_out.write("sq1st2m1, sq1st2m2, sq1st2m3, sq1st2m4, sq1st2m5, sq1st2m6\n") file_out.write("where (a) sq1st2m2, sq1st2m3, sq1st2m4, sq1st2m5, sq1st2m6 are domains,\n") file_out.write(" (b) (ENS) means the model coordinate file contains an ensemble of models\n") file_out.write("(3) if during the sessions, you see the name-tag of a model contains \n") file_out.write(" sq1st2m2_sq3st1m4_sq2st3m1 \n") file_out.write("it means this model is built during the MR sessions from solving and then fixing, in turn,\n") file_out.write(" (a) sequence 1 structure(PDB) 2 model 2 \n") file_out.write(" (b) sequence 3 structure(PDB) 1 model 4 \n") file_out.write(" (c) sequence 2 structure(PDB) 3 model 1 \n") file_out.write("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n") def WriteSummaryFile(file_name, t_model, models_processed = None) : try: file_out = open(file_name, "a") except IOError : print "can not open file %s for writing" %file_name sys.exit(1) else : if models_processed: # Put a summary of models processed here print "\nALL JOBS FINISHED==SUMMARY OF THE MR MODELS TRIED " print "|-------------------------------------------------------------------------------------------|" print "| MR Model | Search | Simil | Multimer? | Domain? | Monomers | R/R_free |" print "| | Model PDB | | | | expected/found | |" print "|------------------|-----------|-------|-----------|---------|----------------|-------------|" file_out.write("\nALL JOBS FINISHED==SUMMARY OF THE MR MODELS TRIED\n") file_out.write("|-------------------------------------------------------------------------------------------|\n") file_out.write("| MR Model | Search | Simil | Multimer? | Domain? | Monomers | R/R_free |\n") file_out.write("| | Model PDB | | | | expected/found | |\n") file_out.write("|------------------|-----------|-------|-----------|---------|--------------- |-------------|\n") if models_processed.has_key("ASSEM"): for a_model in models_processed['ASSEM'] : CopyFile(a_model.refi_sol_xyz, a_model.mod_dir) CopyFile(a_model.refi_sol_hkl, a_model.mod_dir) if a_model.nmr_enable: CopyFile(a_model.mr_in_xyz_nmr, a_model.mod_dir) else: CopyFile(a_model.mr_in_xyz, a_model.mod_dir) CopyFile(a_model.mr_log, a_model.mod_dir) CopyFile(a_model.ref_log, a_model.mod_dir) t_i = a_model.get_index() t_idx = "ASSEM" + str(t_i[0])+"_STR" + str(t_i[1]) \ + "_MOD" + str(t_i[2]) t_nmon = str(a_model.get_nmon()) + "/"+ str(a_model.get_nmon_solution()) s_pdb = a_model.s_model_pdb + a_model.get_index_chain() if a_model.R_fin == 1 : t_rall = "N/a" else : t_r = str(a_model.R_fin)[0:5] t_rf = str(a_model.R_free_fin)[0:5] t_rall =t_r + "/" + t_rf print "|%s|%s|%s|%s|%s|%s|%s|" \ %(t_idx.center(18),s_pdb.center(11), \ str(a_model.get_similarity()).center(7), \ a_model.get_multimer().center(11),a_model.get_domain().center(9),\ t_nmon.center(16), t_rall.center(13)) print "|-------------------------------------------------------------------------------------------|" file_out.write("|%s|%s|%s|%s|%s|%s|%s|\n" \ %(t_idx.center(18), s_pdb.center(11),\ str(a_model.get_similarity()).center(7), \ a_model.get_multimer().center(11),a_model.get_domain().center(9),\ t_nmon.center(16), t_rall.center(13))) file_out.write("|-------------------------------------------------------------------------------------------|\n") file_out.write("|-------------------------------------------------------------------------------|\n") if models_processed.has_key("SEQS"): for a_model in models_processed['SEQS'] : CopyFile(a_model.refi_sol_xyz, a_model.mod_dir) CopyFile(a_model.refi_sol_hkl, a_model.mod_dir) if a_model.nmr_enable: CopyFile(a_model.mr_in_xyz_nmr, a_model.mod_dir) else: CopyFile(a_model.mr_in_xyz, a_model.mod_dir) CopyFile(a_model.mr_log, a_model.mod_dir) CopyFile(a_model.ref_log, a_model.mod_dir) t_i = a_model.get_index() t_idx = "SEQ" + str(t_i[0])+"_STR" + str(t_i[1]) \ + "_MOD" + str(t_i[2]) t_nmon = str(a_model.get_nmon()) + "/"+ str(a_model.get_nmon_solution()) s_pdb = a_model.s_model_pdb + a_model.get_index_chain() if a_model.R_fin == 1 : t_rall = "N/a" else : t_r = str(a_model.R_fin)[0:5] t_rf = str(a_model.R_free_fin)[0:5] t_rall =t_r + "/" + t_rf print "|%s|%s|%s|%s|%s|%s|%s|" \ %(t_idx.center(18), s_pdb.center(11), \ str(a_model.get_similarity()).center(7), \ a_model.get_multimer().center(11),a_model.get_domain().center(9),\ t_nmon.center(16), t_rall.center(13)) print "|-------------------------------------------------------------------------------------------|" file_out.write("|%s|%s|%s|%s|%s|%s|%s|\n" \ %(t_idx.center(18), s_pdb.center(11), \ str(a_model.get_similarity()).center(7), \ a_model.get_multimer().center(11),a_model.get_domain().center(9),\ t_nmon.center(16), t_rall.center(13))) file_out.write("|-------------------------------------------------------------------------------------------|\n") # Temporarily print "\nSOLUTION SUMMARY " file_out.write("\nSOLUTION SUMMARY \n") Q_min = 0.30 if t_model.Q > Q_min: qfac = "%4.3f"%t_model.Q prob = "" if t_model.Q > 0.62: prob = "99.0%" else: x = (0.65-t_model.Q)*4 xx = -x*x p = math.exp(xx)*100 prob = "%3.2f"%p + "%" a_line = "Its probability to be a solution is %s"%prob print "#-------------------------------------------------------------------------------------------#" print "#%s#" %"A structure is suggested by BALBES".center(91) print "#%s#" %a_line.center(91) print "#-------------------------------------------------------------------------------------------#" file_out.write("#-------------------------------------------------------------------------------------------#\n") file_out.write("#%s#\n" %"A structure is suggested by BALBES".center(91)) file_out.write("#%s#\n" %a_line.center(91)) file_out.write("#-------------------------------------------------------------------------------------------#\n") print "The solution model index is %s"%t_model.name_tag file_out.write("The solution model index is %s\n"%t_model.name_tag) t_str_grp = t_model.refi_sol_xyz.split("/") t_n_root = "" for a_dir in t_str_grp[:-3]: t_n_root += a_dir + "/" t_n_root += "results/" f_root = "refmac_final_result" f_pdb = t_n_root + f_root + ".pdb" f_mtz = t_n_root + f_root + ".mtz" MoveFile(t_model.refi_sol_xyz,f_pdb) MoveFile(t_model.refi_sol_hkl,f_mtz) MoveFile(t_model.mr_sol_xyz, t_n_root) MoveFile(t_model.mr_log, t_n_root) MoveFile(t_model.refi_log, t_n_root) #CopyFile(t_model.refi_sol_xyz,f_pdb) #CopyFile(t_model.refi_sol_hkl,f_mtz) pdb_name = "results/"+ f_root + ".pdb" mtz_name = "results/"+ f_root + ".mtz" #pdb_name = "results/"+ t_model.refi_sol_xyz.split("/")[-1] #mtz_name = "results/"+ t_model.refi_sol_hkl.split("/")[-1] print "|-------------------------------------------------------------------------------------------|" print "| ITS PDB FILE |%s|" %pdb_name.center(70) print "|-------------------------------------------------------------------------------------------|" print "| ITS MTZ FILE |%s|" %mtz_name.center(70) print "|-------------------------------------------------------------------------------------------|" print "| R_ini/R_fin | %8.4f/%-8.4f | Rfree_ini/Rfree_fin | %8.4f/%-8.4f |" \ %(t_model.R_ini, t_model.R_fin, t_model.R_free_ini, t_model.R_free_fin) print "|-------------------------------------------------------------------------------------------|" print "| ITS Q FACTOR |%s|" %qfac.center(70) print "|-------------------------------------------------------------------------------------------|" file_out.write("|-------------------------------------------------------------------------------------------|\n") file_out.write("| ITS PDB FILE |%s|\n" %pdb_name.center(70)) file_out.write("|-------------------------------------------------------------------------------------------|\n") file_out.write("| ITS MTZ FILE |%s|\n" %mtz_name.center(70)) file_out.write("|-------------------------------------------------------------------------------------------|\n") file_out.write("| R_ini/R_fin | %8.4f/%-8.4f | Rfree_ini/Rfree_fin | %8.4f/%-8.4f |\n" \ %(t_model.R_ini, t_model.R_fin, t_model.R_free_ini, t_model.R_free_fin)) file_out.write("|-------------------------------------------------------------------------------------------|\n") file_out.write("| ITS Q FACTOR |%s|\n" %qfac.center(70)) file_out.write("|-------------------------------------------------------------------------------------------|\n") #f t_model.mr_in_xyz.strip(): # ser_name = "results/" + t_model.mr_in_xyz.strip().split("/")[-1] # if t_model.get_nres(): # ser_name = ser_name + "(Residues%s)"%str(t_model.get_nres()) # print "| Search Model |%s|" %ser_name.center(70) # print "|-------------------------------------------------------------------------------------------|" # file_out.write("| Search Model |%s|\n" %ser_name.center(70)) # file_out.write("|-------------------------------------------------------------------------------------------|\n") else : print "#-------------------------------------------------------------------------------------------#" print "#%s#" %"No solution is found".center(91) print "#-------------------------------------------------------------------------------------------#" file_out.write("#-------------------------------------------------------------------------------------------#\n") file_out.write("#%s#\n" %"No solution is found".center(91)) file_out.write("#-------------------------------------------------------------------------------------------#\n") file_out.close() def WriteSummaryFileHTHL(file_name, t_model) : pass def OutStaticsFile(file_name, resol, in_mod) : """ case file for statistics """ try: file_out = open(file_name, "w") except IOError : print "can not open file %s for writing" %file_name sys.exit(1) else : lin_grp = "Resolution: %s \n" %resol if in_mod : lin_grp += "R: %s \n" %in_mod.r lin_grp += "R_free: %s \n" %in_mod.r_free lin_grp += "MR_Score: %6.3f \n" %in_mod.get_mr_score() if in_mod.get_mr_score_prev(): lin_grp += "MR_Score_pre: %s \n" %in_mod.get_mr_score_prev() lin_grp += "\n Overall Properties \n" if in_mod.BFactors: lin_grp += "Number of chains: %d\n" %in_mod.BFactors['overall']['all'].number lin_grp += "B factor averaged over all: %7.3f \n" %in_mod.BFactors['overall']['all'].average lin_grp += "Sigma B_overall: %7.3f\n" %in_mod.BFactors['overall']['all'].sigma lin_grp += "number of main chains: %d\n" %in_mod.BFactors['overall']['main'].number lin_grp += "B factor averaged over all main chains: %7.3f \n" %in_mod.BFactors['overall']['main'].average lin_grp += "Sigma B_overallmainchains: %7.3f\n" %in_mod.BFactors['overall']['main'].sigma lin_grp += "Number of side chains: %d\n" %in_mod.BFactors['overall']['side'].number lin_grp += "B factor averaged over all side chains: %7.3f \n" %in_mod.BFactors['overall']['side'].average lin_grp += "Sigma B_overallsidechains: %7.3f\n\n" %in_mod.BFactors['overall']['side'].sigma if in_mod.Occupancy: lin_grp += "Occupancy averaged over all: %7.3f \n" %in_mod.Occupancy['overall']['all'].average lin_grp += "Sigma O_overall: %7.3f\n" %in_mod.Occupancy['overall']['all'].sigma lin_grp += "number of main chains: %d\n" %in_mod.Occupancy['overall']['main'].number lin_grp += "Occupancy averaged over all main chains: %7.3f \n" %in_mod.Occupancy['overall']['main'].average lin_grp += "Sigma O_overallmainchains: %7.3f\n" %in_mod.Occupancy['overall']['main'].sigma lin_grp += "Number of side chains: %d\n" %in_mod.Occupancy['overall']['side'].number lin_grp += "Occupancy averaged over all side chains: %7.3f \n" %in_mod.Occupancy['overall']['side'].average lin_grp += "Sigma O_overallsidechains: %7.3f\n\n" %in_mod.Occupancy['overall']['side'].sigma # Now information about chains n_chains = len(in_mod.BFactors['chains']) # in_mod.get_nchains() if n_chains > 1: lin_grp += "Properties for each chain \n" lin_grp += "Chain NRES ScoreC Occupancy O_sigma B_factor B_sigma\n" for a_chain in in_mod.BFactors['chains']: i_f = 0 t_id = a_chain.get_chainID() for i in range(in_mod.get_nchains()) : chain_found_id = in_mod.found_chains[i].get_chainID() if t_id[0] == chain_found_id : i_f = 1 break for o_chain in in_mod.Occupancy['chains'] : if t_id == o_chain.get_chainID(): break if i_f == 1 : lin_grp += "%s %d %6.3f %6.3f %6.3f %6.3f %6.3f\n" \ %(chain_found_id, a_chain.b_all.number, \ in_mod.found_chains[i].get_scoreC(), o_chain.o_all.average,\ o_chain.o_all.sigma, a_chain.b_all.average, a_chain.b_all.sigma) else: lin_grp += "%s %d %6.3f %6.3f %6.3f %6.3f\n"\ %(t_id, a_chain.b_all.number,\ o_chain.o_all.average, o_chain.o_all.sigma,\ a_chain.b_all.average, a_chain.b_all.sigma) lin_grp += "\nProperties for each main chain \n" lin_grp += "mainChain NRES Occupancy O_sigma B_factor B_sigma\n" for a_chain in in_mod.BFactors['chains']: t_id = a_chain.get_chainID() for o_chain in in_mod.Occupancy['chains'] : if t_id == o_chain.get_chainID(): break lin_grp += "%s %d %6.3f %6.3f %6.3f %6.3f\n"\ %(t_id, a_chain.b_main.number,\ o_chain.o_main.average, o_chain.o_main.sigma,\ a_chain.b_main.average, a_chain.b_main.sigma) lin_grp += "\nOver all properties for each side chain \n" lin_grp += "mainChain NRES Occupancy O_sigma B_factor B_sigma\n" for a_chain in in_mod.BFactors['chains']: t_id = a_chain.get_chainID() for o_chain in in_mod.Occupancy['chains'] : if t_id == o_chain.get_chainID(): i_g = 1 break lin_grp += "%s %d %6.3f %6.3f %6.3f %6.3f\n"\ %(t_id, a_chain.b_side.number,\ o_chain.o_side.average, o_chain.o_side.sigma,\ a_chain.b_side.average, a_chain.b_side.sigma) else : lin_grp += "No solution Model" file_out.write(lin_grp) file_out.close() def AssessQ(t_model, t_r_free_ini = None, t_r_ini = None, t_file_o_name = None ): print "\n#----------------------------------------------#" print "# The Model Assesment #" print "#----------------------------------------------#" if t_file_o_name : t_file_o = open (t_file_o_name, "a") t_file_o.write("\n#----------------------------------------------#\n") t_file_o.write("# The Model Assesment #\n") t_file_o.write("#----------------------------------------------#\n") weight = 0.75 if t_r_free_ini and t_r_ini : # use R_ini/R_free_ini calculated in previously rounds of refinement r_comb_ini = (1-weight)*t_r_ini + weight*t_r_free_ini else : # use R_ini/R_free_ini in current round of refinement r_comb_ini = (1-weight)*t_model.R_ini + weight*t_model.R_free_ini r_comb_fin = (1-weight)*t_model.R_fin + weight*t_model.R_free_fin dr_comb = r_comb_ini - r_comb_fin # t_model.d_R_free = r_free_ini - t_model.R_free_fin if t_file_o_name : t_file_o.write("d_R_free = %5.4f\n"%dr_comb) t_model.Q = 1 - (2*r_comb_fin*r_comb_fin)/(1+dr_comb) # t_model.Q = 1 - (2*t_model.R_free_fin*t_model.R_free_fin)/(1+t_model.d_R_free) t_file_o.close() # Function that set the details of test case "code_name" def SetCaseDetail(code_name, case_detail_dict, pdb_sol_file_name): if code_name: case_detail_dict[code_name] = { } case_detail_dict[code_name] ['identity_best'] = 100.0 case_detail_dict[code_name]['chain_expected'] = 0.0 case_detail_dict[code_name]['chain_found'] = 0.0 print "The detail of test case %s are:" % code_name if glob.glob(pdb_sol_file_name): pdb_sol_file = open(pdb_sol_file_name, "r") pdb_sol_content = pdb_sol_file.readlines() for pdb_line in pdb_sol_content: pdb_strings = pdb_line.split() n_l = len(pdb_strings) if n_l != 0: if pdb_strings[0] == "HEADER": case_detail_dict[code_name] ['publish_date'] = pdb_strings[n_l-2] print "published in %s " % case_detail_dict[code_name] ['publish_date'] if pdb_line.find("METHOD USED TO DETERMINE THE STRUCTURE:") !=-1: if pdb_line.find("MOLECULAR REPLACEMENT") != -1: case_detail_dict[code_name] ['idx_method'] = 1 elif pdb_line.find("SAD") != -1 : case_detail_dict[code_name] ['idx_method'] = 2 elif pdb_line.find("MAD") != -1 : case_detail_dict[code_name] ['idx_method'] = 3 elif pdb_line.find("SIR") != -1 or pdb_line.find("SIRAS") != -1: case_detail_dict[code_name] ['idx_method'] = 4 elif pdb_line.find("MIR") != -1 or pdb_line.find("MIRAS") != -1: case_detail_dict[code_name] ['idx_method'] = 5 else : case_detail_dict[code_name] ['idx_method'] = 0 case_detail_dict[code_name] ['method'] = ' ' for n_str in range(8, len(pdb_strings)): case_detail_dict[code_name] ['method'] = case_detail_dict[code_name] ['method']\ + pdb_strings[n_str] + " " print "METHOD USED TO DETERMINE THE STRUCTURE: %s " \ % case_detail_dict[code_name] ['method'] print "INDEX OF METHOD USED TO DETERMINE THE STRUCTURE: %d " \ % case_detail_dict[code_name] ['idx_method'] if pdb_line.find("SOFTWARE USED:") !=-1: case_detail_dict[code_name] ['software'] = " " for n_str in range(4, len(pdb_strings)): case_detail_dict[code_name] ['software'] = case_detail_dict[code_name] ['software'] \ + pdb_strings[n_str] + " " print "SOFTWARE USED: %s " % case_detail_dict[code_name] ['software'] # Function that calculate the successful rate for a method" def SetMethodRate(method_name, n_total, n_f_1, n_f_all): if n_total: p_found_one = float(n_f_1)/float(n_total) print "Total number of test cases solved initially by %s upto now is %d " \ % (method_name, n_total) print " in which %d test cases found at least one monomer" % n_f_1 print " The ratio is %.3f " % p_found_one p_found_all = float(n_f_all)/float(n_total) print " and %d test cases found all exepected monomers" % n_f_all print " The ratio is %.3f " % p_found_all # for model sorting def ModelCmp(mod_1, mod_2): if mod_1.get_nres() >= mod_2.get_nres() : return -1 else : return 1 def RandomStrigGenerator(): table = ["a","b","c","d","e","f","g","h","i","j","k","l","m","n","o","p","q","r","s","t", \ "u","v","w","x","y","z","a","B", "C", "D","E","F","G","H","I","J","K","L","M","N", \ "O","P","Q","R","S","T","U","V","W","X","Y","Z","0","1","2","3","4","5","6","7","8", "9"] a_ranstr = "" for i in range(10): pos = random.randint(1,62) a_ranstr = a_ranstr + table[pos-1] return a_ranstr def RandomNumGenerator(nd): table = ["0","1","2","3","4","5","6","7","8", "9"] a_rnum = "" for i in range(nd): pos = random.randint(0,9) a_rnum = a_rnum + table[pos] return a_rnum def RandomtimeString(nd): day, month, date, cur_t, year= time.ctime().split() a_time= time.strptime("%s %s %s"%(date, month, year), "%d %b %Y") c_year = str(a_time[0]) c_month = str(a_time[1]) if len(c_month) < 2: c_month = "0"+ c_month c_day = str(a_time[2]) c_hour, c_minute, c_second = cur_t.split(":") a_ramString = RandomNumGenerator(nd) a_ranTimeString = c_year + "_" + c_month + "_" + c_day + "_" + c_hour + "_" + c_minute + "_" + a_ramString return a_ranTimeString class TmpSGCheckandStand: def __init__( self, path_obj): self.path_obj = path_obj self.exe_reindex = os.path.join(os.getenv("CBIN"), "reindex") self.exe_regroup = os.path.join(os.getenv("BALBES_ROOT"),"bin/regroup") self.lib_regroup = os.path.join(os.getenv("BALBES_ROOT"), "data/regroup.lib") self.dir_regroup = os.path.join(self.path_obj['new_dir'], "Regroup") if not glob.glob(self.dir_regroup): os.mkdir(self.dir_regroup) def controller( self, t_model): t_strgrp = t_model['mtz'].strip().split("/") t_name = t_strgrp[-1].split(".")[0] t_standardlized_name = os.path.join(self.dir_regroup, t_name + "_standardlized") self.standardlized_pdb = os.path.join(self.dir_regroup, t_name + "_standardlized.pdb") self.standardlized_mtz = os.path.join(self.dir_regroup, t_name + "_standardlized.mtz") self.re_process_log = open(os.path.join(self.dir_regroup, "process.log"),"w") self.regroup_log = os.path.join(self.dir_regroup, "regroup.log") self.reindex_log = os.path.join(self.dir_regroup, "reindex.log") print self.standardlized_pdb print self.standardlized_mtz if os.path.isfile(t_model['pdb']): cmdline = "cat %s | %s %s %s > %s "%(t_model['pdb'], self.exe_regroup, self.lib_regroup, self.standardlized_pdb,self.regroup_log) print cmdline os.system(cmdline) if os.path.isfile(t_model['mtz']) and os.path.isfile(self.regroup_log): tmp_mtz = os.path.join(self.dir_regroup,"tmp.mtz") cmdline = 'grep "^A" %s | cut -c3- | %s hklin %s hklout %s > %s ' \ %(self.regroup_log, self.exe_reindex, t_model['mtz'], tmp_mtz, self.reindex_log) os.system(cmdline) if os.path.isfile(tmp_mtz): cmdline = 'grep "^B" %s | cut -c3- | %s hklin %s hklout %s >> %s ' \ %(self.regroup_log, self.exe_reindex, tmp_mtz, self.standardlized_mtz, self.reindex_log) os.system(cmdline) else: self.re_process_log.write("reindex of sym opt failed") self.re_process_log.close() sys.exit(1) else : self.re_process_log.write("can not find input mtz file to regroup or regroup failed") self.re_process_log.close() sys.exit(1) else : self.re_process_log.write("can not find input pdb file to regroup") sys.exit() if os.path.isfile(self.standardlized_mtz): t1 = open(self.regroup_log, "r") for line in t1.readlines(): if line.find("symm") != -1: new_sg = line.strip().split('"')[1] break self.re_process_log.write("Before Standardlization, the solution space group is %s \n"%t_model['sg']) self.re_process_log.write("After Standardlization, the solution space group is %s \n"%new_sg) self.re_process_log.close() t_model['mtz'] = self.standardlized_mtz t_model['pdb'] = self.standardlized_pdb t_model['std_sg'] = new_sg def TemSGCheck(t_model): # a temp function to check if the space group is a non-standard one (sg number >= 1000 (230) # the current version of ARP/wARP (v7.0) can not handle with it sg_type = False all_sg_type = {} try: fsym_name = os.path.join(os.getenv("CLIB"), "data/symop.lib") fsym = open(fsym_name, "r") except IOError: print "can not open CCP4 symop.lib, not space group check for ARP/wARP" return sg_type else : for a_line in fsym.readlines(): if a_line: if a_line[0] != " ": strgrps = a_line.strip().split("'")[1].split() ####### add a line here sg_simpl = "" for a_ch in strgrps: sg_simpl +=a_ch sg_simpl = sg_simpl.upper() all_sg_type[sg_simpl] = a_line.split()[0] user_strgps =t_model['sg'].split() user_sg = "" for a_ch in user_strgps: user_sg +=a_ch user_sg = user_sg.upper() if user_sg in all_sg_type.keys(): if int(all_sg_type[user_sg]) < 1000: sg_type = True return sg_type def getResNum(file_xyz, f1 = None): a_list = ['ALA', 'LEU', 'ARG', 'LYS', 'ASN', 'MET', 'ASP', 'PHE', 'CYS', 'PRO', 'GLY', 'SER', 'GLN', 'THR', 'GLU', 'TRP', 'HIS', 'TYR', 'ILE', 'VAL'] n_res = 0 if glob.glob(file_xyz): file_in = open(file_xyz, "r") res_pre = "" res_aft = "" n_res = 0 n_col = 0 # need to consider col merge in the file for a_line in file_in.readlines(): strgrp = a_line.strip().split() if strgrp and strgrp[0].find("ATOM") != -1 : if not res_pre : res_pre = strgrp[3] if res_pre.upper() in a_list: n_res +=1 else: res_pre = "" else: res_aft = strgrp[3] if res_aft.upper() in a_list: if res_aft != res_pre: n_res += 1 res_pre = res_aft file_in.close() else : print "could not find %s for reading "%file_xyz if f1 : f1 = "could not find %s for reading\n"%file_xyz return n_res