#! /usr/bin/env ccp4-python # # Copyright (C) 2005 Ronan Keegan # # This code is distributed under the terms and conditions of the # CCP4 Program Suite Licence Agreement as a CCP4 Application. # A copy of the CCP4 licence can be obtained by writing to the # CCP4 Secretary, Daresbury Laboratory, Warrington WA4 4AD, UK. # # Ronan Keegan 23/11/04 import os, sys, string, operator, time from urllib import urlretrieve import shutil import Model_struct import PDB_info import MRBUMP_MAFFT import MRBUMP_PROBCONS import MRBUMP_T_COFFEE import MRBUMP_CLUSTALW import MRBUMP_pdbset import MRBUMP_pdbcur import MRBUMP_chainsaw class PQS_struct: """ A structure to hold details about each of the PQS models that are found. """ """ file_list in PQS is a dictionary that will hold these structures. """ def __init__(self): self.chainname="" self.filename="" self.parent_chain="" self.seqID=0.0 self.source="" def setSource(self, source): self.source=source class PQS: """ A class to retrieve any multimer structure files from the PQS database at the EBI that a relevent to our search. """ def __init__(self): self.pdb_code='' self.pqs_files=[] self.pqs_list_file='' self.pqs_asalist_file='' self.pqs_biolist_file='' self.chainsaw_ALIGNIN='' self.process_success=False self.search_num=0 try: self.debug=eval(os.environ['MRBUMP_DEBUG']) except: self.debug=False def setPDBcode(self, pdb_code): self.pdb_code=pdb_code def set_pqs_list(self, MRBUMP_SETUP_DIR): """ A function to set the PQS list files from the EBI database. Sets the file, ASA and BIO list files.""" # File list self.pqs_list_file=MRBUMP_SETUP_DIR + "/pqs_LIST.txt" # Test to see if the file exists. if not (os.path.isfile(self.pqs_list_file)) and debug: print "Error accessing PQS List file: file not found." # ASA list self.pqs_asalist_file=MRBUMP_SETUP_DIR + "/pqs_ASALIST.txt" # Test for files existance. if not (os.path.isfile(self.pqs_asalist_file)) and debug: print "Error accessing PQS ASA List file: file not found." # BIO list self.pqs_biolist_file=MRBUMP_SETUP_DIR + "/pqs_BIOLIST.txt" # Test for files existance. if not (os.path.isfile(self.pqs_biolist_file)): print "Error accessing PQS BIO List file: file not found." def getAlignment(self, input_PDB, chain, target_info, number_mols, MAPROGRAM): """ A function to retrieve the sequence of the multimer and align it with the target sequence for chainsaw. Returns: string""" # Call PDB_info the get the sequence of the multimer p=PDB_info.PDB_info() sequence = p.getSequence(input_PDB) # Write this sequence and the target sequence to a fasta file so they can be aligned aln_in_file = os.path.join(os.environ['CCP4_SCR'], 'alignment_' + chain + '_in.fasta') aln_out_file = os.path.join(os.environ['CCP4_SCR'], 'alignment_' + chain + '_out.pir') infile=open(aln_in_file, 'w') infile.write(">" + target_info.chainID + "\n") infile.write(target_info.sequence * number_mols + "\n") infile.write(">" + chain + "\n") infile.write(sequence + "\n") infile.close() # Set the log file for the alignment program output aln_logfile=os.path.join(os.environ['CCP4_SCR'], "pqs_alignment.log") # Now do the alignment (using MAFFT, Probcons, TCoffee or Clustalw) if MAPROGRAM=='MAFFT': ma=MRBUMP_MAFFT.Mafft() ma.Mafft(aln_in_file, aln_out_file, aln_logfile) ma.convert_to_pir(aln_out_file) elif MAPROGRAM=='PROBCONS': pc=MRBUMP_PROBCONS.Probcons() pc.setLogFile(aln_logfile) pc.Probcons(aln_in_file, aln_out_file) elif MAPROGRAM=='T_COFFEE': tc=MRBUMP_T_COFFEE.T_coffee() tc.setLogFile(aln_logfile) tc.T_coffee(aln_in_file, aln_out_file) elif MAPROGRAM=='CLUSTALW': cl=MRBUMP_CLUSTALW.Clustalw() cl.Clustalw(aln_in_file, aln_out_file, aln_logfile) return aln_out_file def processPDB(self, mstat, input_PDB, output_PDB, chain, target_info, number_mols, MAPROGRAM): """ A function to process the coordinate files for the PQS matches. Processes them using PDBset and PDBcur. """ # Set the processing success flag to true self.process_success = True # set up some temporay files for intermediate steps in the processing pdbset_output_PDB = os.path.join(os.environ['CCP4_SCR'], 'pdbset_' + chain + '.pdb') pdbcur_output_PDB = os.path.join(os.environ['CCP4_SCR'], 'pdbcur_' + chain + '.pdb') # exlcude waters using PDBset pset=MRBUMP_pdbset.PDBset() pset.setKEYWORD("EXCLUDE HOH") pset_code = pset.pdbset(input_PDB, pdbset_output_PDB) if pset_code == 1: if self.debug: sys.stdout.write("\n") sys.stdout.write("Chain: %s failed in PQS PDB processing (pdbset)\n" % chain) sys.stdout.write("\n") self.process_success = False else: # delete hydrogens and select most probable confimations for side chains with PDBcur pcur=MRBUMP_pdbcur.PDBcur() pcur.setKEYWORD("delh") pcur.setKEYWORD("cutocc") pcur.setKEYWORD("mostprob") pcur_code = pcur.pdbcur(pdbset_output_PDB, pdbcur_output_PDB) if pcur_code == 1: if self.debug: sys.stdout.write("\n") sys.stdout.write("Chain: %s failed in PQS PDB processing (pdbcur)\n" % chain) sys.stdout.write("\n") self.process_success = False else: # Get the alignment to give to Chainsaw #alignin = self.getAlignment(pdbcur_output_PDB, chain, target_info, number_mols, MAPROGRAM) alignin = self.chainsaw_ALIGNIN # Call chainsaw to do the side-chain pruning csaw=MRBUMP_chainsaw.Chainsaw() csaw_code = csaw.chainsaw(pdbcur_output_PDB, alignin, output_PDB) if csaw_code == 1: if self.debug: sys.stdout.write("\n") sys.stdout.write("Chain: %s failed in PQS PDB processing (chainsaw)\n" % chain) sys.stdout.write(" XYZIN: %s\n" % pdbcur_output_PDB) sys.stdout.write(" ALIGNIN: %s\n" % alignin) sys.stdout.write(" XYZOUT: %s\n" % output_PDB) sys.stdout.write("\n") self.process_success = False def get_pqs_files(self, init, target_info, mstat, MAPROGRAM): """ A function to scan the PQS list file and download any multimers related to our search models. """ # Setup a dictionary for storing the multimer information multi_list=dict([]) file_list=dict([]) no_of_multimers = 0 list=open(self.pqs_list_file) asalist=open(self.pqs_asalist_file) ignore=[] # Loop over all of our matching PDB's for chain in mstat.chain_list.keys(): self.setPDBcode(mstat.chain_list[chain].PDBName) # Scan the PQS list file for any possible multimer files line=list.readline() while line: if self.pdb_code in line: # Create a PQS structure p=PQS_struct() p.filename=string.strip(line) p.chainname=(string.split(p.filename, "."))[0] p.parent_chain=chain p.seqID=mstat.chain_list[chain].seqID p.setSource("PQS") # Add this file to the file list dict file_list[p.filename]=p self.pqs_files.append(p.filename) line=list.readline() list.seek(0) list.close() # Populate the multimer structure and download each of the multimeric files found for file in file_list.keys(): m=Model_struct.Models_struct() name=string.split(file,'.') # Set the number of molecules in this multimer. If not found in ASA list default is # a monomer or = 1 line=asalist.readline() while line: if name[0] in line: m.setNumberMols(int(string.split(line)[4])) m.setMultiType(string.split(line)[2]) line=asalist.readline() asalist.seek(0) # Check to see that the multimer is useful #if m.num_mols == 1 or 'HOMO' not in m.multi_type or operator.mod(target_info.no_of_mols, m.num_mols) != 0: if m.number_mols == 1 or 'HOMO' not in m.multi_type: if m.number_mols == 1: print "PQS log message: " + name[0] + " rejected as it has no non-monomer multimers" elif 'HOMO' not in m.multi_type: print "PQS log message: " + name[0] + " rejected as it has hetrogeneous molecules" #else: # print "PQS log message: " + name[0] + " rejected as it does not have the correct number of molecules for this target" # Otherwise, add this multimer to the list else: if '_' in name[0]: m.setPDBName(name[0][0:4]) m.setModelChainSource(name[0]) m.setModelName(name[0]) else: m.setPDBName(name[0]) m.setModelChainSource(name[0] + '_0') m.setModelName(name[0] + '_0') m.seqID.append(file_list[file].seqID) m.setModelTypeExtension(name[1]) m.setModelType('MULTMR') # Make a directory for this match m.setModel_directory(os.path.join(init.search_dir, 'data', m.chain_source)) if not os.path.exists(m.model_directory): os.mkdir(m.model_directory) pdb_file_dir = os.path.join(m.model_directory, 'pdb_file') if not os.path.exists(pdb_file_dir): os.mkdir(pdb_file_dir) mr_dir = os.path.join(m.model_directory, 'mr') if not os.path.exists(mr_dir): os.mkdir(mr_dir) if "MOLREP" in init.keywords.MR_PROGRAM_LIST: os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep')) os.mkdir(os.path.join(m.model_directory, 'mr', 'molrep', 'refine')) if "PHASER" in init.keywords.MR_PROGRAM_LIST: os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser')) os.mkdir(os.path.join(m.model_directory, 'mr', 'phaser', 'refine')) # Set the location of the PDB file m.PDBfile.append(os.path.join(m.model_directory, 'pdb_file', m.chain_source + '.pdb')) unprocessedPDBfile = os.path.join(init.search_dir, 'PDB_files', m.chain_source + '.' + m.type_extension) pqs_file_url="http://pqs.ebi.ac.uk/pqs-doc/macmol/" + file #local_file=init.search_dir + '/data/' + m.chain_source + '/pdb_files/' + m.PDBfile[0] try: urlretrieve(pqs_file_url, unprocessedPDBfile) # Set the input alignment file for chainsaw self.chainsaw_ALIGNIN=mstat.chain_list[file_list[file].parent_chain].pir_aln_file # Process the coordinate file self.processPDB(mstat, unprocessedPDBfile, m.PDBfile[0], m.chain_source, target_info, m.number_mols, MAPROGRAM) # Add this multimer to the dictionary list if self.process_success == True: mstat.model_list[m.chain_source]=m no_of_multimers = no_of_multimers + 1 except: sys.stdout.write("\n") sys.stdout.write("PQS download Error: Could not retrieve PQS file for chain: %s\n" % m.chain_source) sys.stdout.write(" There may be a problem with the internet connection.\n") sys.stdout.write(" Do you need to set a proxy server?\n") sys.stdout.write("\n") asalist.close() # Set the number of multimers tracker variable mstat.setNoofPQSHits(no_of_multimers) print "\n" print "PQS log message: %d multimers have been found and added to model list\n" % mstat.no_of_PQShits for model in mstat.model_list.keys(): if mstat.model_list[model].number_mols > 1: print "PQS log message: Multimer found, chain ID: %s" % mstat.model_list[model].chain_source print "PQS log message: Base PDB code: %s" % mstat.model_list[model].PDBName print "PQS log message: Type: %s, Num of molecules: %s\n" % (mstat.model_list[model].type, mstat.model_list[model].number_mols) # Insert the multimers into the sorted list for MR for name in mstat.sorted_MR_list: # Make a note of the position of the current chain in the sorted list position = mstat.sorted_MR_list.index(name) # Loop over the model list and append the multimers in the correct order for model in mstat.model_list.keys(): # Check that we have not added it already if name not in ignore and mstat.chain_list.has_key(name) and \ mstat.chain_list[name].PDBName not in ignore: # If we have a multimer and the PDB id is the same as that in the list inset the multimer after the monomer(s) if mstat.model_list[model].number_mols > 1 and mstat.model_list[model].PDBName == mstat.chain_list[name].PDBName: mstat.sorted_MR_list.insert(position, mstat.model_list[model].chain_source) # Add the multimer and its parent PDB id to the ignore list ignore.append(mstat.model_list[model].chain_source) ignore.append(mstat.chain_list[name].PDBName)