#!/usr/bin/env ccp4-python # import os, sys, shutil, string, time import subprocess, signal, multiprocessing import argparse import random import copy import re import glob import platform import xml.etree.ElementTree as ET from xml.etree.ElementTree import ParseError import urllib, urllib2 from mmtbx.alignment import align headerMessage = """ ################################################################# ################################################################# ########## LOw REsolution STructure Refinement pipeline ######### ######################### (LORESTR) ############################# ############### Version beta ############ Jun 2016 ############## ################################################################# ## Copyright (C) 2015-2016 Oleg Kovalevskiy, Robert Nicholls ## ## and Garib Murshudov ## ## ## ## LORESTR is distributed in the hope that it will be useful, ## ## but WITHOUT ANY WARRANTY; without even the implied warranty ## ## of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ## ## See the GNU Lesser General Public License (v3) for details. ## ## The authors accept no liability whatsoever for any damage ## ## caused by the use or misuse of this software. ## ## ## ## E-mail: okovalev@mrc-lmb.cam.ac.uk ## ## ## ################################################################# """ helpMessage = """ Arguments: -xyzin, -p1, --pdb1 : PDB file for refinement -hklin, -f, --mtz : MTZ data file for refinement -p2, --pdb2 : PDB file(s) for external restraints generation -o : Output directory -xyzout : Output PDB file -hklout : Output MTZ data file -auto : Run BLAST search over PDB database, fetch homologous structures and use them for external restraints -pr : Protocol file -nc : Maximal number of chains for external restraints generation (default: 3) -nh : Maximal number of homologous structures (default: 5) -dna : Generate restraints for DNA/RNA chains -tls : Supply TLS file for refinement -save_space : Save disk space by removing ProSMART output (all scripts are left) -labin : Specify LABIN parameter for Refmac -phased : Use phased refinement. Please specify correct -labin parameter for Refmac as well. -free_1 : CNS-style Free flag ('FREE 1' option for Refmac) -mr : Use this option immediately after MR to run 200 cycles of jelly body refinement at the beginning -cpu : Number of CPUs used (default: 1) -minres : Minimal resolution for automatically downloaded homologues (default: 3.5 A) ################################################################# Minimal run (will try only jelly-body and hydrogen bonds restraints): lrsr.py -p1 1.pdb -f 1.mtz Typical run: lrsr.py -p1 1.pdb -f 1.mtz -p2 homologue1.pdb homologue2.pdb Automated run with BLAST search: lrsr.py -p1 1.pdb -f 1.mtz -auto ################################################################# """ def indent(elem, level=0): i = "\n" + level * " " if len(elem): if not elem.text or not elem.text.strip(): elem.text = i + " " if not elem.tail or not elem.tail.strip(): elem.tail = i for elem in elem: indent(elem, level + 1) if not elem.tail or not elem.tail.strip(): elem.tail = i else: if level and (not elem.tail or not elem.tail.strip()): elem.tail = i def generateNextDirectoryName(originalDirectory): if originalDirectory[-1] == os.sep: originalDirectory = originalDirectory[:-1] directories = glob.glob(originalDirectory + '_*') num = 0 match = None for directory in sorted(directories): match = re.search(originalDirectory+'_(\d+)$', directory) if match: if int(match.group(1)) > num: num = int(match.group(1)) if match: num += 1 return "%s_%d%s" % (originalDirectory, num, os.sep) else: return originalDirectory + '_1' + os.sep class Logger(object): def __init__(self, logFileName): self.terminal = sys.stdout try: self.log = open(logFileName, 'a') except: sys.stdout.write('Unable to create main log file %s; terminating.' % logFileName) sys.exit(1) def write(self, message): self.terminal.write(message) self.log.write(message) self.terminal.flush() self.log.flush() def flush(self): self.terminal.flush() self.log.flush() class ChainRestraints: def __init__(self, chain, restraintType): if restraintType not in ['Homologue', 'Hbond']: sys.stdout.write('Error: Restraints could be only "Homologue","Hbond", not "%s"\n' % restraintType) sys.exit(1) if len(chain) > 1: sys.stdout.write('Error: length of chain identifier can not be more than one letter.\n') sys.exit(1) self.chain = chain self.type = restraintType self.restraintList = [] # list of tuples [pdbFileName,chain] for external restrains with homologous structure class RefinementProtocol: def __init__(self, pdbToRefine, refinementType): if refinementType not in ['JellyBody', 'ExternalRestraints', 'AllHomologs']: sys.stdout.write('Error: Protocol could be only "JellyBody" or "ExternalRestraints" or "AllHomologs", not "%s"\n' % refinementType) sys.exit(1) if not os.path.isfile(pdbToRefine.pdbFileName): sys.stdout.write('Error: PDB file does not exist: %s\n' % pdbToRefine.pdbFileName) sys.exit(1) if not os.path.isfile(pdbToRefine.mtzFileName): sys.stdout.write('Error: mtz file does not exist: %s\n' % pdbToRefine.mtzFileName) sys.exit(1) self.pdbToRefine = pdbToRefine self.type = refinementType self.pdbChainRestraints = [] # list of ChainRestraints self.description = '' self.directory = '' self.refmacs = [] # list of Refmac instances for the protocol self.prosmarts = [] # list of Prosmart instances self.homologsFilesList = None self.restraint = '' # restrain-all or restrain-best for AllHomologs self.containerCmdFileName = '' self.containerCmd = [] self.started = False self.running = False self.executed = False self.ss = False self.restraintsFileName = '' self.dnaRestraintsFileName = '' self.isWindows = False def run(self): if len (self.containerCmd) > 0: outCom = open(self.containerCmdFileName, 'w') if not self.isWindows: outCom.write(shellHeader) outCom.write(''.join(self.containerCmd) + '\n\n') outCom.close() if not self.isWindows: st = os.stat(self.containerCmdFileName) os.chmod(self.containerCmdFileName, st.st_mode | 0111 ) #os.system('/bin/sh "%s"' % self.containerCmdFileName) self.running = True for prosmart in self.prosmarts: prosmart.run() prosmart.readLog() if not prosmart.isRunning(): if not prosmart.executed: self.executed = False self.running = False return if os.path.exists(prosmart.restraintsFileName) and len(self.restraintsFileName): prosmartRestraintsFile = open(prosmart.restraintsFileName, 'r') outputRestraintsFile = open(self.restraintsFileName, 'a') for line in prosmartRestraintsFile: outputRestraintsFile.write(line) prosmartRestraintsFile.close() outputRestraintsFile.close() if os.path.exists(self.dnaRestraintsFileName): dnaRestraintsFile = open(self.dnaRestraintsFileName, 'r') outputRestraintsFile = open(self.restraintsFileName, 'a') for line in dnaRestraintsFile: outputRestraintsFile.write(line) dnaRestraintsFile.close() outputRestraintsFile.close() if len(self.refmacs) == 3: # Special case of 3 Refmacs - testing external restraints with and without parameters, best one goes as input to jelly body self.refmacs[0].prepareRun() self.refmacs[0].run() if not self.refmacs[0].isRunning(): if not self.refmacs[0].executed: self.executed = False self.running = False return self.refmacs[0].readLog() self.refmacs[1].prepareRun() self.refmacs[1].run() if not self.refmacs[1].isRunning(): if not self.refmacs[1].executed: self.executed = False self.running = False return self.refmacs[1].readLog() self.refmacs[2].xyzin = min([self.refmacs[0], self.refmacs[1]], key=lambda y: y.log.getFinalRfree()).xyzout self.refmacs[2].prepareRun() self.refmacs[2].run() if not self.refmacs[2].isRunning(): if not self.refmacs[2].executed: self.executed = False self.running = False return self.refmacs[2].readLog() else: for refmac in self.refmacs: refmac.prepareRun() refmac.run() if not refmac.isRunning(): if not refmac.executed: self.executed = False self.running = False return refmac.readLog() self.executed = True self.running = False def isRunning(self): for prosmart in self.prosmarts: if prosmart.isRunning(): self.executed = False self.running = True return self.running else: if not prosmart.executed: self.executed = False self.running = False return self.running else: prosmart.readLog() if self.ss: prosmart.cleanAllOutput() for refmac in self.refmacs: if refmac.isRunning(): self.executed = False self.running = True return self.running else: if not refmac.executed: self.executed = False self.running = False return self.running else: refmac.readLog() self.executed = True self.running = False return self.running def terminate(self): for prosmart in self.prosmarts: if self.ss: prosmart.cleanAllOutput() class PDBtoRefine: def __init__(self, pdbFileName, mtzFileName): self.pdbFileName = pdbFileName self.mtzFileName = mtzFileName self.twin = False self.scalingLSQ = False self.solvent = {} self.startingValues = {} self.chains = {} # dictionary chainId : list of dictionaries: homologous chain:value, SeqID:value, local:value, global RMSD:value class ProSMART: def __init__(self, prosmart='prosmart', pdbToRefine=None, chainRestraint=None, outputDirectoryBase='ProSMART_', allHomologs=False, restraint='', allHomologsList=None, dna=False): if not chainRestraint and not allHomologsList and not dna: self.die('ProSMART: No chain restraints supplied; terminating.') self.prosmart = prosmart self.executed = False self.running = False self.pdbToRefine = pdbToRefine self.chainRestraint = chainRestraint self.restraints = [] self.outputDirectoryBase = outputDirectoryBase self.comFileName = '' self.logFileName = '' self.allHomologs = allHomologs self.allHomologsList = allHomologsList self.restraint = restraint self.outputDirectory = '' self.dna=dna self.restraintsFileName = '' self.prosmartWaitingTime = 120 # in seconds self.logSize = 0 self.logTime = 0 self.prosmartProcess = None self.cmd = [] self.isWindows = False self.exitCodeFileName = '' self.noLogTime = 0.0 cmd = [self.prosmart, '-p1', '%s' % os.path.abspath(self.pdbToRefine.pdbFileName)] if self.dna: if self.allHomologsList: cmd += ['-dna_rna','-restrain_best', '-p2'] filesList = [] for homologFileName in self.allHomologsList: filesList.append('%s' % os.path.abspath(homologFileName)) cmd += filesList self.outputDirectory = os. getcwd() + os.sep + self.outputDirectoryBase + 'DNAhomol' self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAhomol.' + comFileExtension self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAhomol.log' self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAhomol.ecd' else: cmd += ['-dna_rna', '-self_restrain'] self.outputDirectory = os. getcwd() + os.sep + self.outputDirectoryBase + 'DNAself' self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAself.' + comFileExtension self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAself.log' self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAself.ecd' elif self.allHomologs: cmd.append(self.restraint) cmd.append('-p2') filesList = [] for homologFileName in self.allHomologsList: filesList.append('%s' % os.path.abspath(homologFileName)) cmd += filesList self.outputDirectory = os. getcwd() + os.sep + '%schainAll' % self.outputDirectoryBase self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_All.' + comFileExtension self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_All.log' self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_All.ecd' cmd +=['-side', '-sigmatype', '0'] elif self.chainRestraint.type == 'Hbond': cmd.append('-c1') cmd.append(self.chainRestraint.chain) cmd.append('-bond') self.outputDirectory = os. getcwd() + os.sep + '%schain%s' % (self.outputDirectoryBase, self.chainRestraint.chain) self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.%s' % (self.chainRestraint.chain, comFileExtension) self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.log' % self.chainRestraint.chain self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.ecd' % self.chainRestraint.chain cmd +=['-side', '-sigmatype', '0'] elif self.chainRestraint.type == 'Homologue': if len(self.chainRestraint.restraintList) < 1: self.die('ProSMART: No external homologs supplied for homolog-based restraints; terminating.') cmd.append('-c1') cmd.append(self.chainRestraint.chain) filesList = [] chainsList = [] for (homologFileName, chain) in self.chainRestraint.restraintList: filesList.append('%s' % os.path.abspath(homologFileName)) chainsList.append(chain) cmd.append('-p2') cmd += filesList cmd.append('-c2') cmd += chainsList cmd +=['-restrain_all', '-side', '-sigmatype','0'] self.outputDirectory = os. getcwd() + os.sep + '%schain%s' % (self.outputDirectoryBase, self.chainRestraint.chain) self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.%s' % (self.chainRestraint.chain, comFileExtension) self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.log' % self.chainRestraint.chain self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.ecd' % self.chainRestraint.chain else: self.die('ProSMART: wrong parameter, restraint type %s is not known; terminating.' % self.chainRestraint.type) cmd += ['-o', '%s' % self.outputDirectory] # DNA??? - Oh yes, all self-restraints (for protein chains as well) will be copied self.restraintsFileName = self.outputDirectory + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '.txt' self.cmd = cmd def die(self, message, removeOutputDirectory = False): sys.stdout.write('\n\n' + str(message) + '\n\n') if removeOutputDirectory and os.path.isdir(self.outputDirectory): try: shutil.rmtree(self.outputDirectory) except OSError: sys.stdout.write('Error during removal of InputAnalysis working directory\n') sys.exit(1) def prepareRun(self): outCom = open(self.comFileName, 'w') if not self.isWindows: outCom.write(shellHeader) outCom.write(' \\\n '.join(self.cmd) + ' > %s \n\n' % self.logFileName) outCom.close() if not self.isWindows: st = os.stat(self.comFileName) os.chmod(self.comFileName, st.st_mode | 0111 ) # chmod +x def run(self): self.running = True self.prosmartProcess = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outLog = open(self.logFileName, 'w') for line in self.prosmartProcess.stdout: outLog.write(line) outLog.flush() outLog.close() exitCode = self.prosmartProcess.wait() outExitCode = open(self.exitCodeFileName, 'w') outExitCode.write( str(exitCode) ) outExitCode.close() if exitCode == 0: self.readLog() def isRunning(self): if self.executed: self.running = False return self.running if not os.path.exists(self.exitCodeFileName): if not os.path.exists(self.logFileName): if self.noLogTime == 0.0: self.noLogTime = time.time() self.running = True return self.running else: curTime = time.time() if curTime - self.noLogTime > self.prosmartWaitingTime: self.executed = False self.running = False return self.running else: self.running = True return self.running else: # log file exists self.running = True return self.running else: exitCodeFile = open(self.exitCodeFileName,'r') exitCode = int(exitCodeFile.readline()) exitCodeFile.close() if exitCode == 0: self.executed = True self.running = False return self.running else: self.executed = False self.running = False return self.running def readLog(self): output = None try: outputIN = open(self.logFileName, 'r') output = outputIN.read() outputIN.close() except IOError: self.die('Can`t read ProSMART log-file %s; terminating' % self.logFileName) match = re.search(r'ProSMART completed', output) if not self.dna and not match: sys.stdout.write('Something wrong with ProSMART run. ProSMART output: \n') self.die(output) # !!! # What happens with DNA key??? # !!! # match = re.search(r'Final restraints file copied to:\s(.*)\n', output) # # if match: # if os.path.exists(match.group(1)): # self.restrainsFileName = os.path.abspath(match.group(1)) # else: # sys.stdout.write('Cant locate ProSMART output file %s; terminating \n' % match.group(1)) # self.die(output) # else: # sys.stdout.write('Cant locate ProSMART output file name in the log; terminating\n' ) # self.die(output) # # self.executed = True # if self.dna and not self.allHomologsList: # #read self-restraints for DNA/RNA chains (which are not protein chains), if there are any # dnaSelfRestrains = os.listdir(self.outputDirectory + '/Output_Files/Restraints/Chain_Chain/') # if len(dnaSelfRestrains) < 1: # self.die('ProSMART failed to generate self-restraints; terminating. \n') # # proteinChains = self.pdbToRefine.chains.keys() # for entry in dnaSelfRestrains: # # match = re.search(r'^(' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + ')_(\S)_(\S+)_(\S)\.txt$', entry) # match = re.search(r'^(' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + ')_(\S)_(' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + ')_(\S)\.txt$', entry) # if match: # pdb1 = match.group(1) # chain1 = match.group(2) # pdb2 = match.group(3) # chain2 = match.group(4) # # if chain1 != chain2: # self.die('ProSMART: incorrect file names during self-restraints generation; terminating. \n') # # if chain1 not in proteinChains: # try: # restrainsIN = open(self.outputDirectory + '/Output_Files/Restraints/Chain_Chain/'+ entry, 'r') # if restrainsIN: # for currentString in restrainsIN: # if currentString[0] != '#': # self.restrains.append(currentString) # restrainsIN.close() # except IOError: # self.die( # 'ProSMART: can`t open restrains file %s ; terminating.' % # self.outputDirectory + '/Output_Files/Restrains/Chain_Chain/'+ entry) # # # else: # try: # restrainsIN = open(self.outputDirectory + '/' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '.txt', 'r') # if restrainsIN: # for currentString in restrainsIN: # if currentString[0] != '#': # self.restrains.append(currentString) # restrainsIN.close() # except IOError: # if not self.dna: # self.die( # 'ProSMART: can`t open restrains file %s; terminating.' % # self.outputDirectory + '/' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '.txt') def cleanAllOutput(self): try: if os.path.exists(self.outputDirectory): shutil.rmtree(self.outputDirectory) except OSError: sys.stdout.write('\nCan`t remove ProSMART directory %s, trying to continue.\n' % self.outputDirectory) return class Refmac: def __init__(self, refmac='refmac5', xyzin='', xyzout='', hklin='', hklout='', onCluster=False, ncyc=0): self.refmac = refmac self.logFileName = '' self.comFileName = '' self.hydrogens = 'NO' self.jellyBody = False self.jellyBodyWeight = 0.01 self.proSMARTParams = False self.proSMARTFile = '' self.proSMARTScale = 10 self.proSMARTWeight = 0.02 self.proSMARTDmax = 4.2 self.twin = False self.ncs = False self.phased = False self.labin = '' self.free_1 = False self.solventOptimise = False self.solvent = {} self.tls = '' self.scalingLSQ = False self.useWeight = False self.weight = 0.3 self.params = '' self.molprobity = False self.executed = False self.log = None self.xyzin = os.path.abspath(xyzin) self.hklin = os.path.abspath(hklin) self.isWindows = False if len(xyzout) > 1: self.xyzout = xyzout else: self.successfullyLoaded = False return if len(hklout) > 1: self.hklout = hklout else: self.successfullyLoaded = False return self.onCluster = onCluster self.ncyc = ncyc self.xmlFileName = self.xyzout[:-3] + 'xml' self.comFileName = self.xyzout[:-3] + comFileExtension self.logFileName = self.xyzout[:-3] + 'log' self.exitCodeFileName = self.xyzout[:-3] + 'ecd' self.running = False self.refmacWaitingTime = 120 #in seconds self.noLogTime = 0.0 self.noComTime = 0.0 self.refmacProcess = None self.cmd = [] self.successfullyLoaded = True def prepareRun(self): cmd = [self.refmac, 'XYZIN', '%s' % self.xyzin, 'HKLIN', '%s' % self.hklin, 'XYZOUT', '%s' % self.xyzout, 'HKLOUT', '%s' % self.hklout, 'XMLOUT', '%s' % self.xmlFileName] if len(self.tls) > 1: cmd.append('TLSIN') cmd.append('%s' % self.tls) if self.onCluster: cmd.append('SCRREF') cmd.append(os.sep +'tmp'+os.sep+ 'refmac_tmp_' + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(12))) self.cmd = cmd self.params = 'MAKE - \n' self.params += 'HYDROGEN ' + self.hydrogens + ' - \n' self.params += 'NEWLIGAND CONTINUE \n' if self.phased: self.params += 'REFI PHASED \n' self.params += 'NCYC ' + str(self.ncyc) + ' \n' if self.solventOptimise: self.params += 'SOLVENT OPTIMISE\n' if len(self.solvent) == 3: self.params += 'SOLVENT YES -\n' self.params += ' VDWProb %0.2f -\n' % self.solvent['VDWProb'] self.params += ' IONProb %0.2f -\n' % self.solvent['IONProb'] self.params += ' RSHRink %0.2f \n' % self.solvent['RSHRink'] if self.twin: self.params += 'TWIN \n' if self.ncs: self.params += 'NCSR LOCAL \n' if len(self.tls) > 1: self.params += 'REFI TLSC 10 \n' if self.proSMARTParams: # if not os.path.exists(self.proSMARTFile): # sys.stdout.write('ProSMART file %s does not exists; terminating.\n' % self.proSMARTFile) # sys.exit(1) self.params += 'EXTERNAL WEIGHT SCALE ' + str(self.proSMARTScale) + ' \n' self.params += 'EXTERNAL WEIGHT GMWT ' + str(self.proSMARTWeight) + ' \n' self.params += 'EXTERNAL DMAX ' + str(self.proSMARTDmax) + ' \n' if len(self.proSMARTFile) > 1: self.params += '@' + self.proSMARTFile + ' \n' if self.jellyBody: self.params += 'RIDG DIST SIGM ' + str(self.jellyBodyWeight) + ' \n' if self.useWeight: self.params += 'WEIGHT MATRIX ' + str(self.weight) + ' \n' if self.free_1: self.params += 'FREE 1 \n' if self.scalingLSQ: self.params += 'SCALE LSSCALE FUNCTION LSQ \n' self.params += 'MONI DIST 1000000 \n' if len(self.labin) > 1: self.params += 'LABIN %s \n' % self.labin self.params += 'END \n\n' outCom = open(self.comFileName, 'w') if not self.isWindows: outCom.write(shellHeader) outCom.write(' \\\n '.join(cmd) + ' > ' + self.logFileName + ' << EOF\n') outCom.write(self.params) outCom.write('EOF\n\n') outCom.close() st = os.stat(self.comFileName) os.chmod(self.comFileName, st.st_mode | 0111 ) def run(self): #os.system('/bin/sh "%s"' % self.comFileName) self.running = True self.refmacProcess = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE) for param in self.params: self.refmacProcess.stdin.write(param) self.refmacProcess.stdin.flush() outLog = open(self.logFileName, 'w') for line in self.refmacProcess.stdout: outLog.write(line) outLog.flush() outLog.close() exitCode = self.refmacProcess.wait() outExitCode = open(self.exitCodeFileName, 'w') outExitCode.write( str(exitCode) ) outExitCode.close() if exitCode == 0: self.readLog() def isRunning(self): if os.path.exists(self.xyzout) and os.path.exists(self.hklout) and os.path.exists(self.xmlFileName): self.executed = True self.running = False return self.running if not os.path.exists(self.exitCodeFileName): if os.path.exists(self.comFileName): if not os.path.exists(self.logFileName): if self.noLogTime == 0.0: self.noLogTime = time.time() self.running = True return self.running else: curTime = time.time() if curTime - self.noLogTime > self.refmacWaitingTime: self.executed = False self.running = False return self.running else: self.running = True return self.running else: # log file exists self.running = True return self.running else: # no com file if self.noComTime == 0.0: self.noComTime = time.time() self.running = True return self.running else: curTime = time.time() if curTime - self.noComTime > self.refmacWaitingTime: self.executed = False self.running = False return self.running else: self.running = True return self.running else: # there is exit code file -> Refmac finished exitCodeFile = open(self.exitCodeFileName,'r') exitCode = int(exitCodeFile.readline()) exitCodeFile.close() if exitCode == 0: self.executed = True self.running = False return self.running else: self.executed = False self.running = False return self.running def readLog(self): if os.path.exists(self.xmlFileName): if self.solventOptimise: self.log = RefmacXMLLog(self.xmlFileName, solventTest=True, molprobity = self.molprobity, pdbFileName=self.xyzout) else: self.log = RefmacXMLLog(self.xmlFileName, molprobity = self.molprobity, pdbFileName=self.xyzout) return True else: return False def cleanAllOutput(self): try: if os.path.exists(self.hklout): os.remove(self.hklout) if os.path.exists(self.xyzout): os.remove(self.xyzout) if os.path.exists(self.comFileName): os.remove(self.comFileName) if os.path.exists(self.logFileName): os.remove(self.logFileName) if os.path.exists(self.xmlFileName): os.remove(self.xmlFileName) except OSError: sys.stdout.write('\nCan`t clean files after Refmac, trying to continue.\n') return class RefmacXMLLog: def __init__(self, fileName, solventTest = False, molprobity = False, pdbFileName = ''): self.successfullyLoaded = False self.ncyc = 0 self.successfullyRefined = False self.twin = False self.cycles = [] self.solvent = {} self.molprobity = molprobity self.molprobityAnalysis = {} if not os.path.isfile(fileName): sys.stdout.write('No XML log-file found after Refmac run; terminating.\nPlease check input files.\n\n') sys.exit(1) try: xmlRoot = ET.parse(fileName).getroot() if int(xmlRoot.find('twin_info').find('ntwin_domain').text.strip()) > 1: self.twin = True if solventTest: self.solvent['VDWProb'] = float(xmlRoot.find('solvent_info').find('params_rfree').find('vdw_probe_radius').text.strip()) self.solvent['IONProb'] = float(xmlRoot.find('solvent_info').find('params_rfree').find('ion_probe_radius').text.strip()) self.solvent['RSHRink'] = float(xmlRoot.find('solvent_info').find('params_rfree').find('shrinkage_radius').text.strip()) else: for cycles in xmlRoot.find('Overall_stats').find('stats_vs_cycle'): cycle = { 'Ncyc': int(cycles.find('cycle').text.strip()), 'Rfact': float(cycles.find('r_factor').text.strip()), 'Rfree': float(cycles.find('r_free').text.strip()) } if cycles.find('rmsBOND').text.strip()[0] != '*': cycle['rmsBOND'] = float(cycles.find('rmsBOND').text.strip()) else: cycle['rmsBOND'] = 0 cycle['rmsANGL'] = float(cycles.find('rmsANGLE').text.strip()) cycle['rmsCHIRAL'] = float(cycles.find('rmsCHIRAL').text.strip()) self.cycles.append(copy.deepcopy(cycle)) except: sys.stdout.write('Problems with parsing Refmac XML log-file; terminating\n\n') sys.exit(1) self.ncyc = int(len(self.cycles) - 1) self.successfullyLoaded = True if not solventTest and self.cycles[0]['Rfree'] > self.cycles[self.ncyc]['Rfree']: self.successfullyRefined = True elif solventTest and len(self.solvent) == 3: self.successfullyRefined = True else: self.successfullyRefined = False if self.molprobity: molprobProcess = subprocess.Popen(['phenix.molprobity', pdbFileName, 'quiet=True', 'percentiles=True'], stdout=subprocess.PIPE) (out, err) = molprobProcess.communicate() for str in out.splitlines(): mol_parameters = str.split('=') if 'Ramachandran' in mol_parameters[0].strip(): self.molprobityAnalysis['ramaOutliers'] = float(mol_parameters[1].strip()[:-1]) self.cycles[0]['ramaOutliers'] = self.molprobityAnalysis['ramaOutliers'] if 'favored' in mol_parameters[0].strip(): self.molprobityAnalysis['ramaFavoured'] = float(mol_parameters[1].strip()[:-1]) self.cycles[0]['ramaFavoured'] = self.molprobityAnalysis['ramaFavoured'] if 'Clashscore' in mol_parameters[0].strip(): self.molprobityAnalysis['clash'] = float(re.search(r'^(\S*)\s.*', mol_parameters[1].strip()).group(1) ) self.molprobityAnalysis['clash_percentile'] = float(re.search(r'percentile:\s(\S*)\)', mol_parameters[1].strip()).group(1) ) self.cycles[0]['clash'] = self.molprobityAnalysis['clash'] self.cycles[0]['clash_percentile'] = self.molprobityAnalysis['clash_percentile'] if 'MolProbity' in mol_parameters[0].strip(): self.molprobityAnalysis['molprobity'] = float(re.search(r'^(\S*)\s.*', mol_parameters[1].strip()).group(1) ) self.molprobityAnalysis['molprobity_percentile'] = float(re.search(r'percentile:\s(\S*)\)', mol_parameters[1].strip()).group(1) ) self.cycles[0]['molprobity'] = self.molprobityAnalysis['molprobity'] self.cycles[0]['molprobity_percentile'] = self.molprobityAnalysis['molprobity_percentile'] def getRamaOutliers(self): return self.molprobityAnalysis['ramaOutliers'] def getRamaFavoured(self): return self.molprobityAnalysis['ramaFavoured'] def getClashscore(self): return self.molprobityAnalysis['clash'] def getClashscore_percentile(self): return self.molprobityAnalysis['clash_percentile'] def getMolprobity(self): return self.molprobityAnalysis['molprobity'] def getMolprobity_percentile(self): return self.molprobityAnalysis['molprobity_percentile'] def getAllRfree(self): rfrees = [] for cycle in self.cycles: rfrees.append(cycle['Rfree']) return rfrees def getStartRfact(self): return self.cycles[0]['Rfact'] def getStartRfree(self): return self.cycles[0]['Rfree'] def getStartrmsBOND(self): return self.cycles[0]['rmsBOND'] def getStartrmsANGL(self): return self.cycles[0]['rmsANGL'] def getStartrmsCHIRAL(self): return self.cycles[0]['rmsCHIRAL'] def getFinalRfact(self): return self.cycles[self.ncyc]['Rfact'] def getFinalRfree(self): return self.cycles[self.ncyc]['Rfree'] def getFinalrmsBOND(self): return self.cycles[self.ncyc]['rmsBOND'] def getFinalrmsANGL(self): return self.cycles[self.ncyc]['rmsANGL'] def getFinalrmsCHIRAL(self): return self.cycles[self.ncyc]['rmsCHIRAL'] def getRefinementRfactChange(self): return self.cycles[self.ncyc]['Rfact'] - self.cycles[0]['Rfact'] def getRefinementRfreeChange(self): return self.cycles[self.ncyc]['Rfree'] - self.cycles[0]['Rfree'] def getRefinementrmsBONDChange(self): return self.cycles[self.ncyc]['rmsBOND'] - self.cycles[0]['rmsBOND'] def getRefinementrmsANGLChange(self): return self.cycles[self.ncyc]['rmsANGL'] - self.cycles[0]['rmsANGL'] def getRefinementrmsCHIRALChange(self): return self.cycles[self.ncyc]['rmsCHIRAL'] - self.cycles[0]['rmsCHIRAL'] class InputAnalysis: def __init__(self, outputDirectory): try: outputDirectory = os.path.abspath(outputDirectory) if outputDirectory[-1:] != os.sep: outputDirectory += os.sep if not os.path.exists(outputDirectory): os.makedirs(outputDirectory) except OSError: self.die('"InputAnalysis" can`t prepare output directory %s' % outputDirectory) self.outputDirectory = outputDirectory self.prosmart = 'prosmart' self.comFileName = '' self.logFileName = '' self.labin = '' self.free_1 = False self.molprobity = False def die(self, message, removeOutputDirectory = False): sys.stdout.write('\n\n' + str(message) + '\n\n') if removeOutputDirectory and os.path.isdir(self.outputDirectory): try: shutil.rmtree(self.outputDirectory) except OSError: sys.stdout.write('Error during removal of InputAnalysis working directory\n') sys.exit(1) def twinTest(self, pdbToRefine): pdbTwintest = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'twintest.pdb', hklin=pdbToRefine.mtzFileName, hklout=self.outputDirectory + 'twintest.mtz') if pdbTwintest.successfullyLoaded: pdbTwintest.twin = True pdbTwintest.ncyc = 0 if len(self.labin) > 0: pdbTwintest.labin = self.labin pdbTwintest.free_1 = self.free_1 pdbTwintest.prepareRun() pdbTwintest.run() else: self.die('Can`t prepare Refmac run for twin test; terminating.\nPlease make sure that input files are correct.') if pdbTwintest.log.successfullyLoaded: pdbTwintest.cleanAllOutput() return pdbTwintest.log.twin else: self.die('Refmac run during twin test failed; terminating.\nPlease make sure that input files are correct.') def mr(self, pdbToRefine): refmacMRtest = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'mrTest.pdb', hklin=pdbToRefine.mtzFileName, hklout=self.outputDirectory + 'mrTest.mtz') if refmacMRtest.successfullyLoaded: refmacMRtest.twin = pdbToRefine.twin refmacMRtest.ncyc = 3 if len(self.labin) > 0: refmacMRtest.labin = self.labin refmacMRtest.free_1 = self.free_1 refmacMRtest.jellyBody = True refmacMRtest.prepareRun() refmacMRtest.run() else: self.die('Can`t prepare Refmac run for jelly body cycles after MR (1 of 2); terminating.\nPlease make sure that input files are correct.') if refmacMRtest.log.successfullyLoaded: pdbName = os.path.basename(pdbToRefine.pdbFileName) refmacMR = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'mr_' + pdbName, hklin=pdbToRefine.mtzFileName, hklout=self.outputDirectory + 'mr_' + pdbName[:-3] + 'mtz') if refmacMR.successfullyLoaded: refmacMR.twin = pdbToRefine.twin if refmacMRtest.log.getFinalRfree() > 0: if refmacMRtest.log.getFinalRfree() < 0.35: refmacMR.ncyc = 100 else: refmacMR.ncyc = 200 else: refmacMR.ncyc = 200 if len(self.labin) > 0: refmacMR.labin = self.labin refmacMR.free_1 = self.free_1 refmacMR.jellyBody = True refmacMR.molprobity = self.molprobity refmacMR.prepareRun() refmacMR.run() else: self.die('Can`t prepare Refmac run for jelly body cycles after MR (2 of 2); terminating.\nPlease make sure that input files are correct.') if refmacMR.log.successfullyLoaded: return refmacMR else: self.die('Refmac run during jelly body cycles after MR failed (2 of 2); terminating.\nPlease make sure that input files are correct.') else: self.die('Refmac run during jelly body cycles after MR failed (1 of 2); terminating.\nPlease make sure that input files are correct.') def scaleTestAndStartingValues(self, pdbToRefine): pdbScaletest1 = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'scaletest1.pdb', hklin=pdbToRefine.mtzFileName, hklout=self.outputDirectory + 'scaletest1.mtz') if pdbScaletest1.successfullyLoaded: pdbScaletest1.ncyc = 0 if pdbToRefine.twin: pdbScaletest1.twin = True if len(self.labin) > 0: pdbScaletest1.labin = self.labin pdbScaletest1.free_1 = self.free_1 pdbScaletest1.molprobity = self.molprobity pdbScaletest1.prepareRun() pdbScaletest1.run() else: self.die( 'Can`t prepare Refmac run for scaling test 1; terminating.\nPlease make sure that input files are correct.') if not pdbScaletest1.log.successfullyLoaded: self.die('Refmac run during scaling test 1 failed; terminating.\nPlease make sure that input files are correct.') pdbScaletest1.cleanAllOutput() pdbScaletest2 = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'scaletest2.pdb', hklin=pdbToRefine.mtzFileName, hklout=self.outputDirectory + 'scaletest2.mtz') if pdbScaletest2.successfullyLoaded: pdbScaletest2.ncyc = 0 pdbScaletest2.scalingLSQ = True if pdbToRefine.twin: pdbScaletest2.twin = True if len(self.labin) > 0: pdbScaletest2.labin = self.labin pdbScaletest2.free_1 = self.free_1 pdbScaletest2.molprobity = self.molprobity pdbScaletest2.prepareRun() pdbScaletest2.run() else: self.die( 'Can`t prepare Refmac run for scaling test 2; terminating.\nPlease make sure that input files are correct.') if not pdbScaletest2.log.successfullyLoaded: self.die('Refmac run during scaling test 2 failed; terminating.\nPlease make sure that input files are correct.') pdbScaletest2.cleanAllOutput() solventTest = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'solventTest.pdb', hklin=pdbToRefine.mtzFileName, hklout=self.outputDirectory + 'solventTest.mtz') if solventTest.successfullyLoaded: solventTest.ncyc = 0 solventTest.solventOptimise = True if pdbToRefine.twin: pdbScaletest2.twin = True if pdbScaletest1.log.getFinalRfree() > pdbScaletest2.log.getFinalRfree(): solventTest.scalingLSQ = True if len(self.labin) > 0: solventTest.labin = self.labin solventTest.free_1 = self.free_1 solventTest.prepareRun() solventTest.run() else: self.die( 'Can`t prepare Refmac run for solvent test; terminating.\nPlease make sure that input files are correct.') if not solventTest.log.successfullyLoaded: self.die('Refmac run during solvent test failed; terminating.\nPlease make sure that input files are correct.') solventTest.cleanAllOutput() if pdbScaletest1.log.getFinalRfree() > pdbScaletest2.log.getFinalRfree(): pdbToRefine.scalingLSQ = True pdbToRefine.startingValues = pdbScaletest2.log.cycles[0] pdbToRefine.solvent = solventTest.log.solvent return pdbToRefine else: pdbToRefine.scalingLSQ = False pdbToRefine.startingValues = pdbScaletest1.log.cycles[0] pdbToRefine.solvent = solventTest.log.solvent return pdbToRefine def matchToHomologs(self, pdbFileName, homologsFilesList, pdbChain = None, homologsChains = None, ss = False): chains = {} # dictionary chainId : list of dictionaries: homologous chain, SeqID, local, global RMSD parametersOUT = open(self.outputDirectory + 'p1.txt', 'w') parametersOUT.write('-p1\n') parametersOUT.write('%s\n' % os.path.abspath(pdbFileName)) if homologsFilesList: for fileName in homologsFilesList: parametersOUT.write('%s\n' % os.path.abspath(fileName)) parametersOUT.close() if pdbChain: parametersOUT = open(self.outputDirectory + 'c1.txt', 'w') parametersOUT.write('-c1\n') parametersOUT.write('%s\n' % pdbChain) if homologsChains: for chain in homologsChains: parametersOUT.write('%s\n' % chain) parametersOUT.close() cmd = [self.prosmart, '-f', '%sp1.txt' % self.outputDirectory] if pdbChain: cmd += ['-f', '%sc1.txt' % self.outputDirectory] cmd += ['-a1', '-quick', '-o', '%sProSMART_Output%s' % (self.outputDirectory, os.sep)] # output = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout.read() self.comFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_prosmart.' + comFileExtension self.logFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_prosmart.log' outCom = open(self.comFileName, 'w') if not isWindows: outCom.write(shellHeader) outCom.write(' \\\n '.join(cmd) + ' > %s \n\n' % self.logFileName) outCom.close() if not isWindows: st = os.stat(self.comFileName) os.chmod(self.comFileName, st.st_mode | 0111 ) # os.system('/bin/sh "%s"' % self.comFileName) prosmartProcess = subprocess.Popen(cmd, stdout=subprocess.PIPE) outLog = open(self.logFileName, 'w') for line in prosmartProcess.stdout: outLog.write(line) outLog.close() outputIN = open(self.logFileName, 'r') if not outputIN: self.die('Can`t read ProSMART log-file %s; terminating' % self.logFileName) output = outputIN.read() outputIN.close() match = re.search(r'ProSMART completed', output) if not match: sys.stdout.write('Something wrong with ProSMART run for the input analysis. ProSMART output: \n') self.die(output) homologsFilesList.append(pdbFileName) allPDBandChainsList = os.listdir(self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Global_Statistics'+os.sep+'Single'+os.sep) if len(allPDBandChainsList) < 1: sys.stdout.write('ProSMART failed to generate statistics files for input analysis. ProSMART output: \n') self.die(output) # rmsdRejection = False match = False for entry in allPDBandChainsList: for homolog in homologsFilesList: match = re.search(r'^(' + os.path.basename(pdbFileName)[:-4] + ')_(\S)_(' + os.path.basename(homolog)[:-4] + ')_(\S)\.txt$', entry) if match: break if match: pdb1 = match.group(1) chain1 = match.group(2) pdb2 = match.group(3) chain2 = match.group(4) if pdb1 != pdb2: chainInfo = {} lengthOfCurrentChain = 1 try: lengthOfCurrentChainFile = os.stat(self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep+'%s_%s.txt' %(pdb1, chain1)).st_size lengthOfCurrentChain = lengthOfCurrentChainFile - 8 - int (lengthOfCurrentChainFile/10) except: self.die('Error when getting statistics for file %s; terminating ' % self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep+'%s_%s.txt' %(pdb1, chain1)) for fileName in homologsFilesList: if os.path.basename(fileName)[:-4] == pdb2: chainInfo['pdb'] = fileName break chainInfo['chain'] = chain2 try: chainInfoIN = open(self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Global_Statistics'+os.sep+'Single'+os.sep + entry) except IOError: self.die('Can`t open ProSMART generated file %s during input analysis.' % entry) for currentString in chainInfoIN: parameters = currentString.split() chainInfo[parameters[0]] = parameters[1] chainInfoIN.close() # To calculate PercentAligned use 'NumberAligned' - 'Aligned' # New ProSMART version support if 'GlobalRMSD' in chainInfo.keys(): chainInfo['RMSD'] = chainInfo['GlobalRMSD'] if 'NumberAligned' in chainInfo.keys(): chainInfo['Aligned'] = chainInfo['NumberAligned'] if 'AverageFlexible' in chainInfo.keys(): chainInfo['Minimum'] = chainInfo['AverageFlexible'] if 'SequenceIdentity' in chainInfo.keys(): chainInfo['SeqIden'] = chainInfo['SequenceIdentity'] chainInfo['Percent'] = (float(chainInfo['Aligned']) / float(lengthOfCurrentChain) ) * 100 if 'SeqIden' not in chainInfo.keys() or \ 'Minimum' not in chainInfo.keys() or \ 'Percent' not in chainInfo.keys() or \ 'RMSD' not in chainInfo.keys(): self.die('Can`t find SeqIden or Minimum or RMSD data in ProSMART output; terminating.\n') if float(chainInfo['RMSD']) >= rmsdTreshold and \ float(chainInfo['Percent'])/100 >= MinimalPercentAligned and \ float(chainInfo['SeqIden'])/100 >= MinimalSequenceIdentityForHomologs: if chain1 in chains.keys(): chains[chain1].append(copy.deepcopy(chainInfo)) else: chains[chain1] = [] chains[chain1].append(copy.deepcopy(chainInfo)) else: if chain1 not in chains.keys(): chains[chain1] = [] if chain2 not in chains.keys(): chains[chain2] = [] if ss: try: if os.path.exists('%sProSMART_Output%s' % (self.outputDirectory,os.sep)): shutil.rmtree('%sProSMART_Output%s' % (self.outputDirectory, os.sep)) except OSError: sys.stdout.write('\nCan`t remove ProSMART directory %s, trying to continue.\n' % '%sProSMART_Output' % self.outputDirectory) return chains def getSequence(self, pdbFileName): chainSequences = [] cmd = [self.prosmart, '-p1', '%s' % os.path.abspath(pdbFileName), '-a1', '-quick', '-o', '%sProSMART_Chains%s' % (self.outputDirectory, os.sep)] # output = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout.read() self.comFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_chains.' + comFileExtension self.logFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_chains.log' outCom = open(self.comFileName, 'w') if not isWindows: outCom.write(shellHeader) outCom.write(' \\\n '.join(cmd) + ' > %s \n\n' % self.logFileName) outCom.close() if not isWindows: st = os.stat(self.comFileName) os.chmod(self.comFileName, st.st_mode | 0111 ) # os.system('/bin/sh "%s"' % self.comFileName) prosmartProcess = subprocess.Popen(cmd, stdout=subprocess.PIPE) outLog = open(self.logFileName, 'w') for line in prosmartProcess.stdout: outLog.write(line) outLog.close() outputIN = open(self.logFileName, 'r') if not outputIN: self.die('Can`t read ProSMART log-file %s; terminating' % self.logFileName) output = outputIN.read() outputIN.close() match = re.search(r'ProSMART completed', output) if not match: sys.stdout.write('Something wrong with ProSMART run for the input analysis. ProSMART output: \n') self.die(output) allChains = os.listdir(self.outputDirectory + 'ProSMART_Chains'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep) if len(allChains) < 1: sys.stdout.write('ProSMART failed to generate statistics files for input analysis. ProSMART output: \n') self.die(output) for fileName in allChains: match = re.search(os.path.basename(pdbFileName)[:-4]+'_(\S)\.txt', fileName) if match: chain = match.group(1) sequenceFile = open(self.outputDirectory + 'ProSMART_Chains'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep+fileName,'r') firstString = sequenceFile.readline() chainSequence = '' for line in sequenceFile: chainSequence += line.strip() sequenceFile.close() if len(chainSequence) > 0: chainSequences.append((chain, ''.join(chainSequence.split()))) # removing internal spaces if len(chainSequences) > 0: return chainSequences else: self.die('Can`t get protein sequence from the supplied PDB file; terminating.') class Pipeline: def __init__(self): self.nc = 3 # maximal number of chains for restraints generation self.protocols = [] # List of RefinementProtocol classes self.outputDirectory = '.'+os.sep+'lrsr_Output'+os.sep self.pdbToRefine = None self.homologsFilesList = None self.bestPDB = '' self.bestMTZ = '' self.logFile = None self.dna = False self.tls = '' self.ss = False self.labin = '' self.free_1 = False self.nCPU = 1 self.freeCPUs = 1 self.phased = False self.mr = False self.molprobity = False self.maximalNumberOfHomologs = 5 self.lowestResolutionForAutoHomologues = 3.5 self.xyzout = None self.hklout = None self.windows = False def die(self, message, removeOutputDirectory=False): sys.stdout.write('\n\n' + str(message) + '\n\n') if removeOutputDirectory and os.path.isdir(self.outputDirectory): try: shutil.rmtree(self.outputDirectory) except OSError: sys.stdout.write('Error during removal of program`s working directory\n') sys.exit(1) def parseCommandLine(self, arguments): parser = argparse.ArgumentParser(add_help=False) parser.set_defaults(auto=False) parser.add_argument('-xyzin', '-p1', '--pdb1', action='store', dest='pdb1', help=argparse.SUPPRESS) parser.add_argument('-hklin', '-f', '--mtz', action='store', dest='f', help=argparse.SUPPRESS) parser.add_argument('-xyzout', action='store', dest='xyzout', help=argparse.SUPPRESS) parser.add_argument('-hklout', action='store', dest='hklout', help=argparse.SUPPRESS) parser.add_argument('-p2', '--pdb2', nargs='*', action='store', dest='pdb2', help=argparse.SUPPRESS) parser.add_argument('-o', '--output', action='store', dest='output', help=argparse.SUPPRESS) parser.add_argument('-pr', '--protocol', action='store', dest='protocol', help=argparse.SUPPRESS) parser.add_argument('-nc', '--num_chains', action='store', dest='nc', type=int, help=argparse.SUPPRESS) parser.add_argument('-auto', action='store_true', dest='auto', help=argparse.SUPPRESS) parser.add_argument('-nh', '--num_homologs', action='store', dest='nh', type=int, help=argparse.SUPPRESS) parser.add_argument('-dna', action='store_true', dest='dna', help=argparse.SUPPRESS) parser.add_argument('-tls', action='store', dest='tls', type=str, help=argparse.SUPPRESS) parser.add_argument('-save_space', action='store_true', dest='ss', help=argparse.SUPPRESS) parser.add_argument('-labin', action='store', dest='labin', help=argparse.SUPPRESS) parser.add_argument('-phased', action='store_true', dest='phased', help=argparse.SUPPRESS) parser.add_argument('-free_1', action='store_true', dest='free_1', help=argparse.SUPPRESS) parser.add_argument('-cpu', action='store', dest='cpu', type=int, help=argparse.SUPPRESS) parser.add_argument('-mr', action='store_true', dest='mr', help=argparse.SUPPRESS) parser.add_argument('-minres', action='store', dest='minRes', type=float, help=argparse.SUPPRESS) parameters = parser.parse_args(arguments) pdb2 = [] protocol = None if not parameters.pdb1: self.die('%s\nProgram needs model to refine (-p1)' % helpMessage) if not parameters.f: self.die('%s\nProgram needs data to refine (-f) ' % helpMessage) if not os.path.isfile(parameters.pdb1): self.die('%s\nModel file "%s" (-p1) does not exist' % (helpMessage, parameters.pdb1)) if not os.path.isfile(parameters.f): self.die('%s\nData file "%s" (-f) does not exist' % (helpMessage, parameters.f)) if parameters.output: if parameters.output[-1:] != os.sep: parameters.output += os.sep self.outputDirectory = parameters.output # adding number to the directory name if the directory exists if os.path.exists(self.outputDirectory): self.outputDirectory = generateNextDirectoryName(self.outputDirectory) try: os.makedirs(self.outputDirectory) except OSError: self.die('%s\nProgram can not make output directory %s' % (helpMessage, self.outputDirectory)) sys.stdout = Logger(self.outputDirectory +'Pipeline.log') sys.stdout.write ('Working in: %s\n\n' % self.outputDirectory) sys.stdout.write ('Program received following parameters: \n') if parameters.pdb2: # Pipeline can accept as input file names, wildcards with *, ?, etc, or text files with one PDB file name per line for currentFile in parameters.pdb2: if not os.path.isfile(currentFile): filesList = glob.glob(currentFile) # expanding wildcard if len(filesList) < 1: self.die('%s\nHomologous model file "%s" (-p2) does not exist' % (helpMessage, currentFile)) else: for anotherCurrentFile in filesList: pdb2.append(anotherCurrentFile) continue if currentFile[-3:].lower() != 'pdb': # if extension is not 'pdb' then assuming this is a text file with PDB file names (one per line) listOfFilesIN = open(currentFile, 'r') for anotherCurrentFile in listOfFilesIN: anotherCurrentFile = anotherCurrentFile.strip() if len(anotherCurrentFile) > 0: if os.path.isfile(anotherCurrentFile): pdb2.append(anotherCurrentFile) else: self.die('%s\nHomologous model file %s read from the file %s does not exist; terminating' % ( helpMessage, anotherCurrentFile, currentFile)) else: pdb2.append(currentFile) # Now pdb2 contains all file names and these files at least physically exist # checking whether the same file name was supplied once or not basenames = [] for currentFile in pdb2: basenames.append(os.path.basename(currentFile)) for basename in basenames: if basenames.count(basename) > 1: self.die( 'You have supplied homologous model files with identical names: %s; files should have different names' % basename) # convert all file names to absolute path parameters.pdb1 = os.path.abspath(parameters.pdb1) parameters.f = os.path.abspath(parameters.f) pdb2 = [os.path.abspath(currentFile) for currentFile in pdb2] sys.stdout.write('Target structure: %s\n' % parameters.pdb1) sys.stdout.write('Data for target structure: %s\n' % parameters.f) sys.stdout.write('Supplied homologous structures: ') if len(pdb2) < 1: sys.stdout.write('None') else: for pdbName in pdb2: sys.stdout.write('\n' + pdbName) sys.stdout.write('\n') pdbToRefine = PDBtoRefine(parameters.pdb1, parameters.f) if parameters.xyzout: sys.stdout.write('Output PDB file name: %s\n' % parameters.xyzout) self.xyzout = parameters.xyzout if parameters.hklout: sys.stdout.write('Output MTZ file name: %s\n' % parameters.hklout) self.hklout = parameters.hklout if parameters.nh: sys.stdout.write('Maximal number of homologues (per unique chain): %d \n' % int(parameters.nh)) self.maximalNumberOfHomologs = parameters.nh if len(pdb2) > self.maximalNumberOfHomologs: sys.stdout.write('\nWarning: too many homologs supplied (%d); using first %d homologs.\n\n' % ( len(pdb2), self.maximalNumberOfHomologs)) pdb2 = pdb2[:self.maximalNumberOfHomologs] if parameters.protocol: sys.stdout.write('Protocol file: %s\n' % parameters.protocol) if not os.path.isfile(parameters.protocol): self.die('%s\nProtocol file "%s" (-pr) does not exist' % (helpMessage, parameters.protocol)) if len(pdb2) > 0: sys.stdout.write( 'You supplied both protocol file (-pr option) and external homologues (-p2 option);\n ignoring -p2 and using only homologues from the protocol file.\n\n') pdb2 = [] sys.stdout.write('PDB files indicated by the protocol file: \n') try: xmlRoot = ET.parse(parameters.protocol).getroot() protocol = RefinementProtocol(pdbToRefine, xmlRoot.find('Best_protocol').attrib['Type']) protocol.description = xmlRoot.find('Best_protocol').attrib['Description'] if xmlRoot.find('Best_protocol').attrib['Type'] not in ['ExternalRestraints', 'JellyBody', 'AllHomologs']: self.die('Unsupported protocol type %s found in the protocol file (-pr option); terminating' % xmlRoot.find('Best_protocol').attrib['Type']) if xmlRoot.find('Best_protocol').attrib['Type'] == 'ExternalRestraints': for restraint in xmlRoot.find('Best_protocol').find('pdbChainRestraints'): chainRestraints = ChainRestraints(restraint.text, restraint.tag) for currentId in restraint.attrib.keys(): restrainFileNameAndChain = restraint.attrib[currentId].split(';@;') if len(restrainFileNameAndChain) != 2: self.die('%s\nCant extract filename and chain from the protocol file (-pr option); terminating' % helpMessage) if not os.path.isfile(restrainFileNameAndChain[0]): self.die('%s\nFile %s specified in the protocol file %s does not exist; terminating.' % (helpMessage, restrainFileNameAndChain[0], parameters.protocol)) sys.stdout.write(restrainFileNameAndChain[0]+'\n') chainRestraints.restraintList.append((restrainFileNameAndChain[0], restrainFileNameAndChain[1] )) pdb2.append(restrainFileNameAndChain[0]) protocol.pdbChainRestraints.append(chainRestraints) elif xmlRoot.find('Best_protocol').attrib['Type'] == 'AllHomologs': protocol.homologsFilesList = [] for item in xmlRoot.find('Best_protocol').find('homologsFilesList'): if item.tag == 'restrain': protocol.restraint = item.text[1:-1] else: if os.path.isfile(item.text[1:-1]): sys.stdout.write(item.text[1:-1]+'\n') protocol.homologsFilesList.append(item.text[1:-1]) else: self.die('%s\nFile %s specified in the protocol file %s does not exist; terminating.' % (helpMessage, item.text[1:-1], parameters.protocol)) except: self.die( '%s\nParsing error of the protocol file "%s" (supplied by -pr option); terminating.' % (helpMessage, parameters.protocol)) if parameters.nc and not parameters.nc > 0: self.die('%s\nNumber of chains for refinement protocols (-nc) must be one or more.' % helpMessage) elif parameters.nc: sys.stdout.write('Maximal number of chains for external restraints generation: %d\n' % parameters.nc) self.nc = parameters.nc if parameters.dna: sys.stdout.write('Generate restraints for DNA/RNA: YES\n') self.dna = True if parameters.tls: if not os.path.isfile(parameters.tls): self.die('%s\nTLS input file "%s" (-tls) does not exist' % (helpMessage, parameters.f)) self.tls = os.path.abspath(parameters.tls) sys.stdout.write('File with TLS parameters: %s\n' % self.tls) if parameters.ss: sys.stdout.write('-save_space option: saving disk space by removing ProSMART output\n') self.ss = True if parameters.phased: if len(parameters.labin) < 1: self.die('%s\nPlease specify correct -labin parameter if you wish to use phased refinement option.\n' % (helpMessage)) sys.stdout.write('-phased option: using phased refinement\n') self.phased = True if parameters.labin: sys.stdout.write('Refmac LABIN option: %s\n' % parameters.labin) self.labin = parameters.labin if parameters.free_1: sys.stdout.write('Refmac "FREE 1" option specified for CNS-style Free flag \n') self.free_1 = True if parameters.mr: sys.stdout.write('Run 50-200 cycles of jelly body refinement after MR: YES\n') self.mr = True if parameters.cpu and not parameters.cpu > 0: self.die('%s\nNumber of CPUs used (-cpu) must be one or more.' % helpMessage) elif parameters.cpu: sys.stdout.write('Number of CPUs used: %d\n' % parameters.cpu) self.nCPU = parameters.cpu if parameters.minRes and not parameters.minRes > 1.0: self.die('%s\nSupplied lowest resolution for automatically downloaded homologues (%0.2f A) shall be more than 1 A; terminating\n' % (helpMessage, parameters.minRes)) elif parameters.minRes: sys.stdout.write('Lowest resolution for automatically downloaded homologues: %0.2f\n' % parameters.minRes) self.lowestResolutionForAutoHomologues = parameters.minRes #checking for phenix.molprobity installation lpath=os.getenv('PATH') for p in lpath.split(os.path.pathsep): p=os.path.join(p,'phenix.molprobity') if os.path.exists(p) and os.access(p,os.X_OK): self.molprobity = True break if self.molprobity: sys.stdout.write('phenix.molprobity detected; geometrical quality will be assesed\n') else: sys.stdout.write('phenix.molprobity not found; geometrical quality will not be assesed\n') if parameters.auto: pdb2 += self.autoHomologsSelection(pdbToRefine.pdbFileName) return pdbToRefine, pdb2, protocol def autoHomologsSelection(self, pdbFileName): sys.stdout.write('\nAuto selection of homologues:\n') sys.stdout.write('Analysing protein chains: ') chainsAndSequences = InputAnalysis(self.outputDirectory+'ChainsAnalysis'+os.sep).getSequence(pdbFileName) targetSequences = [] chainsAndSequences = sorted(chainsAndSequences, key=lambda x: x[0] ) # sorting by chain name -> A, B, C... sys.stdout.write(chainsAndSequences[0][0]) for (chain, sequence) in chainsAndSequences: if len(list(set(list(sequence)))) < 8: # to filter out DNA/RNA chains, no BLAST search for them. 8 is a bit arbitrary - means we are BLASTing chains composed of more than 8 different aminoacids (in nucleotides shall be 4-5 nucleotide types) sys.stdout.write(', %s is not protein' % chain) continue if len(targetSequences) < 1: targetSequences.append((chain, sequence)) else: addToList = True for (targetChain, targetSequence) in targetSequences: align_obj = align(seq_a=sequence, seq_b=targetSequence) alignment = align_obj.extract_alignment() numberOfIdenticalResidues = len(''.join(alignment.matches().split())) seqIdentity = float(numberOfIdenticalResidues) / float(max([len(sequence), len(targetSequence)])) if seqIdentity > MinimalSequenceIdentityForHomologs: addToList = False sys.stdout.write(', %s matches %s' % (chain, targetChain)) break if addToList: sys.stdout.write(', ' + chain) targetSequences.append((chain, sequence)) sys.stdout.write(' ...done.\n') pdbCodes = [] pdbsTo2 = [] counter = 0 try: os.makedirs(self.outputDirectory + 'Download'+os.sep) except: self.die('Error while creating directory %s; terminating.' % self.outputDirectory + 'Download'+os.sep) sys.stdout.write('Running BLAST search... \n') for (targetChain, targetSequence) in targetSequences: if len(targetSequence) < minimalLengthOfProtein: # Everything less than this value is a peptide continue sys.stdout.write('Chain %s:\n' % targetChain) params = { 'eCutOff': '10.0', 'sequence': targetSequence, 'matrix' : 'BLOSUM62', 'outputFormat' : 'XML'} postData = urllib.urlencode(params) blastXML='' try: webData= urllib2.urlopen('http://www.rcsb.org/pdb/rest/getBlastPDB1?'+postData) blastXML = webData.read() webData.close() except: self.die('Internet connection error when trying to run BLAST search of the homologues; terminating.') try: blastRoot = ET.fromstring(blastXML) if blastRoot.find('BlastOutput_iterations') is None or \ blastRoot.find('BlastOutput_iterations').find('Iteration') is None or \ blastRoot.find('BlastOutput_iterations').find('Iteration').find('Iteration_hits') is None: continue for hit in blastRoot.find('BlastOutput_iterations').find('Iteration').find('Iteration_hits'): pdbCode = hit.find('Hit_def').text[0:4].lower() match = re.search(r'(.*):(.*):([^\|]*)\|',hit.find('Hit_def').text) if match: pdbChain = match.group(3).split(',')[0] if not match or len(pdbChain) < 1: self.die('Something wrong with BLAST output (cant find chains); terminating.') identity = float(hit.find('Hit_hsps')[0].find('Hsp_identity').text) / len(targetSequence) alignmentLength = float(hit.find('Hit_hsps')[0].find('Hsp_align-len').text) / len(targetSequence) if alignmentLength > 1: alignmentLength = 1 if identity > 1: identity = 1 if alignmentLength >= MinimalPercentAligned and \ identity >= MinimalSequenceIdentityForHomologs: sys.stdout.write('%s : Aligned %d%%, Sequence identity %0.0f%% ' % (pdbCode, alignmentLength * 100, identity * 100)) if pdbCode in pdbCodes: sys.stdout.write('- already analysed\n') continue else: pdbCodes.append(pdbCode) downloadedFileName = self.downloadPDB(pdbCode) if len(downloadedFileName) < 1: continue downloadedPDBAnalysis = InputAnalysis(self.outputDirectory+'Download'+os.sep+'Analysis_'+ pdbCode + os.sep) analysisResults = downloadedPDBAnalysis.matchToHomologs(downloadedFileName, [x[1] for x in pdbsTo2]+[pdbFileName], pdbChain, [x[0] for x in pdbsTo2]+[targetChain], ss = self.ss) correctChainsFound = False for chain in analysisResults.keys(): if len(analysisResults[chain]) > 0: correctChainsFound = True if correctChainsFound: sys.stdout.write(' - accepted for restraints generation\n') pdbsTo2.append((pdbChain, downloadedFileName)) counter += 1 else: sys.stdout.write(' - rejected (discrepancy between SEQ record and modelled atoms)\n') try: os.remove(downloadedFileName) except: sys.stdout.write('Unable to delete downloaded PDB file %s; trying to continue\n' % downloadedFileName) if counter >= self.maximalNumberOfHomologs: break counter = 0 except ParseError: sys.stdout.write('Unexpected BLAST output for this chain; trying to continue.\n') continue if len(pdbsTo2) < 1: sys.stdout.write('No homologues with sequence identity more than %d%%, covering more than %d%% of target sequence and having RMSD with original structure more than %0.2f.' % (int(MinimalSequenceIdentityForHomologs * 100), int(MinimalPercentAligned*100), rmsdTreshold)) return [] else: return [x[1] for x in pdbsTo2] def downloadPDB(self, pdbCode): pdbText = '' try: webData= urllib2.urlopen('http://www.rcsb.org/pdb/files/' + pdbCode + '.pdb') pdbText = webData.read() webData.close() except: sys.stdout.write('Internet connection error when trying to fetch PDB file %s; trying next\n' % pdbCode) return '' if len(pdbText) == 0: sys.stdout.write('No data received for PDB file %s; trying next\n' % pdbCode) return '' flagCA = False currentChain = '' currentAltConf = '' resolutionMatch = False for pdbStr in pdbText.split('\n'): if not resolutionMatch: resolutionMatch = re.search('RESOLUTION.*(\d+\.\d+)\s', pdbStr) if resolutionMatch: if float(resolutionMatch.group(1)) > self.lowestResolutionForAutoHomologues: sys.stdout.write('Resolution of %s is %s A (minimal treshold %0.1f A) - rejecting; trying next\n' % (pdbCode, resolutionMatch.group(1), self.lowestResolutionForAutoHomologues)) return '' if pdbStr[0:4] == 'ATOM': if pdbStr[13:15] != 'CA': flagCA = False if pdbStr[13:15] == 'CA' and flagCA and pdbStr[21:22] == currentChain and pdbStr[16:17] == currentAltConf: sys.stdout.write(' - PDB file %s has only CA atoms; rejecting\n' % pdbCode) return '' if pdbStr[13:15] == 'CA' and not flagCA: flagCA = True currentChain = pdbStr[21:22] currentAltConf = pdbStr[16:17] pdbFileName = self.outputDirectory + 'Download' + os.sep + pdbCode + '.pdb' try: pdbFile = open(pdbFileName, 'w') pdbFile.write(pdbText) pdbFile.close() except: sys.stdout.write('Error while saving PDB file %s; trying next\n' % pdbFileName) return '' return os.path.abspath(pdbFileName) def addProtocol(self, homologsForProtocols, numberOfHomologs=0, description='', reverse=False, allHomologs=False, restraint=''): if not homologsForProtocols or numberOfHomologs < 1 or description == '': self.die('Incorrect parameters for addProtocol call; terminating\n') # special case of all homologs if allHomologs: protocol = RefinementProtocol(self.pdbToRefine, 'AllHomologs') protocol.description = description protocol.homologsFilesList = homologsForProtocols protocol.restraint = restraint sys.stdout.write('%d - %s: \n' % (len(self.protocols) + 1, protocol.description)) for homolog in protocol.homologsFilesList: sys.stdout.write('%s\n' % os.path.basename(homolog)) sys.stdout.write('\n') protocol.isWindows = isWindows self.protocols.append(protocol) return # otherwise - not all homologues maximumNumberOfAvailableHomologs = max(len(x) for x in homologsForProtocols.values()) if numberOfHomologs > maximumNumberOfAvailableHomologs: # simply not adding protocols return protocol = RefinementProtocol(self.pdbToRefine, 'ExternalRestraints') protocol.description = description sys.stdout.write('%d - %s: \n' % (len(self.protocols) + 1, protocol.description)) for chain in sorted(homologsForProtocols.keys()): if len(homologsForProtocols[chain]) > 0: chainRestraints = ChainRestraints(chain, 'Homologue') if len(homologsForProtocols[chain]) < numberOfHomologs: n = len(homologsForProtocols[chain]) else: n = numberOfHomologs if not reverse: for i in range(0, n): chainRestraints.restraintList.append( (homologsForProtocols[chain][i]['pdb'], homologsForProtocols[chain][i]['chain'])) sys.stdout.write( '%s, chain %s: restraints from %s, chain %s. Sequence identity: %0.2f, local RMSD: %0.2f, global RMSD: %0.2f \n' % (os.path.basename(self.pdbToRefine.pdbFileName), chain, os.path.basename(homologsForProtocols[chain][i]['pdb']), homologsForProtocols[chain][i]['chain'], float(homologsForProtocols[chain][i]['SeqIden']) / 100, float(homologsForProtocols[chain][i]['Minimum']), float(homologsForProtocols[chain][i]['RMSD']) )) else: for i in reversed(range(len(homologsForProtocols[chain]) - n, len(homologsForProtocols[chain]))): chainRestraints.restraintList.append( (homologsForProtocols[chain][i]['pdb'], homologsForProtocols[chain][i]['chain'])) sys.stdout.write( '%s, chain %s: restraints from %s, chain %s. Sequence identity: %0.2f, local RMSD: %0.2f, global RMSD: %0.2f \n' % (os.path.basename(self.pdbToRefine.pdbFileName), chain, os.path.basename(homologsForProtocols[chain][i]['pdb']), homologsForProtocols[chain][i]['chain'], float(homologsForProtocols[chain][i]['SeqIden']) / 100, float(homologsForProtocols[chain][i]['Minimum']), float(homologsForProtocols[chain][i]['RMSD']) )) else: chainRestraints = ChainRestraints(chain, 'Hbond') sys.stdout.write('%s, chain %s: H-bonds restraints\n' % (os.path.basename(self.pdbToRefine.pdbFileName), chain)) protocol.pdbChainRestraints.append(chainRestraints) protocol.isWindows = isWindows self.protocols.append(protocol) sys.stdout.write('\n') def initialiseProtocols(self, protocol=None): homologsForProtocols = {} for chain in self.pdbToRefine.chains.keys(): homologsForProtocols[chain] = [] homologs = sorted(self.pdbToRefine.chains[chain], key=lambda x: float(x['RMSD']) + float(x['Minimum'])) # ranging homologs by their similarity to the refined structure for homolog in homologs: if float(homolog['SeqIden']) / 100 > MinimalSequenceIdentityForHomologs and \ float(homolog['Percent']) / 100 > MinimalPercentAligned: homologsForProtocols[chain].append(homolog) if protocol: # protocol file was specified by user in the command line sys.stdout.write('Using protocol specified by user: \n') sys.stdout.write('1 - %s\n' % protocol.description) try: for chainRestraint in protocol.pdbChainRestraints: if chainRestraint.type == 'Homologue': for (pdbFileNameHomologue, chainHomologue) in chainRestraint.restraintList: if pdbFileNameHomologue not in [x['pdb'] for x in homologsForProtocols[chainRestraint.chain]] or \ chainHomologue not in [x['chain'] for x in homologsForProtocols[chainRestraint.chain]]: self.die( 'Homologue and chain (%s, %s) from the protocol file (-pr option) does not exist\n or have sequence identity to the target chain lower than %0.2f; terminating' % (os.path.basename(pdbFileNameHomologue), chainHomologue, MinimalSequenceIdentityForHomologs)) sys.stdout.write('%s, chain %s: restraints from %s, chain %s. \n' % (os.path.basename(self.pdbToRefine.pdbFileName), chainRestraint.chain, os.path.basename(pdbFileNameHomologue), chainHomologue )) elif chainRestraint.type == 'Hbond': sys.stdout.write('%s, chain %s: H-bonds restraints\n' % (os.path.basename(self.pdbToRefine.pdbFileName), chainRestraint.chain)) except KeyError: self.die('Target chain, indicated in the protocol file (-pr option), does not exist; terminating') self.protocols.append(protocol) sys.stdout.write('\n') else: # default set of protocols sys.stdout.write('Using default set of protocols for refinement:\n\n') # Protocol 1 - always JellyBody protocol = RefinementProtocol(self.pdbToRefine, 'JellyBody') protocol.description = 'Jelly body only' sys.stdout.write('1 - %s\n\n' % protocol.description) protocol.isWindows = isWindows self.protocols.append(protocol) # Protocols with external homologues if len (homologsForProtocols.values()) > 0: # if homologues are actually there if self.nc > max(len(x) for x in homologsForProtocols.values()): self.nc = max(len(x) for x in homologsForProtocols.values()) # maximal number of chains cant exceed actual number of available chains # Protocols with closest external homologues (from one up to three by default) for i in range(self.nc): self.addProtocol(homologsForProtocols, numberOfHomologs=i + 1, description='External restraints from %d closest homologue(s) (H-bonds for chains without suitable homologue), then jelly body' % ( i + 1)) # Protocols with all homologues (restrain-best and restrain-all) if max(len(x) for x in homologsForProtocols.values()) > 3 \ and self.nc < max(len(x) for x in homologsForProtocols.values()): pdbFileNames = [] for homologList in homologsForProtocols.values(): for homolog in homologList: pdbFileNames.append(homolog['pdb']) pdbFileNames = list(set(pdbFileNames)) # removing duplicates self.addProtocol(pdbFileNames, numberOfHomologs=len(self.homologsFilesList), allHomologs=True, description='External restraints from all the homologues (-restrain_all), then jelly body', restraint='-restrain_all') self.addProtocol(pdbFileNames, numberOfHomologs=len(self.homologsFilesList), allHomologs=True, description='External restraints from all the homologues (-restrain_best), then jelly body', restraint='-restrain_best') #Protocols with .nc most distant chains if self.nc < max(len(x) for x in homologsForProtocols.values()): for i in range(self.nc): self.addProtocol(homologsForProtocols, numberOfHomologs=i + 1, reverse=True, description='External restraints from %d most distant homologue(s) (H-bonds for chains without suitable homologue), then jelly body' % ( i + 1)) # Last protocol - hydrogen bonds protocol = RefinementProtocol(self.pdbToRefine, 'ExternalRestraints') protocol.description = 'Hydrogen bonds restraints for all chains' sys.stdout.write('%d - %s: \n' % (len(self.protocols) + 1, protocol.description)) for chain in sorted(homologsForProtocols.keys()): chainRestraints = ChainRestraints(chain, 'Hbond') sys.stdout.write('%s, chain %s: H-bonds restraints\n' % (os.path.basename(self.pdbToRefine.pdbFileName), chain)) protocol.pdbChainRestraints.append(chainRestraints) protocol.isWindows = isWindows self.protocols.append(protocol) sys.stdout.write('\n') def initialise(self, arguments): print headerMessage (self.pdbToRefine, self.homologsFilesList, protocol) = self.parseCommandLine(arguments) sys.stdout.write('\nAnalysing input files: \n') inputAnalysis = InputAnalysis(self.outputDirectory + 'InputAnalysis'+os.sep) inputAnalysis.labin = self.labin inputAnalysis.free_1 = self.free_1 inputAnalysis.molprobity = self.molprobity sys.stdout.write('Analysing data for twinning... ') self.pdbToRefine.twin = inputAnalysis.twinTest(self.pdbToRefine) if self.pdbToRefine.twin: sys.stdout.write('twinning detected; will use "TWIN" keyword for refmac.\n') else: sys.stdout.write('no twinning detected.\n') if self.mr: sys.stdout.write('Running jelly body refinement after MR (it may take a while)... \n') jellyMR = inputAnalysis.mr(self.pdbToRefine) self.pdbToRefine.pdbFileName = os.path.abspath(jellyMR.xyzout) sys.stdout.write('Started from Rfact: %0.3f and Rfree: %0.3f\n' % ( jellyMR.log.getStartRfact(), jellyMR.log.getStartRfree() ) ) sys.stdout.write('Finished with Rfact: %0.3f and Rfree: %0.3f\n' % ( jellyMR.log.getFinalRfact(), jellyMR.log.getFinalRfree() )) sys.stdout.write('\n') sys.stdout.write('Checking best scaling and solvent options... ') self.pdbToRefine = inputAnalysis.scaleTestAndStartingValues(self.pdbToRefine) if self.pdbToRefine.scalingLSQ: sys.stdout.write('LSQ scaling performed better; will use LSQ scaling for refmac.\n') else: sys.stdout.write('standard scaling performed better; will use standard scaling for refmac.\n') sys.stdout.write('Using following solvent parameters: VDWProb %0.2f, IONProb %0.2f, RSHRink %0.2f\n' % ( self.pdbToRefine.solvent['VDWProb'], self.pdbToRefine.solvent['IONProb'],self.pdbToRefine.solvent['RSHRink'] )) sys.stdout.write('Starting from Rfact: %0.3f and Rfree: %0.3f\n' % ( self.pdbToRefine.startingValues['Rfact'], self.pdbToRefine.startingValues['Rfree'])) if self.molprobity: sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' % (self.pdbToRefine.startingValues['ramaOutliers'], self.pdbToRefine.startingValues['ramaFavoured'], self.pdbToRefine.startingValues['clash_percentile'], self.pdbToRefine.startingValues['molprobity_percentile'] ) ) if (not protocol) or (protocol and protocol.type != 'AllHomologs'): sys.stdout.write('Checking external homologs... ') self.pdbToRefine.chains = inputAnalysis.matchToHomologs(self.pdbToRefine.pdbFileName, self.homologsFilesList, ss=self.ss) sys.stdout.write('done.\n') sys.stdout.write('Initialising refinement protocol(s)...\n') if protocol: self.initialiseProtocols(protocol=protocol) else: self.initialiseProtocols() sys.stdout.write('Refinement protocol(s) successfully initialised.\n') def startProtocol(self, protocol=None, dnaRestraintsFileName=''): if not protocol: self.die('No protocol supplied to the runProtocol routine') sys.stdout.write('\nStarting protocol %d out of %d (%s)\n' % (self.protocols.index(protocol) + 1, len(self.protocols), protocol.description),) originalDirectory = os.getcwd() protocol.containerCmdFileName = 'protocol_%d_of_%d.%s' % (self.protocols.index(protocol) + 1, len(self.protocols), comFileExtension) try: protocol.directory = self.outputDirectory + 'protocol_%d_of_%d' % ( self.protocols.index(protocol) + 1, len(self.protocols)) if not os.path.exists(protocol.directory): os.makedirs(protocol.directory) os.chdir(protocol.directory) else: self.die('Protocol directory already exists; terminating. Please make sure you run pipeline in a new directory.') except OSError: self.die('Can`t create directory ' + protocol.directory + '; terminating.') if protocol.type == 'JellyBody': refmac = Refmac(xyzin=protocol.pdbToRefine.pdbFileName, xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_1jelly.pdb', hklin=protocol.pdbToRefine.mtzFileName, hklout=os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_1jelly.mtz', ncyc=20) if not refmac.successfullyLoaded: self.die('Refmac run initialisation failed') refmac.twin = protocol.pdbToRefine.twin refmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ refmac.solvent = protocol.pdbToRefine.solvent if len(self.tls) > 1: refmac.tls = self.tls if len(self.labin) > 1: refmac.labin = self.labin refmac.free_1 = self.free_1 refmac.phased = self.phased refmac.ncs = True refmac.jellyBody = True refmac.molprobity = self.molprobity refmac.isWindows = isWindows protocol.ss = self.ss protocol.containerCmd.append('%s' % refmac.comFileName) protocol.containerCmd.append('\n\n') protocol.refmacs.append(refmac) protocol.started = True # protocol.runContainer() p = multiprocessing.Process(target=protocol.run) p.start() elif protocol.type == 'ExternalRestraints': restraintsFileName = os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_restraints.txt' for chainRestraint in protocol.pdbChainRestraints: prosmart = ProSMART(pdbToRefine=protocol.pdbToRefine, chainRestraint=chainRestraint) prosmart.isWindows = isWindows prosmart.prepareRun() protocol.prosmarts.append(prosmart) protocol.containerCmd.append('%s \n' % prosmart.comFileName) if not isWindows: protocol.containerCmd.append('cat %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName)) else: protocol.containerCmd.append('type %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName)) protocol.restraintsFileName = restraintsFileName if os.path.exists(dnaRestraintsFileName): if not isWindows: protocol.containerCmd.append('cat %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName)) else: protocol.containerCmd.append('type %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName)) protocol.dnaRestraintsFileName = dnaRestraintsFileName #sys.stdout.write('Running Refmac5 with external restraints... ') erhRefmac = Refmac(xyzin=protocol.pdbToRefine.pdbFileName, xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_1ExtRestr.pdb', hklin=protocol.pdbToRefine.mtzFileName, hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_1ExtRestr.mtz', ncyc=40) if not erhRefmac.successfullyLoaded: self.die('Refmac run initialisation failed') erhRefmac.twin = protocol.pdbToRefine.twin erhRefmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ erhRefmac.solvent = protocol.pdbToRefine.solvent erhRefmac.phased = self.phased erhRefmac.proSMARTParams = True erhRefmac.proSMARTFile = restraintsFileName if len(self.tls) > 1: erhRefmac.tls = self.tls if len(self.labin) > 1: erhRefmac.labin = self.labin erhRefmac.free_1 = self.free_1 erhRefmac.molprobity = self.molprobity erhRefmac.isWindows = isWindows protocol.refmacs.append(erhRefmac) protocol.containerCmd.append('%s \n' % erhRefmac.comFileName) erhRefmac2 = Refmac(xyzin=protocol.pdbToRefine.pdbFileName, xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_11ExtRestr.pdb', hklin=protocol.pdbToRefine.mtzFileName, hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_11ExtRestr.mtz', ncyc=40) if not erhRefmac2.successfullyLoaded: self.die('Refmac run initialisation failed') erhRefmac2.twin = protocol.pdbToRefine.twin erhRefmac2.scalingLSQ = protocol.pdbToRefine.scalingLSQ erhRefmac2.solvent = protocol.pdbToRefine.solvent erhRefmac2.phased = self.phased erhRefmac2.proSMARTParams = False erhRefmac2.proSMARTFile = restraintsFileName if len(self.tls) > 1: erhRefmac2.tls = self.tls if len(self.labin) > 1: erhRefmac2.labin = self.labin erhRefmac2.free_1 = self.free_1 erhRefmac2.molprobity = self.molprobity erhRefmac2.isWindows = isWindows protocol.refmacs.append(erhRefmac2) protocol.containerCmd.append('%s \n' % erhRefmac2.comFileName) refmac = Refmac(xyzin=erhRefmac.xyzout, xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_2jelly.pdb', hklin=protocol.pdbToRefine.mtzFileName, hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_2jelly.mtz', ncyc=20) if not refmac.successfullyLoaded: self.die('Refmac run initialisation failed') refmac.twin = protocol.pdbToRefine.twin refmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ refmac.solvent = protocol.pdbToRefine.solvent refmac.phased = self.phased refmac.jellyBody = True refmac.ncs = True if len(self.tls) > 1: refmac.tls = self.tls if len(self.labin) > 1: refmac.labin = self.labin refmac.free_1 = self.free_1 refmac.molprobity = self.molprobity refmac.isWindows = isWindows protocol.refmacs.append(refmac) protocol.ss = self.ss protocol.containerCmd.append('%s \n' % refmac.comFileName) protocol.containerCmd.append('\n') protocol.started = True #protocol.runContainer() p = multiprocessing.Process(target=protocol.run) p.start() elif protocol.type == 'AllHomologs': restraintsFileName = os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_restraints.txt' prosmart = ProSMART(pdbToRefine=protocol.pdbToRefine, allHomologs=True, allHomologsList=protocol.homologsFilesList , restraint=protocol.restraint) prosmart.isWindows = isWindows prosmart.prepareRun() protocol.prosmarts.append(prosmart) protocol.containerCmd.append('%s \n' % prosmart.comFileName) if not isWindows: protocol.containerCmd.append('cat %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName)) else: protocol.containerCmd.append('type %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName)) protocol.restraintsFileName = restraintsFileName if os.path.exists(dnaRestraintsFileName): if not isWindows: protocol.containerCmd.append('cat %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName)) else: protocol.containerCmd.append('type %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName)) protocol.dnaRestraintsFileName = dnaRestraintsFileName #sys.stdout.write('Running Refmac5 with external restrains... ') erhRefmac = Refmac(xyzin=protocol.pdbToRefine.pdbFileName, xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_1ExtRestr.pdb', hklin=protocol.pdbToRefine.mtzFileName, hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_1ExtRestr.mtz', ncyc=40) if not erhRefmac.successfullyLoaded: self.die('Refmac run initialisation failed') erhRefmac.twin = protocol.pdbToRefine.twin erhRefmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ erhRefmac.solvent = protocol.pdbToRefine.solvent erhRefmac.phased = self.phased erhRefmac.proSMARTParams = True erhRefmac.proSMARTFile = restraintsFileName if len(self.tls) > 1: erhRefmac.tls = self.tls if len(self.labin) > 1: erhRefmac.labin = self.labin erhRefmac.free_1 = self.free_1 erhRefmac.molprobity = self.molprobity erhRefmac.isWindows = isWindows protocol.refmacs.append(erhRefmac) protocol.containerCmd.append('%s \n' % erhRefmac.comFileName) erhRefmac2 = Refmac(xyzin=protocol.pdbToRefine.pdbFileName, xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_11ExtRestr.pdb', hklin=protocol.pdbToRefine.mtzFileName, hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_11ExtRestr.mtz', ncyc=40) if not erhRefmac2.successfullyLoaded: self.die('Refmac run initialisation failed') erhRefmac2.twin = protocol.pdbToRefine.twin erhRefmac2.scalingLSQ = protocol.pdbToRefine.scalingLSQ erhRefmac2.solvent = protocol.pdbToRefine.solvent erhRefmac2.phased = self.phased erhRefmac2.proSMARTParams = False erhRefmac2.proSMARTFile = restraintsFileName if len(self.tls) > 1: erhRefmac2.tls = self.tls if len(self.labin) > 1: erhRefmac2.labin = self.labin erhRefmac2.free_1 = self.free_1 erhRefmac2.molprobity = self.molprobity erhRefmac2.isWindows = isWindows protocol.refmacs.append(erhRefmac2) protocol.containerCmd.append('%s \n' % erhRefmac2.comFileName) refmac = Refmac(xyzin=erhRefmac.xyzout, xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_2jelly.pdb', hklin=erhRefmac.hklin, hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_2jelly.mtz', ncyc=20) if not refmac.successfullyLoaded: self.die('Refmac run initialisation failed') refmac.twin = protocol.pdbToRefine.twin refmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ refmac.solvent = protocol.pdbToRefine.solvent refmac.phased = self.phased refmac.jellyBody = True refmac.ncs = True if len(self.tls) > 1: refmac.tls = self.tls if len(self.labin) > 1: refmac.labin = self.labin refmac.free_1 = self.free_1 refmac.molprobity = self.molprobity refmac.isWindows = isWindows protocol.ss = self.ss protocol.refmacs.append(refmac) protocol.containerCmd.append('%s \n' % refmac.comFileName) protocol.containerCmd.append('\n') protocol.started = True #protocol.runContainer() protocol.isWindows = isWindows p = multiprocessing.Process(target=protocol.run) p.start() else: self.die('Unknown protocol type %s - normally this should not happen; terminating.' % protocol.type) try: os.chdir(originalDirectory) except OSError: self.die('Can`t return back to original directory ' + originalDirectory + ' ; terminating.') def runProtocols(self): sys.stdout.write('\nRunning refinement protocol(s)...\n') dnaRestraintsFile = '' if self.dna: prosmart = ProSMART(pdbToRefine=self.pdbToRefine, allHomologsList=self.homologsFilesList , dna=True, outputDirectoryBase=self.outputDirectory+'InputAnalysis' + os.sep) prosmart.prepareRun() prosmart.run() if prosmart.executed: if os.path.exists(prosmart.restraintsFileName): if os.path.getsize(prosmart.restraintsFileName) > 1: sys.stdout.write('DNA/RNA detected; restraints from homologues generated.\n') dnaRestraintsFile = prosmart.restraintsFileName else: prosmart = ProSMART(pdbToRefine=self.pdbToRefine, allHomologsList=None, dna=True, outputDirectoryBase=self.outputDirectory+'InputAnalysis' + os.sep) prosmart.prepareRun() prosmart.run() if prosmart.executed: if os.path.exists(prosmart.restraintsFileName): if os.path.getsize(prosmart.restraintsFileName) > 1: sys.stdout.write('DNA/RNA detected; self-restraints generated.\n') dnaRestraintsFile = prosmart.restraintsFileName else: prosmart = ProSMART(pdbToRefine=self.pdbToRefine, allHomologsList=None, dna=True, outputDirectoryBase=self.outputDirectory+'InputAnalysis' + os.sep) prosmart.prepareRun() prosmart.run() if prosmart.executed: if os.path.exists(prosmart.restraintsFileName): if os.path.getsize(prosmart.restraintsFileName) > 1: sys.stdout.write('DNA/RNA detected; self-restraints generated.\n') dnaRestraintsFile = prosmart.restraintsFileName runningProtocols = [] self.freeCPUs = self.nCPU for protocol in list(self.protocols): self.startProtocol(protocol=protocol, dnaRestraintsFileName=dnaRestraintsFile) runningProtocols.append(protocol) self.freeCPUs -= 1 if self.freeCPUs < 1: # no more free CPUs left while True: # infinite waiting loop until one of the protocols is finished time.sleep(timeQuant) for protocolPoll in list(runningProtocols): # polling all running protocols if not protocolPoll.isRunning(): if protocolPoll.executed: sys.stdout.write('\nProtocol %d out of %d (%s) finished.\n' % (self.protocols.index(protocolPoll) + 1, len(self.protocols), protocolPoll.description)) sys.stdout.write('Rfact: %0.3f, Rfree: %0.3f \n' % (protocolPoll.refmacs[-1].log.getFinalRfact(), protocolPoll.refmacs[-1].log.getFinalRfree() )) if self.molprobity: sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' % (protocolPoll.refmacs[-1].log.getRamaOutliers(), protocolPoll.refmacs[-1].log.getRamaFavoured(), protocolPoll.refmacs[-1].log.getClashscore_percentile(), protocolPoll.refmacs[-1].log.getMolprobity_percentile() ) ) runningProtocols.remove(protocolPoll) self.freeCPUs += 1 break # breaking polling of running protocols since one protocol is finished else: # protocol is not running and is not executed means either refmac5 or ProSMART is hanging - need to kill it... sys.stdout.write('\nProtocol "%s" is not responding; terminating it and proceeding to the next\n' % protocolPoll.description) protocolPoll.terminate() runningProtocols.remove(protocolPoll) self.protocols.remove(protocolPoll) self.freeCPUs += 1 break # breaking polling of running protocols since one protocol was just killed if self.freeCPUs >= 1: break # breaking infinite waiting loop to start next protocol while runningProtocols: # let all running protocols to finish time.sleep(timeQuant) for protocolPoll in list(runningProtocols): # polling all running protocols if not protocolPoll.isRunning(): if protocolPoll.executed: sys.stdout.write('\nProtocol %d out of %d (%s) finished.\n' % (self.protocols.index(protocolPoll) + 1, len(self.protocols), protocolPoll.description)) sys.stdout.write('Rfact: %0.3f, Rfree: %0.3f \n' % (protocolPoll.refmacs[-1].log.getFinalRfact(), protocolPoll.refmacs[-1].log.getFinalRfree() )) if self.molprobity: sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' % (protocolPoll.refmacs[-1].log.getRamaOutliers(), protocolPoll.refmacs[-1].log.getRamaFavoured(), protocolPoll.refmacs[-1].log.getClashscore_percentile(), protocolPoll.refmacs[-1].log.getMolprobity_percentile() ) ) runningProtocols.remove(protocolPoll) self.freeCPUs += 1 break # breaking polling of running protocols since one protocol is finished else: # protocol is not running and is not executed means either refmac5 or ProSMART is hanging - need to kill it... sys.stdout.write('Protocol "%s" is not responding; terminating it and proceeding to the next\n' % protocolPoll.description) protocolPoll.terminate() runningProtocols.remove(protocolPoll) self.protocols.remove(protocolPoll) self.freeCPUs += 1 break # breaking polling of running protocols since one protocol was just killed # # No sensible code below this line in this method # # # while self.freeCPU < self.nCPU: # busyCPU = 0 # for protocolPoll in self.protocols: # if protocolPoll.started: # time.sleep(timeQuant) # if protocolPoll.isRunning(): # busyCPU += 1 # else: # if not protocolPoll.executed: # p = subprocess.Popen(['ps', 'aux'], stdout=subprocess.PIPE) # out, err = p.communicate() # # for line in out.splitlines(): # if ('refmac5' in line) and (self.outputDirectory[1:] in line): # pid = int(line.split()[1]) # os.kill(pid, signal.SIGKILL) # if ('prosmart' in line) and (self.outputDirectory[1:] in line): # pid = int(line.split()[1]) # os.kill(pid, signal.SIGKILL) # # self.die('Protocol "%s" is not responding; terminating' % protocolPoll.description) # else: # sys.stdout.write('Protocol %d out of %d (%s) finished.\n' % (self.protocols.index(protocolPoll) + 1, len(self.protocols), protocolPoll.description)) # sys.stdout.write('Rfact: %0.3f, Rfree: %0.3f \n' % # (protocolPoll.refmacs[-1].log.getFinalRfact(), protocolPoll.refmacs[-1].log.getFinalRfree() )) # if self.molprobity: # sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' % # (protocolPoll.refmacs[-1].log.getRamaOutliers(), protocolPoll.refmacs[-1].log.getRamaFavoured(), protocolPoll.refmacs[-1].log.getClashscore_percentile(), protocolPoll.refmacs[-1].log.getMolprobity_percentile() ) ) # # protocolPoll.started = False # self.freeCPU = self.nCPU - busyCPU def returnBest(self): if self.molprobity: sys.stdout.write('\n\nStatistics on all protocols:\n') qfactDict = {} qfactProtocols = [] rFreeMax = max(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree() rFreeMin = min(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree() molpPrcntMax = max(self.protocols, key=lambda y: y.refmacs[-1].log.getMolprobity_percentile()).refmacs[-1].log.getMolprobity_percentile() molpPrcntMin = min(self.protocols, key=lambda y: y.refmacs[-1].log.getMolprobity_percentile()).refmacs[-1].log.getMolprobity_percentile() sys.stdout.write('\nWorst Rfree value: %0.3f, Best Rfree value: %0.3f \nWorst Molprobity percentile: %0.2f, Best Molprobity percentile: %0.2f\n\n' % (rFreeMax, rFreeMin, molpPrcntMin, molpPrcntMax) ) if (molpPrcntMax - molpPrcntMin) >= 0.1: weightTerm = (rFreeMax - rFreeMin) / (molpPrcntMax - molpPrcntMin) else: weightTerm = 0 for protocol in self.protocols: nProtocol = self.protocols.index(protocol) + 1 rfree = protocol.refmacs[-1].log.getFinalRfree() molPercentile = protocol.refmacs[-1].log.getMolprobity_percentile() qfact = rfree + weightTerm * (molpPrcntMax - molPercentile) qfactDict['qfact'] = qfact qfactDict['rfree'] = rfree qfactDict['index'] = nProtocol qfactDict['molPercentile'] = molPercentile sys.stdout.write('Protocol %d : Rfree %0.3f, Molprobity percentile %0.2f, Qscore %0.2f\n' % (nProtocol, rfree, molPercentile, qfact*100)) qfactProtocols.append(copy.deepcopy(qfactDict)) sortedQscoreProtocols = sorted(qfactProtocols, key=lambda x: (x['qfact'], 100 - x['molPercentile'])) # sortedProtocols = sorted(self.protocols, # key=lambda x: min(x.refmacs, key=lambda y: y.log.getMolprobity()).log.getMolprobity()) self.bestProtocol = self.protocols[sortedQscoreProtocols[0]['index'] - 1] #self.bestRefmacRun = min(self.bestProtocol.refmacs, key=lambda y: y.log.getMolprobity()) self.bestRefmacRun = self.bestProtocol.refmacs[-1] sys.stdout.write('\n\nBest performing protocol (basing on Qscore):\n %d - %s\n' % (self.protocols.index(self.bestProtocol) + 1, self.bestProtocol.description)) if self.bestRefmacRun.log.getFinalRfree() > self.pdbToRefine.startingValues['Rfree']: sys.stdout.write('Sorry, this program failed to improve Rfree value for your structure.\n') sys.stdout.write('Rfact (before/after): %0.3f / %0.3f \nRfree (before/after): %0.3f / %0.3f\n' % ( self.pdbToRefine.startingValues['Rfact'], self.bestRefmacRun.log.getFinalRfact(), self.pdbToRefine.startingValues['Rfree'], self.bestRefmacRun.log.getFinalRfree() )) sys.stdout.write('Ramachandran outliers (before/after): %0.2f%% / %0.2f%%\nRamachandran favoured (before/after): %0.2f%% / %0.2f%%\nClashscore percentile (before/after): %0.2f / %0.2f\nMolprobity percentile (before/after): %0.2f / %0.2f\n\n' % (self.pdbToRefine.startingValues['ramaOutliers'], self.bestRefmacRun.log.getRamaOutliers(), self.pdbToRefine.startingValues['ramaFavoured'], self.bestRefmacRun.log.getRamaFavoured(), self.pdbToRefine.startingValues['clash_percentile'], self.bestRefmacRun.log.getClashscore_percentile(), self.pdbToRefine.startingValues['molprobity_percentile'], self.bestRefmacRun.log.getMolprobity_percentile() ) ) else: sys.stdout.write('\n\nStatistics on all protocols:\n') rFreeMax = max(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree() rFreeMin = min(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree() sys.stdout.write('\nWorst Rfree value: %0.3f, Best Rfree value: %0.3f \n\n' % (rFreeMax, rFreeMin) ) for protocol in self.protocols: nProtocol = self.protocols.index(protocol) + 1 rfree = protocol.refmacs[-1].log.getFinalRfree() sys.stdout.write('Protocol %d : Rfree %0.3f\n' % (nProtocol, rfree)) sortedProtocols = sorted(self.protocols, key=lambda x: x.refmacs[-1].log.getFinalRfree() ) self.bestProtocol = sortedProtocols[0] self.bestRefmacRun = self.bestProtocol.refmacs[-1] sys.stdout.write('\n\nBest performing protocol (basing on Rfree value):\n %d - %s\n' % (self.protocols.index(self.bestProtocol) + 1, self.bestProtocol.description)) if self.bestRefmacRun.log.getFinalRfree() > self.pdbToRefine.startingValues['Rfree']: sys.stdout.write('Sorry, this program failed to improve Rfree value for your structure.\n') sys.stdout.write('Rfact (before/after): %0.3f / %0.3f \nRfree (before/after): %0.3f / %0.3f\n' % ( self.pdbToRefine.startingValues['Rfact'], self.bestRefmacRun.log.getFinalRfact(), self.pdbToRefine.startingValues['Rfree'], self.bestRefmacRun.log.getFinalRfree() )) xmlRoot = ET.Element('Best_protocols_root') xmlBestProtocol = ET.SubElement(xmlRoot, 'Best_protocol') xmlBestProtocol.set('Type', self.bestProtocol.type) xmlBestProtocol.set('Description', self.bestProtocol.description) if self.bestProtocol.type == 'AllHomologs': xmlBPhomologsFilesList = ET.SubElement(xmlBestProtocol, 'homologsFilesList') nextElement = ET.SubElement(xmlBPhomologsFilesList, 'restrain') nextElement.text = '"%s"' % self.bestProtocol.restraint for homolog in self.bestProtocol.homologsFilesList: nextElement = ET.SubElement(xmlBPhomologsFilesList, "id%d" % self.bestProtocol.homologsFilesList.index(homolog)) nextElement.text = '"%s"'% homolog else: xmlBPpdbChainRestraints = ET.SubElement(xmlBestProtocol, 'pdbChainRestraints') for pdbChainRestraint in self.bestProtocol.pdbChainRestraints: nextElement = ET.SubElement(xmlBPpdbChainRestraints, pdbChainRestraint.type) nextElement.text = pdbChainRestraint.chain i = 1 for (pdbFileName, chain) in pdbChainRestraint.restraintList: nextElement.set("id%d" % i, '%s;@;%s' % (pdbFileName, chain)) i += 1 indent(xmlRoot) xmlBestProtocolsTree = ET.ElementTree(xmlRoot) xmlBestProtocolsTree.write(self.outputDirectory + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_best.protocol') try: if self.xyzout: self.bestPDB = self.xyzout else: self.bestPDB = self.outputDirectory + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_best.pdb' if self.hklout: self.bestMTZ = self.hklout else: self.bestMTZ = self.outputDirectory + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_best.mtz' shutil.copyfile(self.bestRefmacRun.xyzout, self.bestPDB) shutil.copyfile(self.bestRefmacRun.hklout, self.bestMTZ) shutil.copytree(self.bestProtocol.directory, self.outputDirectory + 'protocol_best'+ os.sep) except IOError: self.die('Can`t copy pdb and mtz files of the best performing protocol; terminating.') sys.stdout.write('\nFinal PDB file: %s \n' % os.path.abspath(self.bestPDB) ) sys.stdout.write('Final MTZ file: %s \n\n' % os.path.abspath(self.bestMTZ) ) if __name__ == "__main__": ####################################### #### Adjustable global parameters #### ####################################### minimalLengthOfProtein = 30 #amino acids rmsdTreshold = 0.1 # structures with RMSD to the target structure lower than this treshold will be rejected as almost identical MinimalSequenceIdentityForHomologs = 0.75 MinimalPercentAligned = 0.75 # This waiting time could be increased for unusually large complexes (up to 30-60 seconds, if not more) timeQuant = 10 # seconds, sleeping time before re-polling all running jobs # Windows compatibility shellHeader = '#!/bin/sh\n\n' # for Linux/MacOS comFileExtension = 'com' isWindows = False ####################################### if 'Windows' in platform.system(): isWindows = True shellHeader = '' comFileExtension = 'bat' pipeline = Pipeline() pipeline.initialise(sys.argv[1:]) pipeline.runProtocols() pipeline.returnBest()