#!/usr/bin/env ccp4-python import cPickle import os import shutil import subprocess import sys sys.path.insert(0, "/opt/ample-dev1/python") sys.path.insert(0, "opt/ample-dev1/scripts") import add_sidechains_SCWRL import ample_util from analyse_run import AmpleResult import pdb_edit #workdir = "/home/jmht/coiled-coils/ideal_helices" workdir = "/home/jmht/Documents/work/CC/ideal_helices" #smDir = "/home/jmht/coiled-coils/single_model" ensembleDir = "/media/data/shared/coiled-coils/ensemble/ensemble.run2" chainMakerExe="/home/jmht/Documents/avogadro/chainMaker/chainMaker" scwrlExe="/opt/scwrl4/Scwrl4" pfile = "/home/jmht/Documents/work/CC/ensemble_results/ar_results.pkl" os.chdir(workdir) # We've generated multiple sequences but many models will map onto each sequence # For each model we are only interested if the sequence solves the structure - we can # link back to the original ensemble later using a dictionary # Maps the sequence to the list of ensembles sequence2ensemble = {} pdbCode2Col = {} # Unpickle the results with open( pfile ) as f: allResults = cPickle.load( f ) scwrl = add_sidechains_SCWRL.Scwrl( scwrlExe=scwrlExe ) pdbEdit = pdb_edit.PDBEdit() for result in allResults: # We only care about succeess if not ( result.shelxeCC >= 25 and result.shelxeAvgChainLength >= 10 ): continue # Similar to the single_models we only process the first radius ensembles if result.ensembleRadiusThreshold != 1: continue # See if we already have a dict for this target if result.pdbCode not in sequence2ensemble: sequence2ensemble[ result.pdbCode ] = {} # Need to extract the column labels for the mtz file - use the old script s = os.path.join( ensembleDir, result.pdbCode, result.ensembleName+".sub" ) with open(s) as f: for line in f: if line.startswith("LABIN"): #LABIN SIGF=SIGFP F=FP FreeR_flag=FREE f = line.strip().split() pdbCode2Col[ result.pdbCode ] = {} l,v = f[1].split("=") assert l =="SIGF" pdbCode2Col[ result.pdbCode ]['SIGF'] = v l,v = f[2].split("=") assert l =="F" pdbCode2Col[ result.pdbCode ]['F'] = v l,v = f[3].split("=") assert l =="FreeR_flag" pdbCode2Col[ result.pdbCode ]['FREE'] = v break # We need a helix if result.rioHelixSequence is None or len( result.rioHelixSequence ) < 3: continue # Got sequence so see if we already have it for this target if result.rioHelixSequence in sequence2ensemble[ result.pdbCode ]: # Add it to the list sequence2ensemble[ result.pdbCode ][ result.rioHelixSequence ].append( result.ensembleName ) else: # New sequence so create a fresh list sequence2ensemble[ result.pdbCode ][ result.rioHelixSequence ] = [ result.ensembleName ] scriptDir = os.path.join( workdir, result.pdbCode ) if not os.path.isdir( scriptDir ): os.mkdir( scriptDir ) os.chdir( scriptDir ) # Copy files in ofasta = os.path.join( ensembleDir, result.pdbCode, "{0}_1.fasta".format( result.pdbCode ) ) fasta = os.path.join( scriptDir, "{0}_1.fasta".format( result.pdbCode ) ) shutil.copy2( ofasta, fasta ) omtz = os.path.join( ensembleDir, result.pdbCode, "{0}-cad.mtz".format( result.pdbCode ) ) mtz = os.path.join( scriptDir, "{0}-cad.mtz".format( result.pdbCode ) ) shutil.copy2( omtz, mtz ) print "processing {0}-{1} in dir {2}".format( result.pdbCode, result.rioHelixSequence, scriptDir ) # Generate full pdb from helix sequence #HACK REPLACE ANY X with A sequence = result.rioHelixSequence.replace('X','A') helixPdb1 = os.path.join( scriptDir, result.rioHelixSequence + ".pdb" ) cmd = [ chainMakerExe, "-s", sequence, "-o", helixPdb1 ] #print "RUNNING "," ".join(cmd) retcode = ample_util.run_command(cmd, dolog=False) if not os.path.isfile( helixPdb1 ): raise RuntimeError,"ERROR creating PDB: {0}".format( helixPdb1 ) # Optimise sidechain configuration with Scwrl helixPdbAA = os.path.join( scriptDir, result.rioHelixSequence + "_All_atom.pdb" ) scwrl.addSidechains( pdbin=helixPdb1, pdbout= helixPdbAA ) helixPdbScwrl = os.path.join( scriptDir, result.rioHelixSequence + "_SCWRL_reliable_sidechains.pdb" ) helixPdbPolya = os.path.join( scriptDir, result.rioHelixSequence + "_poly_ala.pdb" ) # Now strip back as we did for the full ensembles pdbEdit.reliable_sidechains( helixPdbAA, helixPdbScwrl ) pdbEdit.backbone( helixPdbAA, helixPdbPolya ) # # Create scripts to run them all # for t in [ 'All_atom', 'SCWRL_reliable_sidechains', 'poly_ala']: newScript = os.path.join( scriptDir, result.rioHelixSequence+"_{0}.sub".format( t ) ) stext = """#!/bin/bash #$ -j y #$ -cwd #$ -w e #$ -V #$ -o /data1/jmht/ideal_helices/{0}/{1}_{2}.log #$ -N J_{0} #$ -pe threaded 16 pushd /data1/jmht/ideal_helices/{0} setenv CCP4_SCR $TMPDIR mrbump HKLIN /data1/jmht/ideal_helices/{0}/{0}-cad.mtz SEQIN /data1/jmht/ideal_helices/{0}/{0}_1.fasta HKLOUT OUT.mtz XYZOUT OUT.pdb << eof LABIN SIGF={3} F={4} FreeR_flag={5} JOBID {0}_{1}_{2} MRPROGRAM phaser LOCALFILE /data1/jmht/ideal_helices/{0}/{1}_{2}.pdb CHAIN ALL RMS 0.1 SCOPSEARCH False PQSSEARCH False SSMSEARCH False FAST False DOFASTA False MDLD False MDLC False MDLM False MDLP False MDLS False MDLU True UPDATE False BUCC False BCYCLES 5 ARPWARP False ACYCLES 10 SHELXE True SHLXEXE /home/jmht/bin/shelxe SCYCLES 15 FIXSG True SXREBUILD TRUE CHECK False LITE True PICKLE False TRYALL True USEACORN False USEENSEM False CLEAN False DEBUG True PJOBS 16 PKEY KILL TIME 360 END eof popd """.format( result.pdbCode, result.rioHelixSequence, t, pdbCode2Col[ result.pdbCode ]['SIGF'], pdbCode2Col[ result.pdbCode ]['F'], pdbCode2Col[ result.pdbCode ]['FREE'], ) with open( newScript, 'w') as f: f.write(stext ) #break # Now write out mapping file os.chdir(workdir) with open("ensemble2Helix.txt", 'w') as f: for pdbCode in sequence2ensemble.keys(): print sequence2ensemble[ pdbCode ] for sequence, elist in sequence2ensemble[ pdbCode ].iteritems(): for ensemble in elist: f.write( "{0} {1} {2}\n".format( pdbCode, ensemble, sequence ) ) with open("ensemble2Helix.pkl", 'w') as f: cPickle.dump( sequence2ensemble, f)