#!/usr/bin/python import os,sys,shutil,subprocess,copy from process import process from program import program import common import fileinput par=common.parameter class mbref(process): name="iterative model building and refinement" short_name="model tracing with refinement" supported_procs=["ref", "mb", "dmfull"] supported_progs=["arpwarp"] supported_params={} supported_params['target'] = par( desc='Experiment/refinement target', typ=str, cap=True, share=True ) supported_params['bigcyc'] = par( desc='Number of cycles of model building', typ=int ) supported_params['minbigcyc'] = par( desc='Minimal number of model building cycles (only used for with_dm)', typ=int ) supported_params['low_res_par'] = par( desc='Use low resolution parameters in refinement and model building.', typ=bool, share=True ) supported_params['arp_sad'] = par( desc='Use the original ARP/wARP SAD procedure', typ=bool ) supported_params['catch_output'] = par( desc='Catch program output continously', typ=bool ) supported_params['with_dm'] = par( desc='Include dens.modif. iterations in building', typ=bool ) supported_params['start_shelxe'] = par( desc='Use SHELXE in initial model building cycles then use Buccaneer', typ=bool, share=True ) def RunPreprocess(self, rundir=None, **kwargs): process.RunPreprocess(self,rundir,kwargs) # get the number of monomers for arpwarp if that may be needed if 'arpwarp' in self.programs and not self.inp.Get(has_monomers=True) and self.inp.Get('sequence'): matthews=self.AddProcess('matthews', propagate_out=False) matthews.Run() self.processes.remove(matthews) # run shelxc to get the ins file for shelxe if self.GetProcess('mb') and self.mb.GetProg('shelxe'): shelxc=self.AddProg('shelxc') shelxc.Run() self.inp.Add(shelxc.out.Get('datafile',filetype='ins')) def Init(self): self.mb_res_all, self.ref_res_all = [], [] self.stop_message=None self.cyc_fin,self.R_thres=None,0.42 self.result_str = "" self.i2_R = 1.0 self.i2_Rfree = 1.0 self.hydrog_no_set = None def TreatInOutPar(self, set_all_par=False): # defaults if no program and no subprocess is specified if not self.GetProg(supported=True) and not self.GetProcess(supported=True): # maybe we could use arpwarp by default if a) available, b) high res (<2.5) ? self.AddProcess('ref') self.AddProcess('mb') if self.GetProcess(supported=True): # assign the child processes to specific class attributes for convenience self.ph, self.mb, self.dm = self.GetOrAddProcess('ref'), self.GetOrAddProcess('mb'), None # check and set some defaults crank=self.GetCrankParent() if self.GetParam('with_dm') is None and crank and not crank.GetProcess('phdmmb'): self.SetParam('with_dm') if self.GetParam('with_dm'): if not self.GetParam('bigcyc'): self.SetParam('bigcyc',15) if not self.GetParam('minbigcyc'): self.SetParam('minbigcyc',min(5,self.GetParam('bigcyc'))) if self.GetParam('bigcyc')self.R_thres-0.05): self.dm.GetProcess('dm').inp.Add(self.ph.out.Get('mapcoef',typ='combined')) ###!!!!!! if self.cyc_fin: self.dm.SetParam('dmcyc',4) self.dm.Run() cphcomb=self.AddProg('cphasecombine', propagate_out=False) cphcomb.inp.Set([self.dm.out.Get('mapcoef',typ='combined'),]+[self.ph.out.Get('mapcoef',typ='combined')]) ###!!!!!!if self.cyc_fin: ### cphcomb.SetKey('weight-hl-1',0.65), cphcomb.SetKey('weight-hl-2',0.35) cphcomb.Run() self.mb.inp.Set(cphcomb.out.mapcoef) self.programs.remove(cphcomb) # skipping correlation mode if number of cycles is large enough if self.mb.cyc%3==0 and not self.cyc_fin and self.GetParam('bigcyc')-self.mb.cyc>3 and self.mb.GetProg('buccaneer'): #!!!!!!! #if self.mb.cyc%3==0 and self.GetParam('bigcyc')-self.mb.cyc>3 and self.mb.GetProg('buccaneer'): self.mb.GetProg('buccaneer').SetKey('fast',False,keep_previous=False) self.mb.inp.Clear('model') #!!!!!!! #self.ph.inp.Delete( self.ph.inp.Get('mapcoef',typ='best',custom='mlhl') ) if not self.mb.GetProg(): common.Error('No program specified for {0}'.format(self.mb.name)) #self.mb.GetProg().runname=self.mb.GetProg().name+'_cyc'+str(self.mb.cyc) self.mb.GetProg(supported=True).SetRunDir(os.path.join(self.mb.rundir,self.mb.GetProg(supported=True).nick+'_cyc'+str(self.mb.cyc))) self.mb.Run() self.UpdateResults('mb') # run refinement if self.ph.out.Get('model',typ='substr',filetype='pdb'): self.ph.inp.Set(self.ph.out.Get('model',typ='substr',filetype='pdb')) self.ph.inp.Add(self.mb.out.Get('model')) self.ph.GetProg(supported=True).SetRunDir(os.path.join(self.ph.rundir,self.ph.GetProg(supported=True).nick+'_cyc'+str(self.mb.cyc))) self.ph.Run() self.UpdateResults('ph') # adding hydrogens if R is good (and not at low res) if self.hydrog_no_set and self.ref_res_all and self.ref_res_all[-1][0]<0.4: self.ph.GetProg('refmac').SetKey('make',False,keep_previous=False) # switch to buccaneer from shelxe if self.mb.GetProg('buccaneer') and not self.mb.GetProg(supported=True).nick=='buccaneer': if self.mb.cyc>=min(3,self.GetParam('bigcyc')/2) or self.ph.R=self.cyc_fin: self.stop_message='Majority of the model should be successfully built.' if self.stop_message: self.Info(self.stop_message) break # set correlation mode for Buccaneer etc if self.mb.GetProg('buccaneer'): #!!!!!!!! #if not self.ph.inp.Get('mapcoef',typ='best',custom='mlhl'): # self.ph.inp.AddCopy( self.dm.inp.Get('mapcoef',typ='best',custom='mlhl') ) self.mb.GetProg('buccaneer').inp.Add( self.out.Get('model',typ='partial') ) self.mb.GetProg('buccaneer').SetKey('fast',True,keep_previous=False) if not self.mb.GetProg('buccaneer').IsKey('cycles'): self.mb.GetProg('buccaneer').SetKey('cycles',2) self.UpdateResults('end') else: # arpwarp (or possibly other mb+ref programs in the future) arp_auto=self.GetProg('arpwarp_auto') arp=self.GetProg('arpwarp') if arp_auto: if not self.inp.Get('mapcoef',typ=('combined','best'),col=('ph','fom')) and self.inp.Get('mapcoef',typ=('combined','best'),col='hla'): fom2hl=self.AddProg('chltofom',propagate_out=False) fom2hl.Run() self.programs.remove(fom2hl) arp_auto.Run() if arp: arp.inp.Add(arp_auto.out.Get('datafile',typ='par')) arp.fsf=arp_auto.fsf arp.SetRunDir(arp_auto.rundir) for line in fileinput.input(arp.inp.Get(typ='par').GetFileName(), inplace=True): if line.startswith('set JOB_ID'): line='set JOB_ID = arp\n' sys.stdout.write(line) if arp: if self.GetParam('target') == 'SAD' and not self.GetParam('arp_sad'): parf=arp.inp.Get(typ='par').GetFileName() obj,N=self.inp.GetTargetObjects(self.GetParam('target'), modtyp='pdb', fsftyp='mtz') if obj is None: common.Error('Input data/model for ARP/wARP SAD could not be retrived.') with open(parf,'a') as f: f.write('set phaseref = SADH\n') f.write("set F_1 = '{0}'\n".format(obj['f+'].GetLabel('f'))) f.write("set SIGF_1 = '{0}'\n".format(obj['f+'].GetLabel('sigf'))) f.write("set F_2 = '{0}'\n".format(obj['f-'].GetLabel('f'))) f.write("set SIGF_2 = '{0}'\n".format(obj['f-'].GetLabel('sigf'))) for at in obj['mod'].GetAtomTypes(getlist=True): fp,fpp,dn,att=obj['mod'].Getfpfpp(at,obj['f+'].dname) if fp is not None and fpp is not None: f.write("set ANOM = ' FORM {0} {1} {2}'\n".format(obj['mod'].GetAtomType(),fp,fpp)) f.write("set HEAVYIN = '{0}'\n".format(obj['mod'].GetFileName('pdb'))) ### this should be disabled and the various heavy atom exceptions may be removed from dummy/refmac #f.write("set heavyin = '{0}'\n".format(obj['mod'].GetFileName('pdb'))) dummy_env = os.path.join( os.getenv('CRANK2'), 'bin', 'dummy' ) if not os.path.isdir(dummy_env): dummy_env = os.path.join( os.getenv('CCP4'), 'share', 'ccp4i', 'crank', 'bin', 'dummy' ) inp_parf=os.path.join(os.path.dirname(parf),'input.par') shutil.copy(parf, inp_parf) shutil.copy(obj['mod'].GetFileName('pdb'), obj['mod'].GetFileName('pdb')+'.ref') arp.env['PATH'] = "{0}{2}{1}".format(dummy_env, os.getenv('PATH'), os.pathsep) arp.env['CCP4I_TOP'] = dummy_env arp.env['CBIN'] = dummy_env arp.Run() self.UpdateResults('end') #with open('arp_warp.log','w') as g: # subprocess.call( [os.path.join(os.getenv('warpbin'),'warp_tracing.sh'),inp_parf,'1'], stdout=g ) def UpdateInfo(self,line): # this is arpwarp specific; just pushes results from arpwarp log through UpdateResults() prog=self.GetProg(supported=True) if not prog or prog.nick!='arpwarp': return self.UpdateResults('arpwarp',line) def UpdateResults(self, update, line=None): if update=='arpwarp': mbref_stat, update = self.GetProg(supported=True).GetStatGrep, '' if mbref_stat('build_cycle',from_str=line): self.cyc=mbref_stat('build_cycle',from_str=line) self.act_mb_res = [] elif mbref_stat('rfact', from_str=line): self.ref_res_all.append( (mbref_stat('rfact',from_str=line), 0.0, mbref_stat('rfree',from_str=line)) ) self.i2_R = self.ref_res_all[-1][0] update='ph' elif mbref_stat('res_built',from_str=line): self.act_mb_res.append( (mbref_stat('res_built',from_str=line), mbref_stat('frag_built',from_str=line)) ) elif mbref_stat('round_cycle',from_str=line): res_frac = self.act_mb_res[mbref_stat('round_cycle',from_str=line)-1] self.mb_res_all.append( (self.cyc, res_frac[0], res_frac[1], None,None) ) update='mb' elif update=='mb': mb_stat = self.mb.GetProg(supported=True).GetStat if self.mb.GetProg(supported=True).nick=='shelxe': self.mb_res_all.append( (self.mb.cyc, mb_stat('res_built'), len(filter(None,mb_stat('res_built_per_frag'))), 0, 0) ) else: self.mb_res_all.append( (self.mb.cyc, mb_stat('res_built'), mb_stat('frag_built'), mb_stat('compl_chain'), mb_stat('compl_res')) ) if not self.mb_res_all[-1][1]: self.result_str = "No residues could be traced, map too noisy. Structure solution was unsuccessful." common.Error(self.result_str, nosuccess=True) elif update=='ph': ph_stat = self.ph.GetProg(supported=True).GetStat self.ref_res_all.append( (ph_stat('rfact')[-1], ph_stat('fom'), (ph_stat('rfree',accept_none=True) or [None])[-1]) ) self.i2_R, self.i2_Rfree = self.ref_res_all[-1][0], self.ref_res_all[-1][2] if self.cyc_fin is None and self.i2_R0.25): self.result_str = "Partial model was probably correctly built." else: self.result_str = "The structure solution was unsuccessful." #self.result_str = "Final model contains {0} residues and provides R-factor of {1}.".format( # self.mb_res_all[-1][1], self.ref_res_all[-1][0]) if update: self.PrintActualLogGraph(update=update) def PrintActualLogGraph(self, update=None): if self.opened_loggraph: self.GetLogGraphHandle().seek(0,0) self.LGInfo(self.GetCCP4Header()) self.LGInfo(self.GetLogGraphPrograms()) if self.ref_res_all: self.LGInfo('\n $TABLE : R-factor and FOM per cycle:') self.LGInfo('$GRAPHS : R-factor and FOM per cycle :A:1,2,3:\n$$') self.LGInfo('Cycle R-factor FOM $$ $$') for cyc,(R,fom,Rfree) in enumerate(self.ref_res_all): self.LGInfo('{0:3} {1:8.3f} {2:8.3f}'.format(cyc+1,R,fom)) self.LGInfo('$$\n') if self.mb_res_all: self.LGInfo('\n $TABLE : Built per building cycle:') self.LGInfo('$GRAPHS : Residues and fragments per cycle :A:1,2,3:\n$$') self.LGInfo('Cycle Residues Fragments $$ $$') for cyc,res,fr,c1,c2 in self.mb_res_all: self.LGInfo('{0:3} {1:8} {2:8}'.format(cyc,res,fr)) self.LGInfo('$$\n') if update=='end': self.LGInfo('\n$TEXT:Result: $$ Final result $$') self.LGInfo("{0}\n$$\n".format(self.result_str)) if self.rv_report is not None: if not hasattr(self,'rv_plotmb'): self.rv_plotmb = self.rv_report.Plot( 'Residues and fragments vs build cycle', "Cycle", \ "Number of residues and fragments", block="Model building statistics", legendloc='nw', ymin=0., intx=True ) self.rv_res = self.rv_plotmb.PlotLine(["x"],["residues"]) self.rv_fr = self.rv_plotmb.PlotLine(["x"],["fragments"]) self.rv_fr.CustomTick( "x", 0, "0" ) if self.mb_res_all and update=='mb': if len(self.ref_res_all)==10: self.rv_fr.Reset(reset_data=False) cyc,res,fr,c1,c2 = self.mb_res_all[-1] cst = len(self.mb_res_all) if len(self.mb_res_all)<10 else False self.rv_res.Data( cyc, res, int ) self.rv_fr.Data( cyc, fr, int, custom_x_tick=cst, flush=True ) if self.ref_res_all and update=='ph': if not hasattr(self,'rv_plotref'): self.rv_plotref = self.rv_plotmb.parent.parent.Plot( 'R-factor and FOM vs refinement cycle', \ "Cycle", "R-factor and FOM", block="Refinement statistics", legendloc='nw', ymin=0., intx=True ) self.rv_fom = self.rv_plotref.PlotLine(["x"],["FOM"]) self.rv_R = self.rv_plotref.PlotLine(["x"],["R"]) self.rv_R.CustomTick( "x", 0, "0" ) self.rv_Rfree = self.rv_plotref.PlotLine(["x"],["Rfree"], color='darkorchid') if (not self.parent_process or not self.parent_process.ccp4i2) and self.out.Get('mapcoef',typ='combined'): # this displays the actual pdb+mtz updated cycle by cycle # the files are continuously rewritten which might cause issues - we'll need to test! out_pdb = self.out.Get('model',typ=('partial','partial+substr')).GetFileName() out_mtz = self.out.Get('mapcoef',typ='combined').GetFileName() out_map = self.out.Get('mapcoef',typ='weighted',filetype='map').GetFileName('map') out_dmap = self.out.Get('mapcoef',typ='diff',filetype='map',conv_opts=['no_rewrite']).GetFileName('map') self.rv_report.Text("
Output (updating after each refinement cycle):") self.rv_files = self.rv_report.DataFile(out_pdb, "xyz", "Built model and map") self.rv_files.DataFile(out_mtz, "hkl:map") self.rv_files.DataFile(out_map, "hkl:ccp4_map") self.rv_files.DataFile(out_dmap, "hkl:ccp4_dmap", flush=True) if len(self.ref_res_all)==10: self.rv_R.Reset(reset_data=False) R,fom,Rfree = self.ref_res_all[-1] cst = len(self.ref_res_all) if len(self.ref_res_all)<10 else False if fom: self.rv_fom.Data( len(self.ref_res_all), fom, int ) if Rfree: self.rv_Rfree.Data( len(self.ref_res_all), Rfree, int, custom_x_tick=cst ) self.rv_R.Data( len(self.ref_res_all), R, int, custom_x_tick=cst, flush=True ) if update=='end': if self.stop_message: self.rv_report.Text("Stop:") self.rv_report.Text(' '+self.stop_message) self.rv_report.Text("Result:") self.rv_report.Text(' '+self.result_str, flush=True)