import os,sys
import pkgutil,shutil,copy,math
import common,processes,ccp4i2crank,parse
from process import process, crvapi
from program import program
class crank(process):
name='CRANK2'
ccp4i2=None
# parameters
supported_params = {}
supported_params['target'] = common.parameter( desc='Experiment/refinement target', typ=str, cap=True )
supported_params['replace_met_mse'] = common.parameter( desc='Replace methionines by selenomethionines in input model(s) with SE substructure', typ=bool )
supported_params['force_xdnames'] = common.parameter( desc='Replace the inputted xname/dname in the generated mtz files', typ=bool )
# all processes
supported_procs = [ n for _,n,_ in pkgutil.iter_modules( [os.path.dirname(processes.__file__)] ) ]
# filehandle of the loggraphfile - LGInfo() will send output to this filehandle if not None
loggraphfilehandle=None
loggraph_actfilehandle=None
x_rename, d_rename = {}, {}
# ccp4 version is determined at the beginning at saved here (for loggraph printing)
ccp4_version=None
# peak and inflection point wavelengths for some atoms
# used to guess the peak/infl/remote assign. - this can be used as the last resort for shelxc
# also used the other way around, ie use the theoretical values if dtype (peak/infl etc) was specified
w_pi = { "SE": [(0.9793,0.97944)], "HG": [(1.0092,1.0098)], "AU": [(1.0400,1.0405),(0.9026,0.9028)], \
"PT": [(1.0721,1.0725),(0.9339,0.9341)], "ZN": [(1.2836,1.2841)], "BR": [(0.9199,0.9201)] }
# references
references = ( 'Skubak P, Pannu NS (2013) Automatic protein structure solution from weak X-ray data.' +
'Nature Communications 4, 2777.',
'Pannu NS, Waterreus WJ, Skubak P, Sikharulidze I, Abrahams JP and ' +
'de Graaff RAG (2011) Recent advances in the CRANK software suite ' +
'for experimental phasing. Acta Cryst. D67, 331-337.', )
def TreatInOutPar(self, set_all_par=False, emulate=False, initialized=True):
if self.GetParam('replace_met_mse') is None and not self.inp.Get('model',('substr','partial+substr')):
for mod in self.inp.GetAll('model',has_atomtypes=True):
if 'SE' in mod.GetAtomTypes():
self.SetParam('replace_met_mse', True)
# the target setting requires initialization. all dependend on target should come after initialization!
if initialized:
# if S substr. than setting S subst. for native too by default (can be disabled eg by inputting empty native model without anom. atoms)
if self.GetParam('target'):
self.CheckSSADnative()
else:
# automatic determination of the experiment/target from the data.
for targ in ('MAD','SIRAS','SAD'):
if self.inp.GetTargetObjects(targ,inten=None,no_warn=True)[0]:
self.SetParam('target',targ)
break
else:
common.Warning("Experiment type/target could not be determined from the input data.")
# taking care of SAD/SIRAS subtleties
if self.GetParam('target'):
self.CheckSSADnative()
if targ=='SIRAS':
(obj,N),sfi=self.inp.GetTargetObjects(targ,inten=False),'f'
if obj is None:
(obj,N),sfi=self.inp.GetTargetObjects(targ,inten=True),'i'
#!!!!! temporary till refmac SIRAS crash is fixed...
#if 'phas' in [p.nick for p in self.processes]:
# self.SetParam('target','SAD')
if obj and self.inp.Get('model',typ=('substr','partial+substr'),xname=obj['fn'].GetCrystalName()):
substr_not_in_native_input=self.inp.Get(is_native=True,has_num_atoms=True) and self.inp.Get(is_native=True,has_num_atoms=True).exp_num_atoms==0
if not substr_not_in_native_input:
self.SetParam('target','SAD')
elif obj and obj['f+'].GetFileName('mtz') and obj['fn'].GetFileName('mtz'):
sft=self.AddProg('sftools')
sft.SetKey('read', ['"'+obj['f+'].GetFileName('mtz')+'"','col','"'+obj['f+'].GetLabel(sfi)+'"'])
sft.SetKey('read', ['"'+obj['fn'].GetFileName('mtz')+'"','col','"'+obj['fn'].GetLabel(sfi)+'"'])
sft.SetKey('correl', ('col','1','2') ), sft.SetKey('quit\nY')
sft.Run()
corr=sft.GetStat('correl')
self.programs.remove(sft)
if corr>0.999:
self.SetParam('target','SAD')
if not emulate and self.GetParam('target'):
self.Info('{0} phasing experiment.'.format(self.GetParam('target')))
process.TreatInOutPar(self,set_all_par)
def CheckSSADnative(self):
obj,N=self.inp.GetTargetObjects(self.GetParam('target'),inten=None)
if obj and 'fn' in obj and 'mod' in obj and obj['mod'].GetAtomType()=='S' and (not 'modn' in obj or not obj['modn']):
self.inp.AddNew('model',xname=obj['fn'].GetCrystalName(),atomtype1='S',typ='substr',custom='native')
def RunPreprocess(self,emulate=False,*args,**kwargs):
if hasattr(self,'logout') and not hasattr(self,'stdout_save'):
self.Info('Output will be redirected to {0}'.format(self.logout))
self.stdout_save = sys.stdout
sys.stdout = open(self.logout,'w',1)
global crvapi
if hasattr(self,'disable_rvapi') and crvapi:
self.Info('Disabling rvapi output.')
crvapi = False
if hasattr(self,'rvapi_uri_prefix') and crvapi:
crvapi.uri_prefix = self.rvapi_uri_prefix
if crvapi:
crvapi.tree = False if hasattr(self,'rvapi_no_tree') else True
crvapi.separate_steps = True if hasattr(self,'rvapi_separate_steps') else False
self.TreatInOutPar(emulate=emulate,initialized=False)
process.RunPreprocess(self,emulate=emulate,crvapi=crvapi,*args,**kwargs)
self.FixInpData()
self.DetermineUnknownModel(emulate=emulate)
self.GetAnomScatCoefs()
if not emulate:
self.FixInpModels()
def RunBody(self, emulate=False, set_all_par=False, *args, **kwargs):
"""Runs CRANK2 job."""
self.run.inp = self.inp
error=self.Initialize(self,emulate=emulate)
self.run.TreatInOutPar(set_all_par=set_all_par, emulate=emulate)
if error:
if not emulate:
common.Error(error)
else:
common.Warning(error+' Stopping preliminary!')
# the emulation goes through the TreatInOutPar of all steps without actually running them
# if set_all_par is True then for all the steps all the parameters are set and printed
for ip,p in enumerate(self.processes):
# at this point we are dealing with "dummy" preparation process instances
# (this is in order to write desired XML files by conversion from dummy instances)
# first pass output from the previous step to input of the current step
#if not emulate:
self.PassPrevStepsOutput(p,ip,emulate)
# set target for the process if needed
self.SetTargetDefault(p,emulate=emulate)
# pass previous phasing/dm jobs to the handdet process
if p.nick=='handdet':
self.PassToHandDet(p,ip)
# write XML of process p and create its run instance by reading it
p_run = self.SubProcess_WriteXML_CreateRun(p,ip,emulate=emulate)
# this will assure that if eg cell params are found but no spacegroup then -d will return them
if error and emulate:
continue
# any other general process initialization if needed
self.Initialize(p_run)
if not emulate:
# run the p_run process
self.RunSubProcess(p_run,ip)
# pass the chosen hand mtz's/pdb's to all the subsequent steps
if p.nick=='handdet':
self.PassChosenHand(p_run,ip)
else:
p_run.TreatInOutPar(set_all_par)
# emulation cleanup
if emulate:
os.chdir(self.rundir)
for p_run in self.run.processes:
if os.path.isdir(p_run.rundir) and os.path.realpath(os.path.realpath(p_run.rundir)).startswith(self.rundir):
shutil.rmtree(p_run.rundir)
def CheckBinaries(self):
# checks whether the binaries needed exist (and their versions if such a check is defined for program)
# first check sftools (it is needed to figure out what other programs will be used)
program.from_name('sftools',None).CheckBinary()
# let us construct a copy of the crank used for the run and run TreatInOutPar()
# for each step so that the default programs are assigned
self.emulate = self.GetProcessCopy()
#crank = self.AddProcessCopy(self)
#self.processes.remove(crank)
self.emulate.run.check_binaries=True
self.emulate.RunBody(emulate=True)
checked=[]
for p in self.emulate.run.GetFlatSubTree():
if hasattr(p,'binary'):
program.TreatParams(p)
# make sure each program is included only once
if p.binary not in checked:
p.CheckBinary()
checked.append(p.binary)
def GetCCP4Version(self):
# check/get ccp4 library version
if not self.ccp4_version:
bp3=program.from_name('bp3',None)
bp3.SetRunDir('checkbinary')
bp3.runname='{0}_ccp4_version'.format(self.name)
bp3.ExternalRun( [bp3.binary, '-i'], [] )
self.ccp4_version = bp3.GetStat('ccp4_version')
return self.ccp4_version
def DetermineUnknownModel(self,emulate=False):
for p in self.GetFlatSubTree():
# try to determine type of 'unknown' pdb from user input
model_to_fix=p.inp.Get('model',typ='unknown',filetype='pdb')
if model_to_fix:
at_obj=p.inp.Get('model',has_atomtypes=True)
if not at_obj: at_obj=self.inp.Get('model',has_atomtypes=True)
if at_obj:
pdbcur=self.AddProg('pdbcur',propagate_out=False,propagate_inp=False)
pdbcur.inp.Add(model_to_fix)
for at in at_obj.GetAtomTypes():
pdbcur.SetKey('delatom', "[{0}]".format(at))
if self.GetParam('replace_met_mse') and 'S' not in at_obj.GetAtomTypes():
pdbcur.SetKey('delatom', "(MET)/SD[S]".format(at))
pdbcur.Run()
anom_atoms_present = pdbcur.GetStat('atoms_deleted')
pdbcur.inp.Set(pdbcur.out.model)
pdbcur.SetKey('delatom', "[C]", keep_previous=False)
pdbcur.Run()
prot_atoms_present = pdbcur.GetStat('atoms_deleted')
if anom_atoms_present:
model_to_fix.SetType('substr')
if prot_atoms_present:
model_to_fix.SetType('partial+substr')
elif prot_atoms_present:
model_to_fix.SetType('partial')
else:
common.Error('Neither substructure nor partial model present in file {0}.'.format(model_to_fix.GetFileName('pdb')))
self.Info('Type of {0} model set to {1}.'.format(model_to_fix.GetFileName('pdb'), model_to_fix.GetType()))
if emulate:
shutil.rmtree(pdbcur.rundir)
self.programs.remove(pdbcur)
else:
action = common.Warning if emulate else common.Error
action('Could not classify {0} as substructure atom types not specified.'.format(model_to_fix.GetFileName('pdb')))
def GuessDtype(self,dname,xname,atomtype=None,wl=None,safe_guess=False):
# some heuristics to "guess" the dtype
dtype_strings = (("peak",),("infl","inflection"),("hrem","hrm","high","high-remote"),("lrem","lrm","low","low-remote"))
dtype, dnum, num = None, None, 0
# safe guess - exact match of the dname or custom
for ik,key in enumerate(dtype_strings):
if dname in key or self.inp.Get('fsigf',xname=xname,dname=dname,custom=key):
return key[0],ik
if safe_guess:
return None,None
# try to guess from the dname
for idt,dts in enumerate(dtype_strings):
for dt in dts:
if dt in dname.lower():
num+=1
if num==1:
dtype, dnum = dts[0], idt
else:
dtype, dnum = None, None
break
# try to guess from the wavelength and atomtype
if not dtype and atomtype in self.w_pi:
edge_dist = [ abs(wl-edge[0]) for edge in self.w_pi[atomtype] ]
ind = edge_dist.index(min(edge_dist))
pi_dist = [ abs(wl-pi) for pi in self.w_pi[atomtype][ind] ]
dnum = ind = pi_dist.index(min(pi_dist))
dtype = "peak" if ind==0 else "infl"
if pi_dist[ind]>0.0075:
dtype = "hrem" if ind==0 else "lrem"
dnum+=2
return dtype, dnum
def GetAnomScatCoefs(self):
for p in self.GetFlatSubTree():
# get anomalous scattering coefficients from wavelength if not supplied by user
models_to_fix = p.inp.GetAll( 'model', has_atomtypes=True )
for model_to_fix in models_to_fix:
atomtype = model_to_fix.GetAtomType()
xname=model_to_fix.GetCrystalName()
# this only does one atom type (possible to generalize later of course)
if None in [ sv for v in model_to_fix.atomtypes[atomtype] for sv in v[:3]]:
crossec=self.AddProg('crossec')
crossec.inp.Set(model_to_fix)
# we assign theoretical wavelength if wavelength is not known but dname/custom specifies the MAD "type"
# in fact, we want to prefer wavelength from dname/custom for f',f'' generation of peak, infl
if atomtype in self.w_pi:
crossec.BackupAnyPars()
try:
crossec.TreatInput()
except common.CrankError as e:
sys.exc_clear()
crossec.RestoreAnyPars()
for dn in crossec.dnames:
if not crossec.wavels or dn not in zip(*crossec.wavels)[0]:
dtype,idt = self.GuessDtype(dn,xname,safe_guess=True)
if dtype:
ass_obj = self.inp.Get('fsigf',xname=xname,dname=dn)
ass_obj.wavel = self.w_pi[atomtype][0][idt] if idt<2 else self.w_pi[atomtype][0][idt-2]-0.004*(idt-2.5)
common.Warning("Wavelength for dataset {0} not known, using theoretical value of {1}".format(dn,ass_obj.wavel))
elif dn in ('infl','inflection','peak'):
ass_obj = crossec.inp.AddCopy( self.inp.Get('fsigf',xname=xname,dname=dn) )
ass_obj.wavel = self.w_pi[atomtype][0][0] if dn=='peak' else self.w_pi[atomtype][0][1]
self.Info("Using theoretical wavelength of {1} for f',f'' estimation".format(dn,ass_obj.wavel))
try:
crossec.Run()
except common.CrankError as e:
sys.exc_clear()
common.Warning("Could not determine f',f'' due to error: {0}".format(e.message))
else:
# saving the determined f',f'' in the input model for all datasets where it was not set before
for i,(dn,wl) in enumerate(crossec.wavels):
fp,fpp,d,a=model_to_fix.Getfpfpp(atomtype,dn)
if not fp and not fpp:
# some heuristics to "guess" the dtype
dtype,idt = self.GuessDtype(dn,xname,atomtype,wl)
model_to_fix.SetAtomType( atomtype, *(crossec.GetStat('anom_scat_coef',atomtype)[i]), dname=dn, guessed_dtype=dtype )
self.programs.remove(crossec)
def FixInpModels(self):
for p in self.GetFlatSubTree():
# try fixing substructure pdb from user input
model_to_fix=p.inp.Get('model',typ='substr',filetype='pdb')
if model_to_fix:
fixsub=self.AddProcess('fixsubstrpdb',propagate_out=False,propagate_inp=False)
fixsub.inp.Add(model_to_fix)
fixsub.AddProg('pdbcur')
fixsub.GetProg().outfilename['pdb'] = "fixed_"+p.nick+".pdb"
fixsub.Run()
p.inp.model.remove(model_to_fix)
p.inp.Add(fixsub.out.Get('model'), propagate=False)
self.processes.remove(fixsub)
# estimate number of substr. atoms
model_to_fix=p.inp.Get('model',typ=('substr','partial+substr'),filetype='pdb')
if model_to_fix and not model_to_fix.exp_num_atoms:
model_to_fix.GuessNumSubstrAtomsFromSeq(self)
if self.GetParam('replace_met_mse'):
for mod in p.inp.GetAll('model',typ=('partial','partial+substr'),has_atomtypes=True):
if 'SE' in mod.GetAtomTypes():
pdbcur=self.AddProg('pdbcur',propagate_out=False,propagate_inp=False)
pdbcur.inp.Add(mod)
pdbcur.SetKey('renresidue', "(MET) 'MSE'")
pdbcur.SetKey('renatom', "SD[S] 'SE'")
pdbcur.SetKey('renelement', "SE[S] 'SE'")
pdbcur.outfilename['pdb'] = "mse_repl_"+p.nick+".pdb"
pdbcur.Run()
p.inp.model.remove(mod)
p.inp.Add(pdbcur.out.Get('model'), propagate=False)
self.programs.remove(pdbcur)
def FixInpData(self):
# makes sure that xname/dname assignments match those in the mtz
objs = {}
for o in self.inp.GetAll('fsigf',filetype='mtz',try_convert=False):
objs.setdefault(o.GetFileName('mtz'),[]).append(o)
if objs:
mergemtz=self.GetProcess('mergemtz')
if not mergemtz:
mergemtz=self.AddProcess('mergemtz',propagate_out=False,propagate_inp=False)
mergemtz.SetParam('cons_check_suff')
simul = not self.GetParam('force_xdnames')
issue, self.x_rename, self.d_rename, save_xd = "", {}, {}, []
for i,f in enumerate(objs):
cons = None if f not in mergemtz.xdrename else not mergemtz.xdrename[f]
mergemtz.inp.Set(objs[f])
mergemtz.runname = 'merge'+str(i)
mergemtz.Run(outmtz=os.path.basename(f).rsplit('.',1)[0]+'_cad{0}.mtz'.format(i),simulate=simul,consistent=cons)
if not simul:
for o in objs[f]:
self.inp.SetFileToChild(o,mergemtz.out.Get(filetype='mtz').GetFileName('mtz'))
else:
substr_not_in_native_input=self.inp.Get(xname='native',has_num_atoms=True) and self.inp.Get(xname='native',has_num_atoms=True).exp_num_atoms==0
for (xmtz,dmtz),(xinp,dinp) in mergemtz.xdrename[f].iteritems():
save_xd.append( (xmtz,dmtz,xinp,dinp) )
if xinp not in self.x_rename:
self.x_rename[xinp]=xmtz
elif self.x_rename[xinp]!=xmtz:
issue='Inputted xname {0} corresponds to multiple mtz xnames ({1},{2}). '.format(xinp,xmtz,self.x_rename[xinp])
if dinp not in self.d_rename:
self.d_rename[dinp]=dmtz
elif self.d_rename[dinp]!=dmtz:
issue='Inputted dname {0} corresponds to multiple mtz dnames ({1},{2}). '.format(dinp,dmtz,self.d_rename[dinp])
for di in [ di for (xm,dm,xi,di) in save_xd if xm==xmtz and dm==dmtz and dinp!=di ]:
issue='MTZ xname,dname ({2},{3}) corresponds to multiple inputted dnames {0},{1}. '.format(di,dinp,xmtz,dmtz)
if substr_not_in_native_input:
xinp_other=[i for i,m in self.x_rename.iteritems() if m==xmtz and i!=xinp]
xinp_multi=[i[0] for i in mergemtz.xdrename_multiple[f][(xmtz,dmtz)] ] if (xmtz,dmtz) in mergemtz.xdrename_multiple[f] else []
if (xinp_other and (xinp=='native' or 'native' in xinp_other)) or \
(xinp_multi and 'native' in xinp_multi):
issue='Crystal mtz name {0} is the same for native and derivative but it is specified that native contains no substructure. '.format(xmtz)
if issue:
if self.IsParam('force_xdnames'):
common.Error(issue)
else:
common.Warning(issue+' Forcing all the input xnames+dnames!')
self.SetParam('force_xdnames',True)
self.FixInpData()
return
self.processes.remove(mergemtz)
if simul:
# if we do not adjust xname/dname in mtz then we have to adjust xname/dname in all the input objects
for p in [self,]+self.GetAllSubPrograms()+self.GetAllSubProcesses():
for inp_obj in p.inp.GetAll():
if 'xname' in inp_obj.supported_attributes:
if inp_obj.GetCrystalName() in self.x_rename:
if inp_obj.GetCrystalName()=='native' and 'custom' in inp_obj.supported_attributes:
inp_obj.custom.append(inp_obj.GetCrystalName())
inp_obj.SetCrystalName(self.x_rename[inp_obj.GetCrystalName()])
if 'dname' in inp_obj.supported_attributes:
if inp_obj.GetDataName() in self.d_rename:
if inp_obj.GetDataName() in ('peak','infl','inflection','hrem','high-remote','lrem','low-remote') and 'custom' in inp_obj.supported_attributes:
inp_obj.custom.append(inp_obj.GetDataName())
inp_obj.SetDataName(self.d_rename[inp_obj.GetDataName()])
if 'd_name' in inp_obj.supported_attributes:
for at,vs in inp_obj.GetAtomTypes().iteritems():
for v in vs:
if v[2] in self.d_rename:
inp_obj.SetAtomType( at,v[0],v[1],self.d_rename[v[2]] )
# replace sigma_i < 0.0 by MNF (do we want to do this for sigma_f as well?)
objs = {}
for o in self.inp.GetAll('fsigf',col='sigi',typ=('plus','minus','average'),filetype='mtz'):
objs.setdefault(o.GetFileName('mtz'),[]).append(o)
if objs:
for i,f in enumerate(objs):
sft=self.AddProg('sftools')
sft.SetKey('read "{0}"'.format(f))
sft.SetKey('quit\nY')
sft.Run()
if sft.GetStat('zero_sigma',accept_none=True):
outmtz = objs[f][0].GetFileName('mtz',trim_path=True).rsplit('.',1)[0]+'_sft{0}.mtz'.format(i)
for o in objs[f]:
sft.SetKey('absent', ('col',o.GetLabel('i'),o.GetLabel('sigi'),'if','col',o.GetLabel('sigi'),'<=','0.0'), insert_index=-1)
self.inp.SetFileToChild(o,os.path.join(sft.rundir,outmtz))
sft.SetKey('write', outmtz, insert_index=-1)
sft.Run()
self.programs.remove(sft)
def SetTargetDefault(self, proc, emulate=False):
# exclude processes that never need/support targets first
if proc.nick not in ['phas','faest','substrdet','handdet','dmfull','phcomb','ref','comb_phdmmb','mbref','refatompick']:
return
if proc.nick=='dmfull' and self.GetProg(supported=True):
return
crank_targ = self.GetParam('target') if self.GetParam('target') else None
if not crank_targ and hasattr(self,'run'): crank_targ = self.run.GetParam('target')
# if overall target/experiment is set
if crank_targ and not proc.IsParam('target'):
# now set the default target for the current process
if proc.nick in ['phas','faest','substrdet'] or crank_targ=='RICE' or \
(crank_targ=='SAD' and ((proc.nick!='ref' and proc.nick!='mbref') or \
(not proc.inp.Get('fsigf',is_native=True) and self.inp.Get('model',has_fpp=True)) )) or \
(crank_targ=='SIRAS' and (proc.nick=='dmfull' or proc.nick=='phcomb')):
### should disable SIRAS for DM (line above)? slower and suboptimal results eg for toxd,x15
proc.SetParam('target', crank_targ)
else:
proc.SetParam('target', 'MLHL')
# if target/experiment is not set
if not crank_targ and not proc.IsParam('target'):
if proc.nick in ['ref','phcomb','mbref']:
proc.SetParam('target', 'RICE')
elif not emulate:
common.Error('No target specified for process of {0}'.format(proc.name))
def PassToHandDet(self, handdet, ip):
tobepassed=("phas","dmfull","refatompick")
for tbp in tobepassed:
if not handdet.GetParam('ignore_passed_phas') or tbp=='dmfull':
for pp in [ i for i in range(ip) if self.processes[i].nick==tbp ]:
handdet.processes.append(self.processes[i])
for o in self.run.processes[i].out.GetAll(stored_order=True):
handdet.processes[-1].out.AddCopy(o)
#handdet.processes[-1].programs = self.run.processes[i]
#for pp in [ self.run.processes[i] for i in range(ip) if self.run.processes[i].nick==tbp ]:
# handdet.processes.append(pp)
def PassChosenHand(self, handdet_run, ip):
for pp in [ self.processes[i] for i in range(ip+1,len(self.processes)) ]:
# we could invert any previously inputted models to subsequent steps
# but I would need to create createotherhand process to it this way.
#for mod in pp.inp.GetAll('model'):
# handdet.CreateHand2Model(mod, pp.inp.Get('fsigf',xname=mod.xname).GetSpacegroup(handdet))
#pp.inp.SetCopy()
# so we simply replace all mtz's instead - if something else is needed it should be added/adjusted here
pp.inp.SetCopy(handdet_run.out.fsigf,propagate=True)
def SubProcess_WriteXML_CreateRun(self,p,ip,emulate=False):
# write the process input xml
p_xml=os.path.join(self.rundir, p.nick+'.xml')
crank_p=copy.copy(self)
crank_p.processes=[p,]
# if emulate:
# p_xml = crank_p.Data2XML()
# else:
# common.WriteXML(crank_p, p_xml)
common.WriteXML(crank_p, p_xml)
# "undummify" by re-creating the process from the written xml
# pass extra attrs set by user from parse as eg disable_mtz_label_prefix needs to be set at the process creation
extra_attrs = { attr:getattr(self,attr) for attr in parse.parse(dummy=True).share_with_crank if hasattr(self,attr) }
crank_p=self.from_xml(p_xml, rundir=self.rundir, extra_attrs=extra_attrs)
p_run = crank_p.processes[0]
self.run.processes[ip] = p_run
p_run.parent_process=self.run
# initialize the 'actual' crank (meta)attributes not saved in xml
if ip==0:
self.run.prep=self
for at in ('logfilehandle','loggraphfilehandle','opened_log','opened_loggraph','ccp4_version',
'rv_report','emulate','d_rename','x_rename') + \
parse.parse(dummy=True).share_with_crank:
if hasattr(self,at):
setattr(self.run, at, getattr(self,at))
p_run.SetRunDir(os.path.join(self.rundir,str(ip)+'-'+p.nick))
return p_run
def Initialize(self,p_run, emulate=False):
# any other general process initialization
#convert I+/I- to I average in case no I average was supplied
for op in p_run.inp.GetAll('fsigf',col='i',typ='plus',filetype=('sca','hkl','HKL'),try_convert=False):
if not p_run.inp.Get('fsigf',typ='average',col='i',xname=op.GetCrystalName(),dname=op.GetDataName()):
o_new=p_run.inp.AddNew('fsigf',op.GetFileName(),typ='average',filetype=set(op.GetFileTypes()).intersection(('sca','hkl','HKL')).pop(), \
xname=op.GetCrystalName(),dname=op.GetDataName(),cell=op.cell,spgr=op.spgr,resol=op.resol,propagate=False)
o_new.SetLabel( ['i','sigi'] )
num=0
for op in p_run.inp.GetAll('fsigf',col='i',typ='plus',filetype='mtz',try_convert=False):
if not p_run.inp.Get('fsigf',typ='average',col='i',xname=op.GetCrystalName(),dname=op.GetDataName()):
om = p_run.inp.Get('fsigf',typ='minus',col='i',xname=op.GetCrystalName(),dname=op.GetDataName())
if om:
num+=1
sft=p_run.AddProg('sftools',propagate_out=False)
sft.runname+='_I'+str(num)
sft.SetRunDir()
outname = op.GetFileName(trim_path=True).rsplit('.',1)[0] + '_Isft{}.mtz'.format(num)
o_new = p_run.out.AddNew('fsigf',os.path.join(sft.rundir,outname),typ='average',xname=op.GetCrystalName(),dname=op.GetDataName(),propagate=False)
o_new.SetLabel( ['i','sigi'] )
o_new.custom.extend([c for c in om.custom if c in ('native','hrem','lrem','peak','infl')])
i, si = o_new.GetLabel('i'), o_new.GetLabel('sigi')
sft.SetKey('read', ['"'+op.GetFileName('mtz')+'"','col','"'+op.GetLabel('i')+'"','"'+op.GetLabel('sigi')+'"'] )
sft.SetKey('read', ['"'+om.GetFileName('mtz')+'"','col','"'+om.GetLabel('i')+'"','"'+om.GetLabel('sigi')+'"'] )
if op.GetLabel('f') and op.GetLabel('sigf') and om.GetLabel('f') and om.GetLabel('sigf'):
sft.SetKey('read', ['"'+op.GetFileName('mtz')+'"','col','"'+op.GetLabel('f')+'"','"'+op.GetLabel('sigf')+'"'] )
sft.SetKey('read', ['"'+om.GetFileName('mtz')+'"','col','"'+om.GetLabel('f')+'"','"'+om.GetLabel('sigf')+'"'] )
sft.SetKey('calc', ['J','col',i,'=','col','1','col','3','+','2','/'] )
sft.SetKey('calc', ['Q','col',si,'=','col','2','2','**','col','4','2','**','+','0.5','**','2','/'])
sft.SetKey('select','centro'), sft.SetKey('calc', ['Q','col',si,'=','col','2'])
sft.SetKey('select','all'), sft.SetKey('select', ['col','1']), sft.SetKey('select', ['not','col','3'])
sft.SetKey('calc', ['J','col',i,'=','col','1']), sft.SetKey('calc', ['Q','col',si,'=','col','2'])
sft.SetKey('select','all'), sft.SetKey('select', ['col','3']), sft.SetKey('select', ['not','col','1'])
sft.SetKey('calc', ['J','col',i,'=','col','3']), sft.SetKey('calc', ['Q','col',si,'=','col','4'])
sft.SetKey('select','all'), sft.SetKey('write',outname), sft.SetKey('Y'), sft.SetKey('quit'), sft.SetKey('Y')
sft.Run(check_bin=True)
p_run.inp.Add(o_new)
op_new, om_new = p_run.inp.AddCopy(op), p_run.inp.AddCopy(om)
#op_new.InitLabels(), om_new.InitLabels()
#op_new.SetLabel( ['i','sigi'], [op.GetLabel('i'),op.GetLabel('sigi')] )
#om_new.SetLabel( ['i','sigi'], [om.GetLabel('i'),om.GetLabel('sigi')] )
self.inp.SetFileToChild(op_new, os.path.join(sft.rundir,outname), 'mtz')
self.inp.SetFileToChild(om_new, os.path.join(sft.rundir,outname), 'mtz')
p_run.programs.remove(sft)
# try to obtain basic info about datasets (doing it only once per each xname+dname)
cell_use, spgr_use, todo_cellspg = None, None, []
for xn in p_run.inp.GetCrystalsList():
for dn in p_run.inp.GetDataList():
cell, spgr, datasets = None, None, p_run.inp.GetAll('fsigf',xname=xn,dname=dn)
for o in datasets:
if not cell:
cell = o.GetCell(self,accept_none=True)
if cell:
o.cell, cell_use = cell, cell
else:
todo_cellspg.append(o)
if not spgr or o.spgr or o.spgr_num: # the last two make sure to handle eg spgr change at handdet
spgr = o.GetSpacegroup(self,accept_none=True)
if spgr:
o.spgr, spgr_use = spgr, spgr
else:
todo_cellspg.append(o)
if not cell and datasets:
common.Warning('Cell parameters for crystal {0}, data {1} not known, will try to assign it cell from other dataset if possible.'.format(xn,dn))
if not spgr and datasets:
common.Warning('Spacegroup for crystal {0}, data {1} not known, will try to assign spacegroup from other dataset if possible.'.format(xn,dn))
# take native cell+spacegroup from derivative if it was not found or vice versa
for o in todo_cellspg:
if not o.GetCell(self,accept_none=True): o.cell = cell_use
if not o.GetSpacegroup(self,accept_none=True): o.spgr = spgr_use
#convert I's to F's in case no F was supplied
num=0
wilson_scale={}
for o in p_run.inp.GetAll('fsigf',col='i',typ=('plus','average'),filetype='mtz'):
if not p_run.inp.Get('fsigf',col='f',typ=o.GetType(),xname=o.GetCrystalName(),dname=o.GetDataName(),filetype='mtz') and \
(not o.GetType()=='average' or not p_run.inp.Get('fsigf',col='i',typ='plus',xname=o.GetCrystalName(),dname=o.GetDataName(),filetype='mtz')):
num+=1
# sftools conversion disabled as of now - it seems truncate works better at least for thio (?)
# outname = o.GetFileName(trim_path=True).rsplit('.',1)[0] + '_Fsft'+str(num)+'.mtz'
# o.SetLabel( ['f','sigf'] )
# lab_types = ('G','L') if o.GetType()=='plus' else ('F','Q')
# sft=p_run.AddProg('sftools',propagate_out=False)
# sft.runname+='_F'+str(num)
# sft.SetRunDir()
# sft.SetKey('read', ['"'+o.GetFileName('mtz')+'"','col','"'+o.GetLabel('i')+'"','"'+o.GetLabel('sigi')+'"'] )
# sft.SetKey('i2f',['col','"'+o.GetLabel('i')+'"','"'+o.GetLabel('sigi')+'"'])
# sft.SetKey('set',['label','col','3','4']), sft.SetKey(o.GetLabel('f')), sft.SetKey(o.GetLabel('sigf'))
# sft.SetKey('set',['type','col','3','4']), sft.SetKey(lab_types[0]), sft.SetKey(lab_types[1])
# if o.GetType()=='plus':
# om = p_run.inp.Get('fsigf',typ='minus',col='i',xname=o.GetCrystalName(),dname=o.GetDataName())
# om.SetLabel( ['f','sigf'] )
# sft.SetKey('read', ['"'+om.GetFileName('mtz')+'"','col','"'+om.GetLabel('i')+'"','"'+om.GetLabel('sigi')+'"'] )
# sft.SetKey('i2f',['col','"'+om.GetLabel('i')+'"','"'+om.GetLabel('sigi')+'"'])
# sft.SetKey('set',['label','col','7','8']), sft.SetKey(om.GetLabel('f')), sft.SetKey(om.GetLabel('sigf'))
# sft.SetKey('set',['type','col','7','8']), sft.SetKey(lab_types[0]), sft.SetKey(lab_types[1])
# sft.SetKey('write',outname), sft.SetKey('Y'), sft.SetKey('quit'), sft.SetKey('Y')
# sft.Run(check_bin=True)
# self.inp.SetFileToChild(o, os.path.join(sft.rundir,outname), 'mtz')
# if o.GetType()=='plus':
# self.inp.SetFileToChild(om, os.path.join(sft.rundir,outname), 'mtz')
# p_run.programs.remove(sft)
# now put on rough absolute scale
# if not emulate:
# if o.GetCrystalName() not in wilson_scale:
# wilson = p_run.AddProg('wilson')
# wilson.inp.Add(o)
# wilson.Run()
# wilson_scale[o.GetCrystalName()] = math.sqrt(wilson.GetStat('wilson_scale'))
# p_run.programs.remove(wilson)
# wsc=wilson_scale[o.GetCrystalName()]
# outname = o.GetFileName(trim_path=True).rsplit('.',1)[0] + 'sc.mtz'
# sft=p_run.AddProg('sftools',propagate_out=False)
# sft.runname+='_Fsc'+str(num)
# sft.SetRunDir()
# sft.SetKey('read', '"'+o.GetFileName('mtz')+'"' )
# sft.SetKey('calc',['col',o.GetLabel('f'),'=','col',o.GetLabel('f'),wsc,'*'])
# sft.SetKey('calc',['col',o.GetLabel('sigf'),'=','col',o.GetLabel('sigf'),wsc,'*'])
# if o.GetType()=='plus':
# sft.SetKey('calc',['col',om.GetLabel('f'),'=','col',om.GetLabel('f'),wsc,'*'])
# sft.SetKey('calc',['col',om.GetLabel('sigf'),'=','col',om.GetLabel('sigf'),wsc,'*'])
# sft.SetKey('write',outname), sft.SetKey('Y'), sft.SetKey('quit'), sft.SetKey('Y')
# sft.Run(check_bin=True)
# self.inp.SetFileToChild(o, os.path.join(sft.rundir,outname), 'mtz')
# if o.GetType()=='plus':
# self.inp.SetFileToChild(om, os.path.join(sft.rundir,outname), 'mtz')
# p_run.programs.remove(sft)
orig_i = [copy.deepcopy(o),]
if o.GetType()=='plus':
om = p_run.inp.Get('fsigf',typ='minus',col='i',xname=o.GetCrystalName(),dname=o.GetDataName(),filetype='mtz')
oa = p_run.inp.Get('fsigf',typ='average',col='i',xname=o.GetCrystalName(),dname=o.GetDataName(),filetype='mtz')
if om: orig_i.append(copy.deepcopy(om))
if oa: orig_i.append(copy.deepcopy(oa))
trun=p_run.AddProg('truncate',propagate_out=False)
trun.inp.Set(orig_i)
o_base = o.GetFileName(trim_path=True).rsplit('.',1)[0]
trun.outfilename['mtz'] = o_base+'_'+trun.name[:3]+str(num)+'.mtz'
try:
trun.Run(check_bin=True)
except trun.ProgramRunError as e:
sys.exc_clear()
trun.out.ClearAll()
ctrun=p_run.AddProg('ctruncate',propagate_out=False)
ctrun.inp.Set(orig_i)
ctrun.runname+='_'+str(num)
ctrun.outfilename['mtz'] = o_base+'_'+ctrun.name[:3]+str(num)+'.mtz'
self.Info('{0} failed with error message: {1}'.format(trun.name,e))
self.Info('Will try {0} instead.'.format(ctrun.name))
ctrun.Run(check_bin=True)
for o in ctrun.out.GetAll('fsigf'):
p_run.inp.Add(o)
p_run.programs.remove(ctrun)
else:
for o in trun.out.GetAll('fsigf'):
p_run.inp.Add(o)
p_run.programs.remove(trun)
#convert F+/F- to F average (+FD) in case no F average was not supplied
for o in p_run.inp.GetAll('fsigf',col='f',typ='plus',filetype='mtz'):
if not p_run.inp.Get('fsigf',typ='average',col='f',xname=o.GetCrystalName(),dname=o.GetDataName()):
mtzmadmod=p_run.AddProg('mtzMADmod',propagate_out=False)
o_base = o.GetFileName(trim_path=True).rsplit('.',1)[0]
mtzmadmod.outfilename['mtz'] = o_base+'_'+mtzmadmod.name[:7]+'.mtz'
mtzmadmod.Run(check_bin=True)
for out in mtzmadmod.out.GetAll(filetype='mtz',typ='average'):
p_run.inp.Add( out, ind=p_run.inp.fsigf.index(p_run.inp.Get('fsigf',typ='plus',dname=out.dname,xname=out.xname)), propagate=False )
p_run.programs.remove(mtzmadmod)
# scale native to derivative
if p_run.nick=='crank' and not emulate and p_run.inp.Get('fsigf',col='sigf',typ='average',filetype='mtz',is_native=True):
scaleit=p_run.AddProg('scaleit', propagate_out=False)
scaleit.revert_if_one=True
#scaleit.SetKey('refi','scale')
scaleit.Run()
p_run.programs.remove(scaleit)
# setting solvent content and/or num. of monomers
if (not self.inp.Get(has_solvent_content=True) or not self.inp.Get(has_monomers_asym=True)) and \
self.inp.Get('fsigf') and (self.inp.Get('sequence') or self.inp.Get(has_residues_mon=True)) and \
cell_use and spgr_use:
matthews=p_run.AddProcess('matthews', propagate_out=False)
matthews.no_reporting = True
matthews.Run()
p_run.processes.remove(matthews)
else:
if self.inp.Get('sequence') and not self.inp.Get(has_residues_mon=True):
self.inp.Get('sequence').residues_mon=len(self.inp.Get('sequence').GetSequenceString())
if self.inp.Get(has_solvent_content=True) and self.inp.Get('sequence') and not self.inp.Get('sequence',has_solvent_content=True):
self.inp.Get('sequence').solvent_content=self.inp.Get(has_solvent_content=True).solvent_content
if self.inp.Get('sequence') and not self.inp.Get('sequence').seq_monomers:
self.inp.Get('sequence').GetSequenceString()
if not cell_use or not spgr_use:
if not cell_use: error='No cell could be determined!'
if not spgr_use: error='No spacegroup could be determined!'
return error
# get resolution for native only
resol = None
for o in p_run.inp.GetAll('fsigf',xname='native')+p_run.inp.GetAll('fsigf',custom='native'):
if not resol:
resol = o.GetResolution(self,accept_none=True)
else:
o.resol = resol
def RunSubProcess(self,p_run,ip):
# skip ref if nothing was obtained
prev_proc=self.run.processes[ip-1]
if prev_proc.nick in ('mbref','comb_phdmmb') and prev_proc.mb_res_all and prev_proc.mb_res_all[-1][1]==0:
common.Warning('Skipping process of {0} as no residues were obtained in building.'.format(p_run.name))
return
# register process with ccp4i2 if run from ccp4i2 and run the process
if self.run.ccp4i2:
ccp4i2crank.RegisterProcessToCCP4i2(self.run.ccp4i2, p_run)
# run the current process
error,nosuccess=False,False
try:
p_run.Run(from_ccp4i2=self.run.ccp4i2)
except common.Unsuccessful as e:
nosuccess=True
if not self.no_reporting and crvapi:
summary = self.rv_body.Section("summary","Job Summary",row=0)
summary.Text('The pipeline stopped preliminary at {}:'.format(p_run.name))
summary.Text(e.message, flush=True)
except:
error=True
if self.run.ccp4i2:
# register process output with ccp4i2
ccp4i2crank.RegisterOutputToCCP4i2(p_run,error,nosuccess)
if nosuccess or error:
raise
def PassPrevStepsOutput(self,p,ip,emulate=False):
# (indirectly) pass shelxc output to the subsequent steps if needed
prev_proc=self.run.processes[ip-1]
if 'shelxc' in [r.nick for r in prev_proc.programs] and not prev_proc.IsNonFalseVirtPar('no_output_to_next_step') and \
[pr for i,pr in enumerate(self.processes) if i>ip and not [prg for prg in pr.programs if 'shelx' in pr.name]]:
for hkl in prev_proc.out.GetAll('fsigf',filetype=('hkl','cif'),try_convert=False):
if not self.inp.Get('fsigf',filetype='mtz',typ=hkl.GetType(),try_convert=False,xname=hkl.GetCrystalName(),dname=hkl.GetDataName()):
mtz_obj = prev_proc.out.Get('fsigf',filetype='mtz',typ=hkl.GetType(),xname=hkl.GetCrystalName(),dname=hkl.GetDataName())
if mtz_obj:
self.inp.Add( mtz_obj )
else:
common.Warning('Could not convert shelxc output {0} to mtz!'.format(hkl.GetFileName()))
self.Initialize(self)
# pass "unbiased" phases from phasing to building
if p.nick in ('mbref','comb_phdmmb') and p.GetParam('target')!='SAD':
handdet=next((r for ir,r in enumerate(self.run.processes) if ir0 and not prev_proc.IsNonFalseVirtPar('no_output_to_next_step'):
# pass all except the specified exceptions from the previous step
for o in prev_proc.out.GetAll(stored_order=True):
# we exclude any luzd output as that is generally not meant to be used by next steps
# similarly, we use best rather than weighted mapcoef - any exceptions should be specified here
if o.typ!='dluz' and (o.typ!='weighted' or not [m for m in prev_proc.out.GetAll('mapcoef') if m.typ!='weighted']):
p.inp.AddCopy(o,propagate=True)
# pass what was specified by the input
if not emulate:
p.inp.DelayedInit(ip,self)
def RunPostprocess(self,restore=True,emulate=False,*args,**kwargs):
if emulate:
os.chdir(self.previous_cwd)
try:
os.remove(self.GetLogFileName())
os.rmdir(self.rundir)
except (OSError,):
sys.exc_clear()
else:
mapout=self.run.out.Get('mapcoef')
if mapout:
outmtz=self.run.out.Get('datafile',filetype='mtz',custom='refmac')
if not outmtz:
outmtz=self.run.out.Get('mapcoef',filetype='mtz',inp_cont=self.inp.Get('fsigf',xname=mapout.xname,dname=mapout.dname))
if outmtz and hasattr(self,'hklout') and outmtz.GetFileName()!=self.hklout:
shutil.copy(outmtz.GetFileName('mtz'), self.hklout)
# preparing the maps for jsCoFe
if not self.ccp4i2:
outmap=self.run.out.Get('mapcoef',filetype='map',typ='weighted',inp_cont=self.inp.Get('fsigf',xname=mapout.xname,dname=mapout.dname))
if outmap: shutil.copy(outmap.GetFileName('map'), self.hklout+'.map')
outdmap=self.run.out.Get('mapcoef',filetype='map',typ='diff',conv_opts=['no_rewrite'],inp_cont=self.inp.Get('fsigf',xname=mapout.xname,dname=mapout.dname))
if outdmap: shutil.copy(outdmap.GetFileName('map'), self.hklout+'_diff.map')
outpdb=self.run.out.Get(filetype='pdb',typ=('partial+substr','partial'))
if not outpdb: outpdb=self.run.out.Get(filetype='pdb')
if outpdb and hasattr(self,'xyzout') and outpdb.GetFileName()!=self.xyzout:
shutil.copy(outpdb.GetFileName(), self.xyzout)
process.RunPostprocess(self,restore)
if hasattr(self,'logout'):
sys.stdout = self.stdout_save