#!/usr/bin/python
import os,sys,re,string
from process import process
import common


class sepsubstrprot(process):
  name="separation of substructure and protein into 2 PDB files"
  short_name="PDB models separation"
  # assumes substr+prot pdb and substructure atom types on input 
  # output is a substructure pdb and a protein pdb (with empty intersection between them)
  # at the moment, just performed in a simplistic way, based on substructure atomtypes
  # this may be not indended eg for S+methionine/cysteine - perhaps (some previous) substr pdb 
  # should be inputted too and distance criterion from previous substr atoms added?

  def RunBody(self,*args,**kwargs):
    if not self.GetProg('pdbcur'):
      self.pdbcur=self.AddProg('pdbcur')
    else:
      self.pdbcur=self.GetProg('pdbcur')
    if not self.inp.Get('model',typ='partial+substr'):
      common.Error('No protein+substructure model inputted - separation cannot be done')
    if not self.inp.Get('model',typ=('substr','partial+substr'),has_atomtypes=True):
      common.Error('Substructure atom types not specified - separation of substructure and protein cannot be done')
    # quick hack to fix the TER card issues by removing them - not needed anymore thanks to Eugene's new pdbcur functions!
    #pdb = self.inp.Get('model',typ='partial+substr')
    #file_fixed = pdb.GetFileName('pdb')+'_fixed.pdb'
    #with open(file_fixed,'w') as g:
    #  with open(pdb.GetFileName('pdb')) as f:
    #    for line in f:
    #      if not line.startswith('TER   '):
    #        g.write(line)
    #pdb_fixed=self.pdbcur.inp.AddCopy(pdb)
    #self.pdbcur.inp.SetFileToChild(pdb_fixed,file_fixed,filetype='pdb')
    # if substr. is provided in inp. then also filter according to the chains in it
    substr_chains_filter=''
    if self.inp.Get('model',typ='substr'):
      self.pdbcur.ClearAnyParams()
      self.pdbcur.runname=self.pdbcur.name+'_checkinpsub'
      self.pdbcur.inp.Set(self.inp.Get('model',typ='substr'))
      self.pdbcur.SetKey('summ')
      self.pdbcur.Run()
      substr_chains=self.pdbcur.GetStat('chain_ids',accept_none=True)
      self.pdbcur.inp.Set(self.inp.Get('model',typ='partial+substr'))
      if set(self.pdbcur.bad_chain_ids).intersection(substr_chains):
        # rename substr. chains if ids that pdbcur does not accept are present
        self.pdbcur.runname=self.pdbcur.name+'_checkinpmerge'
        self.pdbcur.Run()
        all_chains=self.pdbcur.GetStat('chain_ids',accept_none=True)
        pdbset=self.GetOrAddProg('pdbset')
        pdbset.ClearAnyParams()
        pdbset.inp.Set(self.inp.Get('model',typ='partial+substr'))
        for bad_id in set(self.pdbcur.bad_chain_ids).intersection(substr_chains):
          new_id = next(nid for nid in string.printable if nid not in all_chains and nid not in self.pdbcur.bad_chain_ids and nid not in self.pdbset.bad_chain_ids)
          pdbset.SetKey('chain',(bad_id,new_id))
          all_chains.append(new_id)
          substr_chains.append(new_id), substr_chains.remove(bad_id)
        pdbset.Run()
        self.pdbcur.inp.Set(pdbset.out.Get('model'))
        substr_chains_filter='//'+','.join(substr_chains)+'//' if substr_chains else ''
    # if the prot+substr input model does not contain ha types then include them from substr
    if not self.pdbcur.inp.Get('model',has_atomtypes=True):
      substr=self.inp.Get('model',typ='substr',has_atomtypes=True)
      self.pdbcur.inp.Get('model').SetAtomTypes( substr.GetAtomTypes(), atomtype1=substr.GetAtomType() )
    # first get heavy atoms PDB
    self.pdbcur.ClearAnyParams()
    self.pdbcur.runname=self.pdbcur.name+'_heavy'
    at_str=','.join(self.inp.Get('model',typ=('substr','partial+substr'),has_atomtypes=True).GetAtomTypes())
    self.pdbcur.SetKey('delter')
    # at the moment, an exception is needed for TA as its name is TA1 in the only residue existing
    # we could only check the atom type in the future but for the moment this is safer
    if at_str=='TA':
      self.pdbcur.SetKey('lvatom', '"{1}{0}1[{0}]:*"'.format(at_str,substr_chains_filter))
    else:
      self.pdbcur.SetKey('lvatom', '"{1}{0}[{0}]:*"'.format(at_str,substr_chains_filter))
      #self.pdbcur.SetKey('lvatom', '"(!MSE)/{0}[{0}]:*"'.format(at_str))
    self.pdbcur.outfilename['pdb']='heavy.pdb'
    self.pdbcur.Run()
    self.pdbcur.out.Get('model').SetType('substr')
    # manually remove all cards before CRYST1 (otherwise wrong cards eg non-existing bonds may remain 
    # and cause trouble later - pdbcur should remove them but does not)
    with open(self.pdbcur.out.Get('model').GetFileName('pdb')) as f:
      pdb=f.read()
      re_res=re.search('CRYST1.+(\n.*)+',pdb)
      if not re_res:
        common.Error('CRYST1 card missing in PDB file {0}'.format(self.pdbcur.out.Get('model').GetFileName('pdb')))
    with open(self.pdbcur.out.Get('model').GetFileName('pdb'),'w') as g:
      g.write(re_res.group(0))
    # now get partial model PDB
    self.pdbcur.ClearAnyParams()
    self.pdbcur.runname=self.pdbcur.name+'_part'
    for at in self.inp.Get('model',typ=('substr','partial+substr'),has_atomtypes=True).GetAtomTypes():
      self.pdbcur.SetKey('rmatom', '"{1}{0}[{0}]:*"'.format(at,substr_chains_filter))
      if at=='TA':
        self.pdbcur.SetKey('rmatom', '"{1}{0}1[{0}]:*"'.format(at,substr_chains_filter))
    self.pdbcur.outfilename['pdb']='part.pdb'
    self.pdbcur.Run()
    self.pdbcur.out.Get('model').SetType('partial')