#!/usr/bin/env ccp4-python
#

import os, sys, shutil, string, time
import subprocess, signal, multiprocessing
import argparse
import random
import copy
import re
import glob
import platform
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import ParseError
import urllib, urllib2
from mmtbx.alignment import align

headerMessage = """
#################################################################
#################################################################
########## LOw REsolution STructure Refinement pipeline #########
######################### (LORESTR) #############################
############### Version beta ############ Jun 2016 ##############
#################################################################
## Copyright (C) 2015-2016 Oleg Kovalevskiy, Robert Nicholls   ##
##                     and Garib Murshudov                     ##
##                                                             ##
## LORESTR is distributed in the hope that it will be useful,  ##
## but WITHOUT ANY WARRANTY; without even the implied warranty ##
## of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.     ##
## See the GNU Lesser General Public License (v3) for details. ##
## The authors accept no liability whatsoever for any damage   ##
## caused by the use or misuse of this software.               ##
##                                                             ##
## E-mail: okovalev@mrc-lmb.cam.ac.uk                          ##
##                                                             ##
#################################################################

"""

helpMessage = """

Arguments:

-xyzin, -p1, --pdb1 : PDB file for refinement
-hklin, -f, --mtz   : MTZ data file for refinement
-p2, --pdb2         : PDB file(s) for external restraints generation
-o                  : Output directory
-xyzout             : Output PDB file
-hklout             : Output MTZ data file
-auto               : Run BLAST search over PDB database, fetch homologous
                      structures and use them for external restraints
-pr                 : Protocol file
-nc                 : Maximal number of chains for external restraints
                      generation (default: 3)
-nh                 : Maximal number of homologous structures (default: 5)
-dna                : Generate restraints for DNA/RNA chains
-tls                : Supply TLS file for refinement
-save_space         : Save disk space by removing ProSMART output
                      (all scripts are left)
-labin              : Specify LABIN parameter for Refmac
-phased             : Use phased refinement. Please specify correct -labin
                      parameter for Refmac as well.
-free_1             : CNS-style Free flag ('FREE 1' option for Refmac)
-mr                 : Use this option immediately after MR to run
                      200 cycles of jelly body refinement at the beginning
-cpu                : Number of CPUs used (default: 1)
-minres             : Minimal resolution for automatically downloaded
                      homologues (default: 3.5 A)
#################################################################

Minimal run (will try only jelly-body and hydrogen bonds restraints):
lrsr.py -p1 1.pdb -f 1.mtz

Typical run:
lrsr.py -p1 1.pdb -f 1.mtz -p2 homologue1.pdb homologue2.pdb

Automated run with BLAST search:
lrsr.py -p1 1.pdb -f 1.mtz -auto

#################################################################


"""


def indent(elem, level=0):
  i = "\n" + level * "  "
  if len(elem):
    if not elem.text or not elem.text.strip():
      elem.text = i + "  "
    if not elem.tail or not elem.tail.strip():
      elem.tail = i
    for elem in elem:
      indent(elem, level + 1)
    if not elem.tail or not elem.tail.strip():
      elem.tail = i
  else:
    if level and (not elem.tail or not elem.tail.strip()):
      elem.tail = i


def generateNextDirectoryName(originalDirectory):
  if originalDirectory[-1] == os.sep:
    originalDirectory = originalDirectory[:-1]

  directories = glob.glob(originalDirectory + '_*')
  num = 0
  match = None
  for directory in sorted(directories):
    match = re.search(originalDirectory+'_(\d+)$', directory)
    if match:
      if int(match.group(1)) > num:
        num = int(match.group(1))
  if match:
    num += 1
    return "%s_%d%s" % (originalDirectory, num, os.sep)
  else:
    return originalDirectory + '_1' + os.sep


class Logger(object):
  def __init__(self, logFileName):
    self.terminal = sys.stdout
    try:
      self.log = open(logFileName, 'a')
    except:
      sys.stdout.write('Unable to create main log file %s; terminating.' % logFileName)
      sys.exit(1)

  def write(self, message):
    self.terminal.write(message)
    self.log.write(message)
    self.terminal.flush()
    self.log.flush()

  def flush(self):
    self.terminal.flush()
    self.log.flush()


class ChainRestraints:
  def __init__(self, chain, restraintType):

    if restraintType not in ['Homologue', 'Hbond']:
      sys.stdout.write('Error: Restraints could be only "Homologue","Hbond", not "%s"\n' % restraintType)
      sys.exit(1)

    if len(chain) > 1:
      sys.stdout.write('Error: length of chain identifier can not be more than one letter.\n')
      sys.exit(1)

    self.chain = chain
    self.type = restraintType
    self.restraintList = []  # list of tuples [pdbFileName,chain] for external restrains with homologous structure


class RefinementProtocol:
  def __init__(self, pdbToRefine, refinementType):

    if refinementType not in ['JellyBody', 'ExternalRestraints', 'AllHomologs']:
      sys.stdout.write('Error: Protocol could be only "JellyBody" or "ExternalRestraints" or "AllHomologs", not "%s"\n' % refinementType)
      sys.exit(1)

    if not os.path.isfile(pdbToRefine.pdbFileName):
      sys.stdout.write('Error: PDB file does not exist: %s\n' % pdbToRefine.pdbFileName)
      sys.exit(1)

    if not os.path.isfile(pdbToRefine.mtzFileName):
      sys.stdout.write('Error: mtz file does not exist: %s\n' % pdbToRefine.mtzFileName)
      sys.exit(1)

    self.pdbToRefine = pdbToRefine
    self.type = refinementType
    self.pdbChainRestraints = []  # list of ChainRestraints
    self.description = ''
    self.directory = ''
    self.refmacs = []  # list of Refmac instances for the protocol
    self.prosmarts = []  # list of Prosmart instances
    self.homologsFilesList = None
    self.restraint = '' # restrain-all or restrain-best for AllHomologs
    self.containerCmdFileName = ''
    self.containerCmd = []
    self.started = False
    self.running = False
    self.executed = False
    self.ss = False
    self.restraintsFileName = ''
    self.dnaRestraintsFileName = ''
    self.isWindows = False


  def run(self):
    if len (self.containerCmd) > 0:
      outCom = open(self.containerCmdFileName, 'w')
      if not self.isWindows:
        outCom.write(shellHeader)
      outCom.write(''.join(self.containerCmd) + '\n\n')
      outCom.close()

    if not self.isWindows:
      st = os.stat(self.containerCmdFileName)
      os.chmod(self.containerCmdFileName, st.st_mode | 0111 )

    #os.system('/bin/sh "%s"' % self.containerCmdFileName)

    self.running = True

    for prosmart in self.prosmarts:
      prosmart.run()
      prosmart.readLog()
      if not prosmart.isRunning():
        if not prosmart.executed:
          self.executed = False
          self.running = False
          return

      if os.path.exists(prosmart.restraintsFileName) and len(self.restraintsFileName):
        prosmartRestraintsFile = open(prosmart.restraintsFileName, 'r')
        outputRestraintsFile = open(self.restraintsFileName, 'a')
        for line in prosmartRestraintsFile:
          outputRestraintsFile.write(line)
        prosmartRestraintsFile.close()
        outputRestraintsFile.close()

    if os.path.exists(self.dnaRestraintsFileName):
      dnaRestraintsFile = open(self.dnaRestraintsFileName, 'r')
      outputRestraintsFile = open(self.restraintsFileName, 'a')
      for line in dnaRestraintsFile:
        outputRestraintsFile.write(line)
      dnaRestraintsFile.close()
      outputRestraintsFile.close()

    if len(self.refmacs) == 3: # Special case of 3 Refmacs - testing external restraints with and without parameters, best one goes as input to jelly body
      self.refmacs[0].prepareRun()
      self.refmacs[0].run()
      if not self.refmacs[0].isRunning():
        if not self.refmacs[0].executed:
          self.executed = False
          self.running = False
          return
      self.refmacs[0].readLog()

      self.refmacs[1].prepareRun()
      self.refmacs[1].run()
      if not self.refmacs[1].isRunning():
        if not self.refmacs[1].executed:
          self.executed = False
          self.running = False
          return
      self.refmacs[1].readLog()

      self.refmacs[2].xyzin = min([self.refmacs[0], self.refmacs[1]], key=lambda y: y.log.getFinalRfree()).xyzout
      self.refmacs[2].prepareRun()
      self.refmacs[2].run()
      if not self.refmacs[2].isRunning():
        if not self.refmacs[2].executed:
          self.executed = False
          self.running = False
          return
      self.refmacs[2].readLog()

    else:
      for refmac in self.refmacs:
        refmac.prepareRun()
        refmac.run()
        if not refmac.isRunning():
          if not refmac.executed:
            self.executed = False
            self.running = False
            return
        refmac.readLog()


    self.executed = True
    self.running = False


  def isRunning(self):

    for prosmart in self.prosmarts:
      if prosmart.isRunning():
        self.executed = False
        self.running = True
        return self.running
      else:
        if not prosmart.executed:
          self.executed = False
          self.running = False
          return self.running
        else:
          prosmart.readLog()
          if self.ss:
            prosmart.cleanAllOutput()

    for refmac in self.refmacs:
      if refmac.isRunning():
        self.executed = False
        self.running = True
        return self.running
      else:
        if not refmac.executed:
          self.executed = False
          self.running = False
          return self.running
        else:
          refmac.readLog()

    self.executed = True
    self.running = False
    return self.running


  def terminate(self):

    for prosmart in self.prosmarts:
      if self.ss:
        prosmart.cleanAllOutput()


class PDBtoRefine:
  def __init__(self, pdbFileName, mtzFileName):
    self.pdbFileName = pdbFileName
    self.mtzFileName = mtzFileName
    self.twin = False
    self.scalingLSQ = False
    self.solvent = {}
    self.startingValues = {}
    self.chains = {}  # dictionary chainId : list of dictionaries: homologous chain:value, SeqID:value, local:value, global RMSD:value


class ProSMART:
  def __init__(self, prosmart='prosmart', pdbToRefine=None,
               chainRestraint=None, outputDirectoryBase='ProSMART_',
               allHomologs=False, restraint='', allHomologsList=None, dna=False):

    if not chainRestraint and not allHomologsList and not dna:
      self.die('ProSMART: No chain restraints supplied; terminating.')

    self.prosmart = prosmart
    self.executed = False
    self.running = False
    self.pdbToRefine = pdbToRefine
    self.chainRestraint = chainRestraint
    self.restraints = []
    self.outputDirectoryBase = outputDirectoryBase
    self.comFileName = ''
    self.logFileName = ''
    self.allHomologs = allHomologs
    self.allHomologsList = allHomologsList
    self.restraint = restraint
    self.outputDirectory = ''
    self.dna=dna
    self.restraintsFileName = ''
    self.prosmartWaitingTime = 120 # in seconds
    self.logSize = 0
    self.logTime = 0
    self.prosmartProcess = None
    self.cmd = []
    self.isWindows = False
    self.exitCodeFileName = ''
    self.noLogTime = 0.0

    cmd = [self.prosmart, '-p1', '%s' % os.path.abspath(self.pdbToRefine.pdbFileName)]

    if self.dna:
      if self.allHomologsList:
        cmd += ['-dna_rna','-restrain_best', '-p2']
        filesList = []
        for homologFileName in self.allHomologsList:
          filesList.append('%s' % os.path.abspath(homologFileName))
        cmd += filesList
        self.outputDirectory = os. getcwd() + os.sep + self.outputDirectoryBase + 'DNAhomol'
        self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAhomol.' + comFileExtension
        self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAhomol.log'
        self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAhomol.ecd'
      else:
        cmd += ['-dna_rna', '-self_restrain']
        self.outputDirectory = os. getcwd() + os.sep + self.outputDirectoryBase + 'DNAself'
        self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAself.' + comFileExtension
        self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAself.log'
        self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_DNAself.ecd'

    elif self.allHomologs:
      cmd.append(self.restraint)
      cmd.append('-p2')
      filesList = []
      for homologFileName in self.allHomologsList:
        filesList.append('%s' % os.path.abspath(homologFileName))
      cmd += filesList
      self.outputDirectory = os. getcwd() + os.sep + '%schainAll' % self.outputDirectoryBase
      self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_All.' + comFileExtension
      self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_All.log'
      self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_All.ecd'
      cmd +=['-side', '-sigmatype', '0']

    elif self.chainRestraint.type == 'Hbond':
      cmd.append('-c1')
      cmd.append(self.chainRestraint.chain)
      cmd.append('-bond')
      self.outputDirectory = os. getcwd() + os.sep + '%schain%s' % (self.outputDirectoryBase, self.chainRestraint.chain)
      self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.%s' % (self.chainRestraint.chain, comFileExtension)
      self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.log' % self.chainRestraint.chain
      self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.ecd' % self.chainRestraint.chain
      cmd +=['-side', '-sigmatype', '0']

    elif self.chainRestraint.type == 'Homologue':
      if len(self.chainRestraint.restraintList) < 1:
        self.die('ProSMART: No external homologs supplied for homolog-based restraints; terminating.')
      cmd.append('-c1')
      cmd.append(self.chainRestraint.chain)
      filesList = []
      chainsList = []
      for (homologFileName, chain) in self.chainRestraint.restraintList:
        filesList.append('%s' % os.path.abspath(homologFileName))
        chainsList.append(chain)
      cmd.append('-p2')
      cmd += filesList
      cmd.append('-c2')
      cmd += chainsList
      cmd +=['-restrain_all', '-side', '-sigmatype','0']
      self.outputDirectory = os. getcwd() + os.sep + '%schain%s' % (self.outputDirectoryBase, self.chainRestraint.chain)
      self.comFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.%s' % (self.chainRestraint.chain, comFileExtension)
      self.logFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.log' % self.chainRestraint.chain
      self.exitCodeFileName = os.getcwd() + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_prosmart_%s.ecd' % self.chainRestraint.chain

    else:
      self.die('ProSMART: wrong parameter, restraint type %s is not known; terminating.' % self.chainRestraint.type)

    cmd += ['-o', '%s' % self.outputDirectory]

# DNA??? - Oh yes, all  self-restraints (for protein chains as well) will be copied
    self.restraintsFileName = self.outputDirectory + os.sep + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '.txt'

    self.cmd = cmd


  def die(self, message, removeOutputDirectory = False):
    sys.stdout.write('\n\n' + str(message) + '\n\n')
    if removeOutputDirectory and os.path.isdir(self.outputDirectory):
      try:
        shutil.rmtree(self.outputDirectory)
      except OSError:
        sys.stdout.write('Error during removal of InputAnalysis working directory\n')
    sys.exit(1)


  def prepareRun(self):

    outCom = open(self.comFileName, 'w')
    if not self.isWindows:
      outCom.write(shellHeader)
    outCom.write(' \\\n '.join(self.cmd) + ' > %s \n\n' % self.logFileName)
    outCom.close()

    if not self.isWindows:
      st = os.stat(self.comFileName)
      os.chmod(self.comFileName, st.st_mode | 0111 ) # chmod +x


  def run(self):
    self.running = True
    self.prosmartProcess = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    outLog = open(self.logFileName, 'w')
    for line in self.prosmartProcess.stdout:
      outLog.write(line)
      outLog.flush()
    outLog.close()

    exitCode = self.prosmartProcess.wait()

    outExitCode = open(self.exitCodeFileName, 'w')
    outExitCode.write( str(exitCode) )
    outExitCode.close()

    if exitCode == 0:
      self.readLog()

  def isRunning(self):

    if self.executed:
      self.running = False
      return self.running

    if not os.path.exists(self.exitCodeFileName):

      if not os.path.exists(self.logFileName):
        if self.noLogTime == 0.0:
          self.noLogTime = time.time()
          self.running = True
          return self.running
        else:
          curTime = time.time()
          if curTime - self.noLogTime > self.prosmartWaitingTime:
            self.executed = False
            self.running = False
            return self.running
          else:
            self.running = True
            return self.running
      else: # log file exists
        self.running = True
        return self.running

    else:
      exitCodeFile = open(self.exitCodeFileName,'r')
      exitCode = int(exitCodeFile.readline())
      exitCodeFile.close()

      if exitCode == 0:
        self.executed = True
        self.running = False
        return self.running
      else:
        self.executed = False
        self.running = False
        return self.running

  def readLog(self):

    output = None
    try:
      outputIN = open(self.logFileName, 'r')
      output = outputIN.read()
      outputIN.close()
    except IOError:
      self.die('Can`t read ProSMART log-file %s; terminating' % self.logFileName)

    match = re.search(r'ProSMART completed', output)
    if not self.dna and not match:
      sys.stdout.write('Something wrong with ProSMART run. ProSMART output: \n')
      self.die(output)

    # !!!
    # What happens with DNA key???
    # !!!

    # match = re.search(r'Final restraints file copied to:\s(.*)\n', output)
    #
    # if match:
    #   if os.path.exists(match.group(1)):
    #     self.restrainsFileName = os.path.abspath(match.group(1))
    #   else:
    #     sys.stdout.write('Cant locate ProSMART output file %s; terminating \n' % match.group(1))
    #     self.die(output)
    # else:
    #   sys.stdout.write('Cant locate ProSMART output file name in the log; terminating\n' )
    #   self.die(output)
    #
    # self.executed = True


    # if self.dna and not self.allHomologsList:
    #   #read self-restraints for DNA/RNA chains (which are not protein chains), if there are any
    #   dnaSelfRestrains = os.listdir(self.outputDirectory + '/Output_Files/Restraints/Chain_Chain/')
    #   if len(dnaSelfRestrains) < 1:
    #     self.die('ProSMART failed to generate self-restraints; terminating. \n')
    #
    #   proteinChains = self.pdbToRefine.chains.keys()
    #   for entry in dnaSelfRestrains:
    #     # match = re.search(r'^(' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + ')_(\S)_(\S+)_(\S)\.txt$', entry)
    #     match = re.search(r'^(' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + ')_(\S)_(' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + ')_(\S)\.txt$', entry)
    #     if match:
    #       pdb1 = match.group(1)
    #       chain1 = match.group(2)
    #       pdb2 = match.group(3)
    #       chain2 = match.group(4)
    #
    #       if chain1 != chain2:
    #         self.die('ProSMART: incorrect file names during self-restraints generation; terminating. \n')
    #
    #       if chain1 not in proteinChains:
    #         try:
    #           restrainsIN = open(self.outputDirectory + '/Output_Files/Restraints/Chain_Chain/'+ entry, 'r')
    #           if restrainsIN:
    #             for currentString in restrainsIN:
    #               if currentString[0] != '#':
    #                 self.restrains.append(currentString)
    #             restrainsIN.close()
    #         except IOError:
    #           self.die(
    #             'ProSMART: can`t open restrains file %s ; terminating.' %
    #             self.outputDirectory + '/Output_Files/Restrains/Chain_Chain/'+ entry)
    #
    #
    # else:
    #   try:
    #     restrainsIN = open(self.outputDirectory + '/' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '.txt', 'r')
    #     if restrainsIN:
    #       for currentString in restrainsIN:
    #         if currentString[0] != '#':
    #           self.restrains.append(currentString)
    #       restrainsIN.close()
    #   except IOError:
    #     if not self.dna:
    #       self.die(
    #         'ProSMART: can`t open restrains file %s; terminating.' %
    #         self.outputDirectory + '/' + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '.txt')

  def cleanAllOutput(self):
    try:
      if os.path.exists(self.outputDirectory):
        shutil.rmtree(self.outputDirectory)
    except OSError:
      sys.stdout.write('\nCan`t remove ProSMART directory %s, trying to continue.\n' % self.outputDirectory)
      return


class Refmac:
  def __init__(self, refmac='refmac5', xyzin='', xyzout='', hklin='', hklout='', onCluster=False, ncyc=0):

    self.refmac = refmac
    self.logFileName = ''
    self.comFileName = ''
    self.hydrogens = 'NO'
    self.jellyBody = False
    self.jellyBodyWeight = 0.01
    self.proSMARTParams = False
    self.proSMARTFile = ''
    self.proSMARTScale = 10
    self.proSMARTWeight = 0.02
    self.proSMARTDmax = 4.2
    self.twin = False
    self.ncs = False
    self.phased = False
    self.labin = ''
    self.free_1 = False
    self.solventOptimise = False
    self.solvent = {}
    self.tls = ''
    self.scalingLSQ = False
    self.useWeight = False
    self.weight = 0.3
    self.params = ''
    self.molprobity = False
    self.executed = False
    self.log = None
    self.xyzin = os.path.abspath(xyzin)
    self.hklin = os.path.abspath(hklin)
    self.isWindows = False

    if len(xyzout) > 1:
      self.xyzout = xyzout
    else:
      self.successfullyLoaded = False
      return

    if len(hklout) > 1:
      self.hklout = hklout
    else:
      self.successfullyLoaded = False
      return

    self.onCluster = onCluster
    self.ncyc = ncyc
    self.xmlFileName = self.xyzout[:-3] + 'xml'
    self.comFileName = self.xyzout[:-3] + comFileExtension
    self.logFileName = self.xyzout[:-3] + 'log'
    self.exitCodeFileName = self.xyzout[:-3] + 'ecd'

    self.running = False

    self.refmacWaitingTime = 120 #in seconds
    self.noLogTime = 0.0
    self.noComTime = 0.0

    self.refmacProcess = None
    self.cmd = []

    self.successfullyLoaded = True

  def prepareRun(self):

    cmd = [self.refmac,
           'XYZIN', '%s' % self.xyzin,
           'HKLIN', '%s' % self.hklin,
           'XYZOUT', '%s' % self.xyzout,
           'HKLOUT', '%s' % self.hklout,
           'XMLOUT', '%s' % self.xmlFileName]

    if len(self.tls) > 1:
      cmd.append('TLSIN')
      cmd.append('%s' % self.tls)

    if self.onCluster:
      cmd.append('SCRREF')
      cmd.append(os.sep +'tmp'+os.sep+ 'refmac_tmp_' + ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(12)))

    self.cmd = cmd

    self.params = 'MAKE - \n'
    self.params += 'HYDROGEN ' + self.hydrogens + ' - \n'
    self.params += 'NEWLIGAND CONTINUE \n'

    if self.phased:
      self.params += 'REFI PHASED \n'

    self.params += 'NCYC ' + str(self.ncyc) + ' \n'

    if self.solventOptimise:
      self.params += 'SOLVENT OPTIMISE\n'

    if len(self.solvent) == 3:
      self.params += 'SOLVENT YES -\n'
      self.params += '    VDWProb %0.2f -\n' % self.solvent['VDWProb']
      self.params += '    IONProb %0.2f -\n' % self.solvent['IONProb']
      self.params += '    RSHRink %0.2f \n' % self.solvent['RSHRink']

    if self.twin:
      self.params += 'TWIN \n'

    if self.ncs:
      self.params += 'NCSR LOCAL  \n'

    if len(self.tls) > 1:
      self.params += 'REFI TLSC 10  \n'


    if self.proSMARTParams:
#      if not os.path.exists(self.proSMARTFile):
#        sys.stdout.write('ProSMART file %s does not exists; terminating.\n' % self.proSMARTFile)
#        sys.exit(1)
      self.params += 'EXTERNAL WEIGHT SCALE ' + str(self.proSMARTScale) + ' \n'
      self.params += 'EXTERNAL WEIGHT GMWT ' + str(self.proSMARTWeight) + ' \n'
      self.params += 'EXTERNAL DMAX ' + str(self.proSMARTDmax) + ' \n'

    if len(self.proSMARTFile) > 1:
      self.params += '@' + self.proSMARTFile + ' \n'

    if self.jellyBody:
      self.params += 'RIDG DIST SIGM  ' + str(self.jellyBodyWeight) + ' \n'

    if self.useWeight:
      self.params += 'WEIGHT MATRIX ' + str(self.weight) + ' \n'

    if self.free_1:
      self.params += 'FREE 1 \n'

    if self.scalingLSQ:
      self.params += 'SCALE LSSCALE FUNCTION LSQ \n'

    self.params += 'MONI DIST 1000000 \n'

    if len(self.labin) > 1:
      self.params += 'LABIN %s \n' % self.labin

    self.params += 'END \n\n'

    outCom = open(self.comFileName, 'w')

    if not self.isWindows:
      outCom.write(shellHeader)

    outCom.write(' \\\n '.join(cmd) + ' > ' + self.logFileName + ' << EOF\n')
    outCom.write(self.params)
    outCom.write('EOF\n\n')
    outCom.close()

    st = os.stat(self.comFileName)
    os.chmod(self.comFileName, st.st_mode | 0111 )

  def run(self):
    #os.system('/bin/sh "%s"' % self.comFileName)
    self.running = True
    self.refmacProcess = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE)

    for param in self.params:
      self.refmacProcess.stdin.write(param)
      self.refmacProcess.stdin.flush()

    outLog = open(self.logFileName, 'w')
    for line in self.refmacProcess.stdout:
      outLog.write(line)
      outLog.flush()
    outLog.close()

    exitCode = self.refmacProcess.wait()

    outExitCode = open(self.exitCodeFileName, 'w')
    outExitCode.write( str(exitCode) )
    outExitCode.close()

    if exitCode == 0:
      self.readLog()


  def isRunning(self):

    if os.path.exists(self.xyzout) and os.path.exists(self.hklout) and os.path.exists(self.xmlFileName):
      self.executed = True
      self.running = False
      return self.running

    if not os.path.exists(self.exitCodeFileName):
      if os.path.exists(self.comFileName):
        if not os.path.exists(self.logFileName):
          if self.noLogTime == 0.0:
            self.noLogTime = time.time()
            self.running = True
            return self.running
          else:
            curTime = time.time()
            if curTime - self.noLogTime > self.refmacWaitingTime:
              self.executed = False
              self.running = False
              return self.running
            else:
              self.running = True
              return self.running
        else: # log file exists
          self.running = True
          return self.running
      else: # no com file

        if self.noComTime == 0.0:
          self.noComTime = time.time()
          self.running = True
          return self.running
        else:
          curTime = time.time()
          if curTime - self.noComTime > self.refmacWaitingTime:
            self.executed = False
            self.running = False
            return self.running
          else:
            self.running = True
            return self.running

    else: # there is exit code file -> Refmac finished
      exitCodeFile = open(self.exitCodeFileName,'r')
      exitCode = int(exitCodeFile.readline())
      exitCodeFile.close()
      if exitCode == 0:
        self.executed = True
        self.running = False
        return self.running
      else:
        self.executed = False
        self.running = False
        return self.running


  def readLog(self):
    if os.path.exists(self.xmlFileName):
      if self.solventOptimise:
        self.log = RefmacXMLLog(self.xmlFileName, solventTest=True, molprobity = self.molprobity, pdbFileName=self.xyzout)
      else:
        self.log = RefmacXMLLog(self.xmlFileName, molprobity = self.molprobity, pdbFileName=self.xyzout)
      return True
    else:
      return False


  def cleanAllOutput(self):
    try:
      if os.path.exists(self.hklout):
        os.remove(self.hklout)
      if os.path.exists(self.xyzout):
        os.remove(self.xyzout)
      if os.path.exists(self.comFileName):
        os.remove(self.comFileName)
      if os.path.exists(self.logFileName):
        os.remove(self.logFileName)
      if os.path.exists(self.xmlFileName):
        os.remove(self.xmlFileName)

    except OSError:
      sys.stdout.write('\nCan`t clean files after Refmac, trying to continue.\n')
      return


class RefmacXMLLog:
  def __init__(self, fileName, solventTest = False, molprobity = False, pdbFileName = ''):
    self.successfullyLoaded = False
    self.ncyc = 0
    self.successfullyRefined = False
    self.twin = False
    self.cycles = []
    self.solvent = {}
    self.molprobity = molprobity
    self.molprobityAnalysis = {}

    if not os.path.isfile(fileName):
      sys.stdout.write('No XML log-file found after Refmac run; terminating.\nPlease check input files.\n\n')
      sys.exit(1)

    try:
      xmlRoot = ET.parse(fileName).getroot()

      if int(xmlRoot.find('twin_info').find('ntwin_domain').text.strip()) > 1:
        self.twin = True

      if solventTest:
        self.solvent['VDWProb'] = float(xmlRoot.find('solvent_info').find('params_rfree').find('vdw_probe_radius').text.strip())
        self.solvent['IONProb'] = float(xmlRoot.find('solvent_info').find('params_rfree').find('ion_probe_radius').text.strip())
        self.solvent['RSHRink'] = float(xmlRoot.find('solvent_info').find('params_rfree').find('shrinkage_radius').text.strip())

      else:

        for cycles in xmlRoot.find('Overall_stats').find('stats_vs_cycle'):
          cycle = {
            'Ncyc': int(cycles.find('cycle').text.strip()),
            'Rfact': float(cycles.find('r_factor').text.strip()),
            'Rfree': float(cycles.find('r_free').text.strip())
          }
          if cycles.find('rmsBOND').text.strip()[0] != '*':
            cycle['rmsBOND'] = float(cycles.find('rmsBOND').text.strip())
          else:
            cycle['rmsBOND'] = 0
          cycle['rmsANGL'] = float(cycles.find('rmsANGLE').text.strip())
          cycle['rmsCHIRAL'] = float(cycles.find('rmsCHIRAL').text.strip())

          self.cycles.append(copy.deepcopy(cycle))

    except:
      sys.stdout.write('Problems with parsing Refmac XML log-file; terminating\n\n')
      sys.exit(1)

    self.ncyc = int(len(self.cycles) - 1)
    self.successfullyLoaded = True
    if not solventTest and self.cycles[0]['Rfree'] > self.cycles[self.ncyc]['Rfree']:
      self.successfullyRefined = True
    elif solventTest and len(self.solvent) == 3:
      self.successfullyRefined = True
    else:
      self.successfullyRefined = False

    if self.molprobity:
      molprobProcess = subprocess.Popen(['phenix.molprobity', pdbFileName, 'quiet=True', 'percentiles=True'], stdout=subprocess.PIPE)
      (out, err) = molprobProcess.communicate()
      for str in out.splitlines():
        mol_parameters = str.split('=')
        if 'Ramachandran' in mol_parameters[0].strip():
          self.molprobityAnalysis['ramaOutliers'] = float(mol_parameters[1].strip()[:-1])
          self.cycles[0]['ramaOutliers'] = self.molprobityAnalysis['ramaOutliers']
        if 'favored' in mol_parameters[0].strip():
          self.molprobityAnalysis['ramaFavoured'] = float(mol_parameters[1].strip()[:-1])
          self.cycles[0]['ramaFavoured'] = self.molprobityAnalysis['ramaFavoured']
        if 'Clashscore' in mol_parameters[0].strip():
          self.molprobityAnalysis['clash'] = float(re.search(r'^(\S*)\s.*', mol_parameters[1].strip()).group(1) )
          self.molprobityAnalysis['clash_percentile'] = float(re.search(r'percentile:\s(\S*)\)', mol_parameters[1].strip()).group(1) )
          self.cycles[0]['clash'] = self.molprobityAnalysis['clash']
          self.cycles[0]['clash_percentile'] = self.molprobityAnalysis['clash_percentile']
        if 'MolProbity' in mol_parameters[0].strip():
          self.molprobityAnalysis['molprobity'] = float(re.search(r'^(\S*)\s.*', mol_parameters[1].strip()).group(1) )
          self.molprobityAnalysis['molprobity_percentile'] = float(re.search(r'percentile:\s(\S*)\)', mol_parameters[1].strip()).group(1) )
          self.cycles[0]['molprobity'] = self.molprobityAnalysis['molprobity']
          self.cycles[0]['molprobity_percentile'] = self.molprobityAnalysis['molprobity_percentile']

  def getRamaOutliers(self):
    return self.molprobityAnalysis['ramaOutliers']

  def getRamaFavoured(self):
    return self.molprobityAnalysis['ramaFavoured']

  def getClashscore(self):
    return self.molprobityAnalysis['clash']

  def getClashscore_percentile(self):
    return self.molprobityAnalysis['clash_percentile']

  def getMolprobity(self):
    return self.molprobityAnalysis['molprobity']

  def getMolprobity_percentile(self):
    return self.molprobityAnalysis['molprobity_percentile']

  def getAllRfree(self):
    rfrees = []
    for cycle in self.cycles:
      rfrees.append(cycle['Rfree'])
    return rfrees

  def getStartRfact(self):
    return self.cycles[0]['Rfact']

  def getStartRfree(self):
    return self.cycles[0]['Rfree']

  def getStartrmsBOND(self):
    return self.cycles[0]['rmsBOND']

  def getStartrmsANGL(self):
    return self.cycles[0]['rmsANGL']

  def getStartrmsCHIRAL(self):
    return self.cycles[0]['rmsCHIRAL']

  def getFinalRfact(self):
    return self.cycles[self.ncyc]['Rfact']

  def getFinalRfree(self):
    return self.cycles[self.ncyc]['Rfree']

  def getFinalrmsBOND(self):
    return self.cycles[self.ncyc]['rmsBOND']

  def getFinalrmsANGL(self):
    return self.cycles[self.ncyc]['rmsANGL']

  def getFinalrmsCHIRAL(self):
    return self.cycles[self.ncyc]['rmsCHIRAL']

  def getRefinementRfactChange(self):
    return self.cycles[self.ncyc]['Rfact'] - self.cycles[0]['Rfact']

  def getRefinementRfreeChange(self):
    return self.cycles[self.ncyc]['Rfree'] - self.cycles[0]['Rfree']

  def getRefinementrmsBONDChange(self):
    return self.cycles[self.ncyc]['rmsBOND'] - self.cycles[0]['rmsBOND']

  def getRefinementrmsANGLChange(self):
    return self.cycles[self.ncyc]['rmsANGL'] - self.cycles[0]['rmsANGL']

  def getRefinementrmsCHIRALChange(self):
    return self.cycles[self.ncyc]['rmsCHIRAL'] - self.cycles[0]['rmsCHIRAL']


class InputAnalysis:
  def __init__(self, outputDirectory):
    try:
      outputDirectory = os.path.abspath(outputDirectory)
      if outputDirectory[-1:] != os.sep:
        outputDirectory += os.sep

      if not os.path.exists(outputDirectory):
          os.makedirs(outputDirectory)

    except OSError:
        self.die('"InputAnalysis" can`t prepare output directory %s' % outputDirectory)

    self.outputDirectory = outputDirectory
    self.prosmart = 'prosmart'
    self.comFileName = ''
    self.logFileName = ''
    self.labin = ''
    self.free_1 = False
    self.molprobity = False

  def die(self, message, removeOutputDirectory = False):
    sys.stdout.write('\n\n' + str(message) + '\n\n')
    if removeOutputDirectory and os.path.isdir(self.outputDirectory):
      try:
        shutil.rmtree(self.outputDirectory)
      except OSError:
        sys.stdout.write('Error during removal of InputAnalysis working directory\n')
    sys.exit(1)


  def twinTest(self, pdbToRefine):

    pdbTwintest = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'twintest.pdb', hklin=pdbToRefine.mtzFileName,
                         hklout=self.outputDirectory + 'twintest.mtz')
    if pdbTwintest.successfullyLoaded:
      pdbTwintest.twin = True
      pdbTwintest.ncyc = 0
      if len(self.labin) > 0:
        pdbTwintest.labin = self.labin
      pdbTwintest.free_1 = self.free_1
      pdbTwintest.prepareRun()
      pdbTwintest.run()
    else:
      self.die('Can`t prepare Refmac run for twin test; terminating.\nPlease make sure that input files are correct.')
    if pdbTwintest.log.successfullyLoaded:
      pdbTwintest.cleanAllOutput()
      return pdbTwintest.log.twin
    else:
      self.die('Refmac run during twin test failed; terminating.\nPlease make sure that input files are correct.')


  def mr(self, pdbToRefine):

    refmacMRtest = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'mrTest.pdb', hklin=pdbToRefine.mtzFileName,
                         hklout=self.outputDirectory + 'mrTest.mtz')
    if refmacMRtest.successfullyLoaded:
      refmacMRtest.twin = pdbToRefine.twin
      refmacMRtest.ncyc = 3
      if len(self.labin) > 0:
        refmacMRtest.labin = self.labin
      refmacMRtest.free_1 = self.free_1
      refmacMRtest.jellyBody = True
      refmacMRtest.prepareRun()
      refmacMRtest.run()
    else:
      self.die('Can`t prepare Refmac run for jelly body cycles after MR (1 of 2); terminating.\nPlease make sure that input files are correct.')
    if refmacMRtest.log.successfullyLoaded:

      pdbName = os.path.basename(pdbToRefine.pdbFileName)

      refmacMR = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'mr_' + pdbName, hklin=pdbToRefine.mtzFileName,
                           hklout=self.outputDirectory +  'mr_' + pdbName[:-3] + 'mtz')
      if refmacMR.successfullyLoaded:
        refmacMR.twin = pdbToRefine.twin

        if refmacMRtest.log.getFinalRfree() > 0:
          if refmacMRtest.log.getFinalRfree() < 0.35:
            refmacMR.ncyc = 100
          else:
            refmacMR.ncyc = 200
        else:
          refmacMR.ncyc = 200

        if len(self.labin) > 0:
          refmacMR.labin = self.labin
        refmacMR.free_1 = self.free_1
        refmacMR.jellyBody = True
        refmacMR.molprobity = self.molprobity
        refmacMR.prepareRun()
        refmacMR.run()
      else:
        self.die('Can`t prepare Refmac run for jelly body cycles after MR (2 of 2); terminating.\nPlease make sure that input files are correct.')

      if refmacMR.log.successfullyLoaded:
        return refmacMR
      else:
        self.die('Refmac run during jelly body cycles after MR failed (2 of 2); terminating.\nPlease make sure that input files are correct.')

    else:
      self.die('Refmac run during jelly body cycles after MR failed (1 of 2); terminating.\nPlease make sure that input files are correct.')


  def scaleTestAndStartingValues(self, pdbToRefine):

    pdbScaletest1 = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'scaletest1.pdb', hklin=pdbToRefine.mtzFileName,
                           hklout=self.outputDirectory + 'scaletest1.mtz')
    if pdbScaletest1.successfullyLoaded:
      pdbScaletest1.ncyc = 0
      if pdbToRefine.twin:
        pdbScaletest1.twin = True
      if len(self.labin) > 0:
        pdbScaletest1.labin = self.labin
      pdbScaletest1.free_1 = self.free_1
      pdbScaletest1.molprobity = self.molprobity
      pdbScaletest1.prepareRun()
      pdbScaletest1.run()
    else:
      self.die(
        'Can`t prepare Refmac run for scaling test 1; terminating.\nPlease make sure that input files are correct.')
    if not pdbScaletest1.log.successfullyLoaded:
      self.die('Refmac run during scaling test 1 failed; terminating.\nPlease make sure that input files are correct.')
    pdbScaletest1.cleanAllOutput()

    pdbScaletest2 = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'scaletest2.pdb', hklin=pdbToRefine.mtzFileName,
                           hklout=self.outputDirectory + 'scaletest2.mtz')
    if pdbScaletest2.successfullyLoaded:
      pdbScaletest2.ncyc = 0
      pdbScaletest2.scalingLSQ = True
      if pdbToRefine.twin:
        pdbScaletest2.twin = True
      if len(self.labin) > 0:
        pdbScaletest2.labin = self.labin
      pdbScaletest2.free_1 = self.free_1
      pdbScaletest2.molprobity = self.molprobity
      pdbScaletest2.prepareRun()
      pdbScaletest2.run()
    else:
      self.die(
        'Can`t prepare Refmac run for scaling test 2; terminating.\nPlease make sure that input files are correct.')
    if not pdbScaletest2.log.successfullyLoaded:
      self.die('Refmac run during scaling test 2 failed; terminating.\nPlease make sure that input files are correct.')
    pdbScaletest2.cleanAllOutput()

    solventTest = Refmac(xyzin=pdbToRefine.pdbFileName, xyzout=self.outputDirectory + 'solventTest.pdb', hklin=pdbToRefine.mtzFileName,
                           hklout=self.outputDirectory + 'solventTest.mtz')
    if solventTest.successfullyLoaded:
      solventTest.ncyc = 0
      solventTest.solventOptimise = True
      if pdbToRefine.twin:
        pdbScaletest2.twin = True
      if pdbScaletest1.log.getFinalRfree() > pdbScaletest2.log.getFinalRfree():
        solventTest.scalingLSQ = True
      if len(self.labin) > 0:
        solventTest.labin = self.labin
      solventTest.free_1 = self.free_1
      solventTest.prepareRun()
      solventTest.run()
    else:
      self.die(
        'Can`t prepare Refmac run for solvent test; terminating.\nPlease make sure that input files are correct.')
    if not solventTest.log.successfullyLoaded:
      self.die('Refmac run during solvent test failed; terminating.\nPlease make sure that input files are correct.')
    solventTest.cleanAllOutput()

    if pdbScaletest1.log.getFinalRfree() > pdbScaletest2.log.getFinalRfree():
      pdbToRefine.scalingLSQ = True
      pdbToRefine.startingValues = pdbScaletest2.log.cycles[0]
      pdbToRefine.solvent = solventTest.log.solvent
      return pdbToRefine
    else:
      pdbToRefine.scalingLSQ = False
      pdbToRefine.startingValues = pdbScaletest1.log.cycles[0]
      pdbToRefine.solvent = solventTest.log.solvent
      return pdbToRefine


  def matchToHomologs(self, pdbFileName, homologsFilesList, pdbChain = None, homologsChains = None, ss = False):

    chains = {}  # dictionary chainId : list of dictionaries: homologous chain, SeqID, local, global RMSD

    parametersOUT = open(self.outputDirectory + 'p1.txt', 'w')
    parametersOUT.write('-p1\n')
    parametersOUT.write('%s\n' % os.path.abspath(pdbFileName))

    if homologsFilesList:
      for fileName in homologsFilesList:
        parametersOUT.write('%s\n' % os.path.abspath(fileName))
    parametersOUT.close()

    if pdbChain:
      parametersOUT = open(self.outputDirectory + 'c1.txt', 'w')
      parametersOUT.write('-c1\n')
      parametersOUT.write('%s\n' % pdbChain)

      if homologsChains:
        for chain in homologsChains:
          parametersOUT.write('%s\n' % chain)
      parametersOUT.close()

    cmd = [self.prosmart, '-f', '%sp1.txt' % self.outputDirectory]
    if pdbChain:
      cmd += ['-f', '%sc1.txt' % self.outputDirectory]
    cmd += ['-a1', '-quick', '-o', '%sProSMART_Output%s' % (self.outputDirectory, os.sep)]
    # output = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout.read()

    self.comFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_prosmart.' + comFileExtension
    self.logFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_prosmart.log'

    outCom = open(self.comFileName, 'w')

    if not isWindows:
      outCom.write(shellHeader)
    outCom.write(' \\\n '.join(cmd) + ' > %s \n\n' % self.logFileName)
    outCom.close()

    if not isWindows:
      st = os.stat(self.comFileName)
      os.chmod(self.comFileName, st.st_mode | 0111 )

#    os.system('/bin/sh "%s"' % self.comFileName)
    prosmartProcess = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    outLog = open(self.logFileName, 'w')
    for line in prosmartProcess.stdout:
      outLog.write(line)
    outLog.close()

    outputIN = open(self.logFileName, 'r')
    if not outputIN:
      self.die('Can`t read ProSMART log-file %s; terminating' % self.logFileName)
    output = outputIN.read()
    outputIN.close()

    match = re.search(r'ProSMART completed', output)
    if not match:
      sys.stdout.write('Something wrong with ProSMART run for the input analysis. ProSMART output: \n')
      self.die(output)

    homologsFilesList.append(pdbFileName)

    allPDBandChainsList = os.listdir(self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Global_Statistics'+os.sep+'Single'+os.sep)
    if len(allPDBandChainsList) < 1:
      sys.stdout.write('ProSMART failed to generate statistics files for input analysis. ProSMART output: \n')
      self.die(output)

#    rmsdRejection = False

    match = False

    for entry in allPDBandChainsList:
      for homolog in homologsFilesList:
        match = re.search(r'^(' + os.path.basename(pdbFileName)[:-4] + ')_(\S)_(' + os.path.basename(homolog)[:-4] + ')_(\S)\.txt$', entry)
        if match:
          break

      if match:
        pdb1 = match.group(1)
        chain1 = match.group(2)
        pdb2 = match.group(3)
        chain2 = match.group(4)

        if pdb1 != pdb2:
          chainInfo = {}

          lengthOfCurrentChain = 1

          try:
            lengthOfCurrentChainFile = os.stat(self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep+'%s_%s.txt' %(pdb1, chain1)).st_size
            lengthOfCurrentChain = lengthOfCurrentChainFile - 8 - int (lengthOfCurrentChainFile/10)
          except:
            self.die('Error when getting statistics for file %s; terminating ' % self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep+'%s_%s.txt' %(pdb1, chain1))

          for fileName in homologsFilesList:
            if os.path.basename(fileName)[:-4] == pdb2:
              chainInfo['pdb'] = fileName
              break
          chainInfo['chain'] = chain2

          try:
            chainInfoIN = open(self.outputDirectory + 'ProSMART_Output'+os.sep+'Output_Files'+os.sep+'Global_Statistics'+os.sep+'Single'+os.sep + entry)
          except IOError:
            self.die('Can`t open ProSMART generated file %s during input analysis.' % entry)

          for currentString in chainInfoIN:
            parameters = currentString.split()
            chainInfo[parameters[0]] = parameters[1]
          chainInfoIN.close()

          # To calculate PercentAligned use 'NumberAligned' - 'Aligned'

          # New ProSMART version support
          if 'GlobalRMSD' in chainInfo.keys():
            chainInfo['RMSD'] = chainInfo['GlobalRMSD']
          if 'NumberAligned' in chainInfo.keys():
            chainInfo['Aligned'] = chainInfo['NumberAligned']
          if 'AverageFlexible' in chainInfo.keys():
            chainInfo['Minimum'] = chainInfo['AverageFlexible']
          if 'SequenceIdentity' in chainInfo.keys():
            chainInfo['SeqIden'] = chainInfo['SequenceIdentity']
          chainInfo['Percent'] = (float(chainInfo['Aligned']) / float(lengthOfCurrentChain) ) * 100

          if 'SeqIden' not in chainInfo.keys() or \
                  'Minimum' not in chainInfo.keys() or \
                  'Percent' not in chainInfo.keys() or \
                  'RMSD' not in chainInfo.keys():
            self.die('Can`t find SeqIden or Minimum or RMSD data in ProSMART output; terminating.\n')

          if float(chainInfo['RMSD']) >= rmsdTreshold   and \
                  float(chainInfo['Percent'])/100 >= MinimalPercentAligned and \
                  float(chainInfo['SeqIden'])/100 >= MinimalSequenceIdentityForHomologs:

            if chain1 in chains.keys():
              chains[chain1].append(copy.deepcopy(chainInfo))
            else:
              chains[chain1] = []
              chains[chain1].append(copy.deepcopy(chainInfo))

        else:
          if chain1 not in chains.keys():
            chains[chain1] = []
          if chain2 not in chains.keys():
            chains[chain2] = []

    if ss:
      try:
        if os.path.exists('%sProSMART_Output%s' % (self.outputDirectory,os.sep)):
          shutil.rmtree('%sProSMART_Output%s' % (self.outputDirectory, os.sep))
      except OSError:
        sys.stdout.write('\nCan`t remove ProSMART directory %s, trying to continue.\n' % '%sProSMART_Output' % self.outputDirectory)

    return chains

  def getSequence(self, pdbFileName):

    chainSequences = []

    cmd = [self.prosmart, '-p1', '%s' % os.path.abspath(pdbFileName), '-a1', '-quick',
           '-o', '%sProSMART_Chains%s' % (self.outputDirectory, os.sep)]
    # output = subprocess.Popen(cmd, stdout=subprocess.PIPE).stdout.read()

    self.comFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_chains.' + comFileExtension
    self.logFileName = self.outputDirectory + os.path.basename(pdbFileName)[:-4] + '_chains.log'

    outCom = open(self.comFileName, 'w')

    if not isWindows:
      outCom.write(shellHeader)
    outCom.write(' \\\n '.join(cmd) + ' > %s \n\n' % self.logFileName)
    outCom.close()

    if not isWindows:
      st = os.stat(self.comFileName)
      os.chmod(self.comFileName, st.st_mode | 0111 )

#    os.system('/bin/sh "%s"' % self.comFileName)
    prosmartProcess = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    outLog = open(self.logFileName, 'w')
    for line in prosmartProcess.stdout:
      outLog.write(line)
    outLog.close()


    outputIN = open(self.logFileName, 'r')
    if not outputIN:
      self.die('Can`t read ProSMART log-file %s; terminating' % self.logFileName)
    output = outputIN.read()
    outputIN.close()

    match = re.search(r'ProSMART completed', output)
    if not match:
      sys.stdout.write('Something wrong with ProSMART run for the input analysis. ProSMART output: \n')
      self.die(output)

    allChains = os.listdir(self.outputDirectory + 'ProSMART_Chains'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep)
    if len(allChains) < 1:
      sys.stdout.write('ProSMART failed to generate statistics files for input analysis. ProSMART output: \n')
      self.die(output)

    for fileName in allChains:
      match = re.search(os.path.basename(pdbFileName)[:-4]+'_(\S)\.txt', fileName)
      if match:
        chain = match.group(1)
        sequenceFile = open(self.outputDirectory + 'ProSMART_Chains'+os.sep+'Output_Files'+os.sep+'Sequence'+os.sep+fileName,'r')
        firstString = sequenceFile.readline()
        chainSequence = ''
        for line in sequenceFile:
          chainSequence += line.strip()
        sequenceFile.close()
        if len(chainSequence) > 0:
          chainSequences.append((chain, ''.join(chainSequence.split()))) # removing internal spaces

    if len(chainSequences) > 0:
      return chainSequences
    else:
      self.die('Can`t get protein sequence from the supplied PDB file; terminating.')


class Pipeline:
  def __init__(self):

    self.nc = 3 # maximal number of chains for restraints generation
    self.protocols = []  # List of RefinementProtocol classes
    self.outputDirectory = '.'+os.sep+'lrsr_Output'+os.sep
    self.pdbToRefine = None
    self.homologsFilesList = None
    self.bestPDB = ''
    self.bestMTZ = ''
    self.logFile = None
    self.dna = False
    self.tls = ''
    self.ss = False
    self.labin = ''
    self.free_1 = False
    self.nCPU = 1
    self.freeCPUs = 1
    self.phased = False
    self.mr = False
    self.molprobity = False
    self.maximalNumberOfHomologs = 5
    self.lowestResolutionForAutoHomologues = 3.5
    self.xyzout = None
    self.hklout = None
    self.windows = False

  def die(self, message, removeOutputDirectory=False):
    sys.stdout.write('\n\n' + str(message) + '\n\n')
    if removeOutputDirectory and os.path.isdir(self.outputDirectory):
      try:
        shutil.rmtree(self.outputDirectory)
      except OSError:
        sys.stdout.write('Error during removal of program`s working directory\n')
    sys.exit(1)

  def parseCommandLine(self, arguments):

    parser = argparse.ArgumentParser(add_help=False)
    parser.set_defaults(auto=False)
    parser.add_argument('-xyzin', '-p1', '--pdb1', action='store', dest='pdb1', help=argparse.SUPPRESS)
    parser.add_argument('-hklin', '-f', '--mtz', action='store', dest='f', help=argparse.SUPPRESS)
    parser.add_argument('-xyzout', action='store', dest='xyzout', help=argparse.SUPPRESS)
    parser.add_argument('-hklout', action='store', dest='hklout', help=argparse.SUPPRESS)
    parser.add_argument('-p2', '--pdb2', nargs='*', action='store', dest='pdb2', help=argparse.SUPPRESS)
    parser.add_argument('-o', '--output', action='store', dest='output', help=argparse.SUPPRESS)
    parser.add_argument('-pr', '--protocol', action='store', dest='protocol', help=argparse.SUPPRESS)
    parser.add_argument('-nc', '--num_chains', action='store', dest='nc', type=int, help=argparse.SUPPRESS)
    parser.add_argument('-auto', action='store_true', dest='auto', help=argparse.SUPPRESS)
    parser.add_argument('-nh', '--num_homologs', action='store', dest='nh', type=int, help=argparse.SUPPRESS)
    parser.add_argument('-dna', action='store_true', dest='dna', help=argparse.SUPPRESS)
    parser.add_argument('-tls', action='store', dest='tls', type=str, help=argparse.SUPPRESS)
    parser.add_argument('-save_space', action='store_true', dest='ss', help=argparse.SUPPRESS)
    parser.add_argument('-labin', action='store', dest='labin', help=argparse.SUPPRESS)
    parser.add_argument('-phased', action='store_true', dest='phased', help=argparse.SUPPRESS)
    parser.add_argument('-free_1', action='store_true', dest='free_1', help=argparse.SUPPRESS)
    parser.add_argument('-cpu', action='store', dest='cpu', type=int, help=argparse.SUPPRESS)
    parser.add_argument('-mr', action='store_true', dest='mr', help=argparse.SUPPRESS)
    parser.add_argument('-minres', action='store', dest='minRes', type=float, help=argparse.SUPPRESS)

    parameters = parser.parse_args(arguments)
    pdb2 = []
    protocol = None

    if not parameters.pdb1:
      self.die('%s\nProgram needs model to refine (-p1)' % helpMessage)

    if not parameters.f:
      self.die('%s\nProgram needs data to refine (-f) ' % helpMessage)

    if not os.path.isfile(parameters.pdb1):
      self.die('%s\nModel file "%s" (-p1) does not exist' % (helpMessage, parameters.pdb1))

    if not os.path.isfile(parameters.f):
      self.die('%s\nData file "%s" (-f) does not exist' % (helpMessage, parameters.f))

    if parameters.output:
      if parameters.output[-1:] != os.sep:
        parameters.output += os.sep
      self.outputDirectory = parameters.output

    # adding number to the directory name if the directory exists
    if os.path.exists(self.outputDirectory):
      self.outputDirectory = generateNextDirectoryName(self.outputDirectory)

    try:
      os.makedirs(self.outputDirectory)
    except OSError:
      self.die('%s\nProgram can not make output directory %s' % (helpMessage, self.outputDirectory))

    sys.stdout = Logger(self.outputDirectory +'Pipeline.log')
    
    sys.stdout.write ('Working in: %s\n\n' % self.outputDirectory)
    sys.stdout.write ('Program received following parameters: \n')

    if parameters.pdb2: # Pipeline can accept as input file names, wildcards with *, ?, etc, or text files with one PDB file name per line
      for currentFile in parameters.pdb2:
        if not os.path.isfile(currentFile):
          filesList = glob.glob(currentFile) # expanding wildcard
          if len(filesList) < 1:
            self.die('%s\nHomologous model file "%s" (-p2) does not exist' % (helpMessage, currentFile))
          else:
            for anotherCurrentFile in filesList:
              pdb2.append(anotherCurrentFile)
            continue

        if currentFile[-3:].lower() != 'pdb':  # if extension is not 'pdb' then assuming this is a text file with PDB file names (one per line)
          listOfFilesIN = open(currentFile, 'r')
          for anotherCurrentFile in listOfFilesIN:
            anotherCurrentFile = anotherCurrentFile.strip()
            if len(anotherCurrentFile) > 0:
              if os.path.isfile(anotherCurrentFile):
                pdb2.append(anotherCurrentFile)
              else:
                self.die('%s\nHomologous model file %s read from the file %s does not exist; terminating' % (
                  helpMessage, anotherCurrentFile, currentFile))
        else:
          pdb2.append(currentFile)
      # Now pdb2 contains all file names and these files at least physically exist

      # checking whether the same file name was supplied once or not
      basenames = []
      for currentFile in pdb2:
        basenames.append(os.path.basename(currentFile))
      for basename in basenames:
        if basenames.count(basename) > 1:
          self.die(
            'You have supplied homologous model files with identical names: %s; files should have different names' % basename)

    # convert all file names to absolute path
    parameters.pdb1 = os.path.abspath(parameters.pdb1)
    parameters.f = os.path.abspath(parameters.f)
    pdb2 = [os.path.abspath(currentFile) for currentFile in pdb2]

    sys.stdout.write('Target structure: %s\n' % parameters.pdb1)
    sys.stdout.write('Data for target structure: %s\n' % parameters.f)
    sys.stdout.write('Supplied homologous structures: ')
    if len(pdb2) < 1:
      sys.stdout.write('None')
    else:
      for pdbName in pdb2:
        sys.stdout.write('\n' + pdbName)
    sys.stdout.write('\n')

    pdbToRefine = PDBtoRefine(parameters.pdb1, parameters.f)

    if parameters.xyzout:
      sys.stdout.write('Output PDB file name: %s\n' % parameters.xyzout)
      self.xyzout = parameters.xyzout

    if parameters.hklout:
      sys.stdout.write('Output MTZ file name: %s\n' % parameters.hklout)
      self.hklout = parameters.hklout

    if parameters.nh:
      sys.stdout.write('Maximal number of homologues (per unique chain): %d \n' % int(parameters.nh))
      self.maximalNumberOfHomologs = parameters.nh

    if len(pdb2) > self.maximalNumberOfHomologs:
      sys.stdout.write('\nWarning: too many homologs supplied (%d); using first %d homologs.\n\n' % (
        len(pdb2), self.maximalNumberOfHomologs))
      pdb2 = pdb2[:self.maximalNumberOfHomologs]

    if parameters.protocol:
      sys.stdout.write('Protocol file: %s\n' % parameters.protocol)
      if not os.path.isfile(parameters.protocol):
        self.die('%s\nProtocol file "%s" (-pr) does not exist' % (helpMessage, parameters.protocol))

      if len(pdb2) > 0:
        sys.stdout.write(
          'You supplied both protocol file (-pr option) and external homologues (-p2 option);\n  ignoring -p2 and using only homologues from the protocol file.\n\n')
      pdb2 = []

      sys.stdout.write('PDB files indicated by the protocol file: \n')

      try:
        xmlRoot = ET.parse(parameters.protocol).getroot()

        protocol = RefinementProtocol(pdbToRefine, xmlRoot.find('Best_protocol').attrib['Type'])
        protocol.description = xmlRoot.find('Best_protocol').attrib['Description']

        if xmlRoot.find('Best_protocol').attrib['Type'] not in ['ExternalRestraints', 'JellyBody', 'AllHomologs']:
          self.die('Unsupported protocol type %s found in the protocol file (-pr option); terminating' %
                   xmlRoot.find('Best_protocol').attrib['Type'])

        if xmlRoot.find('Best_protocol').attrib['Type'] == 'ExternalRestraints':
          for restraint in xmlRoot.find('Best_protocol').find('pdbChainRestraints'):
            chainRestraints = ChainRestraints(restraint.text, restraint.tag)
            for currentId in restraint.attrib.keys():
              restrainFileNameAndChain = restraint.attrib[currentId].split(';@;')
              if len(restrainFileNameAndChain) != 2:
                self.die('%s\nCant extract filename and chain from the protocol file (-pr option); terminating' % helpMessage)
              if not os.path.isfile(restrainFileNameAndChain[0]):
                self.die('%s\nFile %s specified in the protocol file %s does not exist; terminating.' % (helpMessage, restrainFileNameAndChain[0], parameters.protocol))
              sys.stdout.write(restrainFileNameAndChain[0]+'\n')
              chainRestraints.restraintList.append((restrainFileNameAndChain[0], restrainFileNameAndChain[1] ))
              pdb2.append(restrainFileNameAndChain[0])
            protocol.pdbChainRestraints.append(chainRestraints)
        elif xmlRoot.find('Best_protocol').attrib['Type'] == 'AllHomologs':
          protocol.homologsFilesList = []
          for item in xmlRoot.find('Best_protocol').find('homologsFilesList'):
            if item.tag == 'restrain':
              protocol.restraint = item.text[1:-1]
            else:
              if os.path.isfile(item.text[1:-1]):
                sys.stdout.write(item.text[1:-1]+'\n')
                protocol.homologsFilesList.append(item.text[1:-1])
              else:
                self.die('%s\nFile %s specified in the protocol file %s does not exist; terminating.' % (helpMessage, item.text[1:-1], parameters.protocol))

      except:
        self.die(
          '%s\nParsing error of the protocol file "%s" (supplied by -pr option); terminating.' % (helpMessage, parameters.protocol))

    if parameters.nc and not parameters.nc > 0:
      self.die('%s\nNumber of chains for refinement protocols (-nc) must be one or more.' % helpMessage)
    elif parameters.nc:
      sys.stdout.write('Maximal number of chains for external restraints generation: %d\n' % parameters.nc)
      self.nc = parameters.nc

    if parameters.dna:
      sys.stdout.write('Generate restraints for DNA/RNA: YES\n')
      self.dna = True

    if parameters.tls:
      if not os.path.isfile(parameters.tls):
        self.die('%s\nTLS input file "%s" (-tls) does not exist' % (helpMessage, parameters.f))
      self.tls = os.path.abspath(parameters.tls)
      sys.stdout.write('File with TLS parameters: %s\n' % self.tls)


    if parameters.ss:
      sys.stdout.write('-save_space option: saving disk space by removing ProSMART output\n')
      self.ss = True

    if parameters.phased:
      if len(parameters.labin) < 1:
        self.die('%s\nPlease specify correct -labin parameter if you wish to use phased refinement option.\n' % (helpMessage))
      sys.stdout.write('-phased option: using phased refinement\n')
      self.phased = True

    if parameters.labin:
      sys.stdout.write('Refmac LABIN option: %s\n' % parameters.labin)
      self.labin = parameters.labin

    if parameters.free_1:
      sys.stdout.write('Refmac "FREE 1" option specified for CNS-style Free flag \n')
      self.free_1 = True

    if parameters.mr:
      sys.stdout.write('Run 50-200 cycles of jelly body refinement after MR: YES\n')
      self.mr = True

    if parameters.cpu and not parameters.cpu > 0:
      self.die('%s\nNumber of CPUs used (-cpu) must be one or more.' % helpMessage)
    elif parameters.cpu:
      sys.stdout.write('Number of CPUs used: %d\n' % parameters.cpu)
      self.nCPU = parameters.cpu

    if parameters.minRes and not parameters.minRes > 1.0:
      self.die('%s\nSupplied lowest resolution for automatically downloaded homologues (%0.2f A) shall be more than 1 A; terminating\n' % (helpMessage, parameters.minRes))
    elif parameters.minRes:
      sys.stdout.write('Lowest resolution for automatically downloaded homologues: %0.2f\n' % parameters.minRes)
      self.lowestResolutionForAutoHomologues = parameters.minRes

    #checking for phenix.molprobity installation
    lpath=os.getenv('PATH')
    for p in lpath.split(os.path.pathsep):
      p=os.path.join(p,'phenix.molprobity')
      if os.path.exists(p) and os.access(p,os.X_OK):
        self.molprobity = True
        break

    if self.molprobity:
      sys.stdout.write('phenix.molprobity detected; geometrical quality will be assesed\n')
    else:
      sys.stdout.write('phenix.molprobity not found; geometrical quality will not be assesed\n')

    if parameters.auto:
      pdb2 += self.autoHomologsSelection(pdbToRefine.pdbFileName)

    return pdbToRefine, pdb2, protocol

  def autoHomologsSelection(self, pdbFileName):

    sys.stdout.write('\nAuto selection of homologues:\n')
    sys.stdout.write('Analysing protein chains: ')
    chainsAndSequences = InputAnalysis(self.outputDirectory+'ChainsAnalysis'+os.sep).getSequence(pdbFileName)

    targetSequences = []

    chainsAndSequences = sorted(chainsAndSequences, key=lambda x: x[0] ) # sorting by chain name -> A, B, C...
    sys.stdout.write(chainsAndSequences[0][0])
    for (chain, sequence) in chainsAndSequences:
      if len(list(set(list(sequence)))) < 8: # to filter out DNA/RNA chains, no BLAST search for them. 8 is a bit arbitrary - means we are BLASTing chains composed of more than 8 different aminoacids (in nucleotides shall be 4-5 nucleotide types)
        sys.stdout.write(', %s is not protein' % chain)
        continue
      if len(targetSequences) < 1:
        targetSequences.append((chain, sequence))
      else:
        addToList = True
        for (targetChain, targetSequence) in targetSequences:
          align_obj = align(seq_a=sequence, seq_b=targetSequence)
          alignment = align_obj.extract_alignment()
          numberOfIdenticalResidues = len(''.join(alignment.matches().split()))
          seqIdentity = float(numberOfIdenticalResidues) / float(max([len(sequence), len(targetSequence)]))
          if seqIdentity > MinimalSequenceIdentityForHomologs:
            addToList = False
            sys.stdout.write(', %s matches %s' % (chain, targetChain))
            break

        if addToList:
          sys.stdout.write(', ' + chain)
          targetSequences.append((chain, sequence))

    sys.stdout.write(' ...done.\n')


    pdbCodes = []
    pdbsTo2 = []
    counter = 0

    try:
      os.makedirs(self.outputDirectory + 'Download'+os.sep)
    except:
      self.die('Error while creating directory %s; terminating.' % self.outputDirectory + 'Download'+os.sep)


    sys.stdout.write('Running BLAST search... \n')

    for (targetChain, targetSequence) in targetSequences:
      if len(targetSequence) < minimalLengthOfProtein: # Everything less than this value is a peptide
        continue
      sys.stdout.write('Chain %s:\n' % targetChain)

      params = {
      'eCutOff': '10.0',
      'sequence': targetSequence,
      'matrix' : 'BLOSUM62',
      'outputFormat' : 'XML'}
      postData = urllib.urlencode(params)
      blastXML=''
      try:
          webData= urllib2.urlopen('http://www.rcsb.org/pdb/rest/getBlastPDB1?'+postData)
          blastXML = webData.read()
          webData.close()
      except:
          self.die('Internet connection error when trying to run BLAST search of the homologues; terminating.')

      try:
        blastRoot = ET.fromstring(blastXML)

        if blastRoot.find('BlastOutput_iterations') is None or \
           blastRoot.find('BlastOutput_iterations').find('Iteration') is None or \
           blastRoot.find('BlastOutput_iterations').find('Iteration').find('Iteration_hits') is None:
          continue

        for hit in blastRoot.find('BlastOutput_iterations').find('Iteration').find('Iteration_hits'):
          pdbCode = hit.find('Hit_def').text[0:4].lower()
          match = re.search(r'(.*):(.*):([^\|]*)\|',hit.find('Hit_def').text)
          if match:
            pdbChain = match.group(3).split(',')[0]

          if not match or len(pdbChain) < 1:
            self.die('Something wrong with BLAST output (cant find chains); terminating.')

          identity = float(hit.find('Hit_hsps')[0].find('Hsp_identity').text) / len(targetSequence)
          alignmentLength = float(hit.find('Hit_hsps')[0].find('Hsp_align-len').text) / len(targetSequence)

          if alignmentLength > 1:
            alignmentLength = 1
          if identity > 1:
            identity = 1

          if alignmentLength >= MinimalPercentAligned and \
                                    identity >= MinimalSequenceIdentityForHomologs:

            sys.stdout.write('%s : Aligned %d%%, Sequence identity %0.0f%% ' % (pdbCode, alignmentLength * 100, identity * 100))
            if pdbCode in pdbCodes:
              sys.stdout.write('- already analysed\n')
              continue
            else:
              pdbCodes.append(pdbCode)

            downloadedFileName = self.downloadPDB(pdbCode)

            if len(downloadedFileName) < 1:
              continue

            downloadedPDBAnalysis = InputAnalysis(self.outputDirectory+'Download'+os.sep+'Analysis_'+ pdbCode + os.sep)
            analysisResults = downloadedPDBAnalysis.matchToHomologs(downloadedFileName, [x[1] for x in pdbsTo2]+[pdbFileName],
                                                                                          pdbChain, [x[0] for x in pdbsTo2]+[targetChain], ss = self.ss)

            correctChainsFound = False
            for chain in analysisResults.keys():
              if len(analysisResults[chain]) > 0:
                correctChainsFound = True

            if correctChainsFound:
              sys.stdout.write(' - accepted for restraints generation\n')
              pdbsTo2.append((pdbChain, downloadedFileName))
              counter += 1
            else:
             sys.stdout.write(' - rejected (discrepancy between SEQ record and modelled atoms)\n')
             try:
               os.remove(downloadedFileName)
             except:
               sys.stdout.write('Unable to delete downloaded PDB file %s; trying to continue\n' % downloadedFileName)

            if counter >= self.maximalNumberOfHomologs:
              break
        counter = 0

      except ParseError:
        sys.stdout.write('Unexpected BLAST output for this chain; trying to continue.\n')
        continue

    if len(pdbsTo2) < 1:
      sys.stdout.write('No homologues with sequence identity more than %d%%, covering more than %d%% of target sequence and having RMSD with original structure more than %0.2f.'
              % (int(MinimalSequenceIdentityForHomologs * 100), int(MinimalPercentAligned*100), rmsdTreshold))
      return []
    else:
      return [x[1] for x in pdbsTo2]

  def downloadPDB(self, pdbCode):
    pdbText = ''
    try:
        webData= urllib2.urlopen('http://www.rcsb.org/pdb/files/' + pdbCode + '.pdb')
        pdbText = webData.read()
        webData.close()
    except:
        sys.stdout.write('Internet connection error when trying to fetch PDB file %s; trying next\n' % pdbCode)
        return ''

    if len(pdbText) == 0:
      sys.stdout.write('No data received for PDB file %s; trying next\n' % pdbCode)
      return ''

    flagCA = False
    currentChain = ''
    currentAltConf = ''
    resolutionMatch = False
    for pdbStr in pdbText.split('\n'):
      if not resolutionMatch:
        resolutionMatch = re.search('RESOLUTION.*(\d+\.\d+)\s', pdbStr)
        if resolutionMatch:
          if float(resolutionMatch.group(1)) > self.lowestResolutionForAutoHomologues:
            sys.stdout.write('Resolution of %s is %s A (minimal treshold %0.1f A) - rejecting; trying next\n' % (pdbCode, resolutionMatch.group(1), self.lowestResolutionForAutoHomologues))
            return ''

      if pdbStr[0:4] == 'ATOM':
        if pdbStr[13:15] != 'CA':
          flagCA = False
        if pdbStr[13:15] == 'CA' and flagCA and pdbStr[21:22] == currentChain and pdbStr[16:17] == currentAltConf:
          sys.stdout.write(' - PDB file %s has only CA atoms; rejecting\n' % pdbCode)
          return ''
        if pdbStr[13:15] == 'CA' and not flagCA:
          flagCA = True
          currentChain = pdbStr[21:22]
          currentAltConf = pdbStr[16:17]

    pdbFileName = self.outputDirectory + 'Download' + os.sep + pdbCode + '.pdb'

    try:
      pdbFile = open(pdbFileName, 'w')
      pdbFile.write(pdbText)
      pdbFile.close()
    except:
      sys.stdout.write('Error while saving PDB file %s; trying next\n' % pdbFileName)
      return ''

    return os.path.abspath(pdbFileName)

  def addProtocol(self, homologsForProtocols, numberOfHomologs=0, description='', reverse=False, allHomologs=False, restraint=''):

    if not homologsForProtocols or numberOfHomologs < 1 or description == '':
      self.die('Incorrect parameters for addProtocol call; terminating\n')

    # special case of all homologs
    if allHomologs:
      protocol = RefinementProtocol(self.pdbToRefine, 'AllHomologs')
      protocol.description = description
      protocol.homologsFilesList = homologsForProtocols
      protocol.restraint = restraint
      sys.stdout.write('%d - %s: \n' % (len(self.protocols) + 1, protocol.description))
      for homolog in protocol.homologsFilesList:
        sys.stdout.write('%s\n' % os.path.basename(homolog))
      sys.stdout.write('\n')
      protocol.isWindows = isWindows
      self.protocols.append(protocol)
      return

    # otherwise -  not all homologues

    maximumNumberOfAvailableHomologs = max(len(x) for x in homologsForProtocols.values())
    if numberOfHomologs > maximumNumberOfAvailableHomologs:  # simply not adding protocols
      return

    protocol = RefinementProtocol(self.pdbToRefine, 'ExternalRestraints')
    protocol.description = description
    sys.stdout.write('%d - %s: \n' % (len(self.protocols) + 1, protocol.description))

    for chain in sorted(homologsForProtocols.keys()):
      if len(homologsForProtocols[chain]) > 0:
        chainRestraints = ChainRestraints(chain, 'Homologue')
        if len(homologsForProtocols[chain]) < numberOfHomologs:
          n = len(homologsForProtocols[chain])
        else:
          n = numberOfHomologs

        if not reverse:
          for i in range(0, n):
            chainRestraints.restraintList.append(
              (homologsForProtocols[chain][i]['pdb'], homologsForProtocols[chain][i]['chain']))
            sys.stdout.write(
              '%s, chain %s: restraints from %s, chain %s. Sequence identity: %0.2f, local RMSD: %0.2f, global RMSD: %0.2f \n'
              % (os.path.basename(self.pdbToRefine.pdbFileName), chain,
                 os.path.basename(homologsForProtocols[chain][i]['pdb']), homologsForProtocols[chain][i]['chain'],
                 float(homologsForProtocols[chain][i]['SeqIden']) / 100,
                 float(homologsForProtocols[chain][i]['Minimum']), float(homologsForProtocols[chain][i]['RMSD']) ))
        else:
          for i in reversed(range(len(homologsForProtocols[chain]) - n, len(homologsForProtocols[chain]))):
            chainRestraints.restraintList.append(
              (homologsForProtocols[chain][i]['pdb'], homologsForProtocols[chain][i]['chain']))
            sys.stdout.write(
              '%s, chain %s: restraints from %s, chain %s. Sequence identity: %0.2f, local RMSD: %0.2f, global RMSD: %0.2f \n'
              % (os.path.basename(self.pdbToRefine.pdbFileName), chain,
                 os.path.basename(homologsForProtocols[chain][i]['pdb']), homologsForProtocols[chain][i]['chain'],
                 float(homologsForProtocols[chain][i]['SeqIden']) / 100,
                 float(homologsForProtocols[chain][i]['Minimum']), float(homologsForProtocols[chain][i]['RMSD']) ))

      else:
        chainRestraints = ChainRestraints(chain, 'Hbond')
        sys.stdout.write('%s, chain %s: H-bonds restraints\n'
                         % (os.path.basename(self.pdbToRefine.pdbFileName), chain))
      protocol.pdbChainRestraints.append(chainRestraints)
    protocol.isWindows = isWindows
    self.protocols.append(protocol)
    sys.stdout.write('\n')

  def initialiseProtocols(self, protocol=None):

    homologsForProtocols = {}

    for chain in self.pdbToRefine.chains.keys():
      homologsForProtocols[chain] = []
      homologs = sorted(self.pdbToRefine.chains[chain],
                        key=lambda x: float(x['RMSD']) + float(x['Minimum']))  # ranging homologs by their similarity to the refined structure
      for homolog in homologs:
        if float(homolog['SeqIden']) / 100 > MinimalSequenceIdentityForHomologs and \
           float(homolog['Percent']) / 100 > MinimalPercentAligned:
          homologsForProtocols[chain].append(homolog)

    if protocol: # protocol file was specified by user in the command line
      sys.stdout.write('Using protocol specified by user: \n')
      sys.stdout.write('1 - %s\n' % protocol.description)

      try:
        for chainRestraint in protocol.pdbChainRestraints:
          if chainRestraint.type == 'Homologue':
            for (pdbFileNameHomologue, chainHomologue) in chainRestraint.restraintList:
              if pdbFileNameHomologue not in [x['pdb'] for x in homologsForProtocols[chainRestraint.chain]] or \
                       chainHomologue not in [x['chain'] for x in homologsForProtocols[chainRestraint.chain]]:
                self.die(
                  'Homologue and chain (%s, %s) from the protocol file (-pr option) does not exist\n or have sequence identity to the target chain lower than %0.2f; terminating'
                  % (os.path.basename(pdbFileNameHomologue), chainHomologue, MinimalSequenceIdentityForHomologs))

              sys.stdout.write('%s, chain %s: restraints from %s, chain %s. \n'
                               % (os.path.basename(self.pdbToRefine.pdbFileName), chainRestraint.chain,
                                  os.path.basename(pdbFileNameHomologue), chainHomologue ))
          elif chainRestraint.type == 'Hbond':
            sys.stdout.write('%s, chain %s: H-bonds restraints\n'
                               % (os.path.basename(self.pdbToRefine.pdbFileName), chainRestraint.chain))
      except KeyError:
        self.die('Target chain, indicated in the protocol file (-pr option), does not exist; terminating')

      self.protocols.append(protocol)
      sys.stdout.write('\n')

    else: # default set of protocols
      sys.stdout.write('Using default set of protocols for refinement:\n\n')

      # Protocol 1  - always JellyBody
      protocol = RefinementProtocol(self.pdbToRefine, 'JellyBody')
      protocol.description = 'Jelly body only'
      sys.stdout.write('1 - %s\n\n' % protocol.description)
      protocol.isWindows = isWindows
      self.protocols.append(protocol)

      # Protocols with external homologues
      if len (homologsForProtocols.values()) > 0: # if homologues are actually there

        if self.nc > max(len(x) for x in homologsForProtocols.values()):
          self.nc = max(len(x) for x in homologsForProtocols.values()) # maximal number of chains cant exceed actual number of available chains

      # Protocols with closest external homologues (from one up to three by default)

        for i in range(self.nc):
          self.addProtocol(homologsForProtocols, numberOfHomologs=i + 1,
                           description='External restraints from %d closest homologue(s) (H-bonds for chains without suitable homologue), then jelly body' % (
                             i + 1))

      # Protocols with all homologues (restrain-best and restrain-all)
        if max(len(x) for x in homologsForProtocols.values()) > 3 \
            and self.nc < max(len(x) for x in homologsForProtocols.values()):

          pdbFileNames = []
          for homologList in homologsForProtocols.values():
            for homolog in homologList:
              pdbFileNames.append(homolog['pdb'])
          pdbFileNames = list(set(pdbFileNames)) # removing duplicates


          self.addProtocol(pdbFileNames, numberOfHomologs=len(self.homologsFilesList), allHomologs=True,
                           description='External restraints from all the homologues (-restrain_all), then jelly body', restraint='-restrain_all')
          self.addProtocol(pdbFileNames, numberOfHomologs=len(self.homologsFilesList), allHomologs=True,
                           description='External restraints from all the homologues (-restrain_best), then jelly body', restraint='-restrain_best')


      #Protocols with .nc most distant chains
        if self.nc < max(len(x) for x in homologsForProtocols.values()):
          for i in range(self.nc):
            self.addProtocol(homologsForProtocols, numberOfHomologs=i + 1, reverse=True,
                             description='External restraints from %d most distant homologue(s) (H-bonds for chains without suitable homologue), then jelly body' % (
                               i + 1))

      # Last protocol  - hydrogen bonds
      protocol = RefinementProtocol(self.pdbToRefine, 'ExternalRestraints')
      protocol.description = 'Hydrogen bonds restraints for all chains'
      sys.stdout.write('%d - %s: \n' % (len(self.protocols) + 1, protocol.description))
      for chain in sorted(homologsForProtocols.keys()):
        chainRestraints = ChainRestraints(chain, 'Hbond')
        sys.stdout.write('%s, chain %s: H-bonds restraints\n'
                         % (os.path.basename(self.pdbToRefine.pdbFileName), chain))
        protocol.pdbChainRestraints.append(chainRestraints)
      protocol.isWindows = isWindows
      self.protocols.append(protocol)
      sys.stdout.write('\n')

  def initialise(self, arguments):

    print headerMessage

    (self.pdbToRefine, self.homologsFilesList, protocol) = self.parseCommandLine(arguments)

    sys.stdout.write('\nAnalysing input files: \n')
    inputAnalysis = InputAnalysis(self.outputDirectory + 'InputAnalysis'+os.sep)
    inputAnalysis.labin = self.labin
    inputAnalysis.free_1 = self.free_1
    inputAnalysis.molprobity = self.molprobity

    sys.stdout.write('Analysing data for twinning... ')
    self.pdbToRefine.twin = inputAnalysis.twinTest(self.pdbToRefine)

    if self.pdbToRefine.twin:
      sys.stdout.write('twinning detected; will use "TWIN" keyword for refmac.\n')
    else:
      sys.stdout.write('no twinning detected.\n')

    if self.mr:
      sys.stdout.write('Running jelly body refinement after MR (it may take a while)... \n')
      jellyMR = inputAnalysis.mr(self.pdbToRefine)
      self.pdbToRefine.pdbFileName = os.path.abspath(jellyMR.xyzout)
      sys.stdout.write('Started from  Rfact: %0.3f and Rfree: %0.3f\n' % (
      jellyMR.log.getStartRfact(), jellyMR.log.getStartRfree() ) )
      sys.stdout.write('Finished with Rfact: %0.3f and Rfree: %0.3f\n' % (
      jellyMR.log.getFinalRfact(), jellyMR.log.getFinalRfree() ))
      sys.stdout.write('\n')

    sys.stdout.write('Checking best scaling and solvent options... ')
    self.pdbToRefine = inputAnalysis.scaleTestAndStartingValues(self.pdbToRefine)

    if self.pdbToRefine.scalingLSQ:
      sys.stdout.write('LSQ scaling performed better; will use LSQ scaling for refmac.\n')
    else:
      sys.stdout.write('standard scaling performed better; will use standard scaling for refmac.\n')

    sys.stdout.write('Using following solvent parameters: VDWProb %0.2f, IONProb %0.2f, RSHRink %0.2f\n' % (
      self.pdbToRefine.solvent['VDWProb'], self.pdbToRefine.solvent['IONProb'],self.pdbToRefine.solvent['RSHRink'] ))

    sys.stdout.write('Starting from Rfact: %0.3f and Rfree: %0.3f\n' % (
      self.pdbToRefine.startingValues['Rfact'], self.pdbToRefine.startingValues['Rfree']))

    if self.molprobity:
      sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' %
      (self.pdbToRefine.startingValues['ramaOutliers'], self.pdbToRefine.startingValues['ramaFavoured'], self.pdbToRefine.startingValues['clash_percentile'], self.pdbToRefine.startingValues['molprobity_percentile'] ) )

    if (not protocol) or (protocol and protocol.type != 'AllHomologs'):
      sys.stdout.write('Checking external homologs... ')
      self.pdbToRefine.chains = inputAnalysis.matchToHomologs(self.pdbToRefine.pdbFileName, self.homologsFilesList, ss=self.ss)
      sys.stdout.write('done.\n')

    sys.stdout.write('Initialising refinement protocol(s)...\n')

    if protocol:
      self.initialiseProtocols(protocol=protocol)
    else:
      self.initialiseProtocols()

    sys.stdout.write('Refinement protocol(s) successfully initialised.\n')

  def startProtocol(self, protocol=None, dnaRestraintsFileName=''):

    if not protocol:
      self.die('No protocol supplied to the runProtocol routine')

    sys.stdout.write('\nStarting protocol %d out of %d (%s)\n' % (self.protocols.index(protocol) + 1, len(self.protocols), protocol.description),)
    originalDirectory = os.getcwd()
    protocol.containerCmdFileName = 'protocol_%d_of_%d.%s' % (self.protocols.index(protocol) + 1, len(self.protocols), comFileExtension)

    try:
      protocol.directory = self.outputDirectory + 'protocol_%d_of_%d' % (
        self.protocols.index(protocol) + 1, len(self.protocols))
      if not os.path.exists(protocol.directory):
        os.makedirs(protocol.directory)
        os.chdir(protocol.directory)
      else:
        self.die('Protocol directory already exists; terminating. Please make sure you run pipeline in a new directory.')
    except OSError:
      self.die('Can`t create directory ' + protocol.directory + '; terminating.')

    if protocol.type == 'JellyBody':
      refmac = Refmac(xyzin=protocol.pdbToRefine.pdbFileName,
                      xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_1jelly.pdb',
                      hklin=protocol.pdbToRefine.mtzFileName,
                      hklout=os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_1jelly.mtz',
                      ncyc=20)
      if not refmac.successfullyLoaded:
        self.die('Refmac run initialisation failed')

      refmac.twin = protocol.pdbToRefine.twin
      refmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ
      refmac.solvent = protocol.pdbToRefine.solvent
      if len(self.tls) > 1:
        refmac.tls = self.tls
      if len(self.labin) > 1:
        refmac.labin = self.labin
      refmac.free_1 = self.free_1
      refmac.phased = self.phased
      refmac.ncs = True
      refmac.jellyBody = True
      refmac.molprobity = self.molprobity
      refmac.isWindows = isWindows

      protocol.ss = self.ss
      protocol.containerCmd.append('%s' % refmac.comFileName)
      protocol.containerCmd.append('\n\n')
      protocol.refmacs.append(refmac)
      protocol.started = True
      # protocol.runContainer()
      p = multiprocessing.Process(target=protocol.run)
      p.start()

    elif protocol.type == 'ExternalRestraints':

      restraintsFileName = os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_restraints.txt'

      for chainRestraint in protocol.pdbChainRestraints:
        prosmart = ProSMART(pdbToRefine=protocol.pdbToRefine, chainRestraint=chainRestraint)
        prosmart.isWindows = isWindows
        prosmart.prepareRun()
        protocol.prosmarts.append(prosmart)
        protocol.containerCmd.append('%s \n' % prosmart.comFileName)
        if not isWindows:
          protocol.containerCmd.append('cat %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName))
        else:
          protocol.containerCmd.append('type %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName))
        protocol.restraintsFileName = restraintsFileName

      if os.path.exists(dnaRestraintsFileName):
        if not isWindows:
          protocol.containerCmd.append('cat %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName))
        else:
          protocol.containerCmd.append('type %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName))
        protocol.dnaRestraintsFileName = dnaRestraintsFileName


      #sys.stdout.write('Running Refmac5 with external restraints... ')
      erhRefmac = Refmac(xyzin=protocol.pdbToRefine.pdbFileName,
                         xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_1ExtRestr.pdb',
                         hklin=protocol.pdbToRefine.mtzFileName,
                         hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_1ExtRestr.mtz',
                         ncyc=40)
      if not erhRefmac.successfullyLoaded:
        self.die('Refmac run initialisation failed')

      erhRefmac.twin = protocol.pdbToRefine.twin
      erhRefmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ
      erhRefmac.solvent = protocol.pdbToRefine.solvent
      erhRefmac.phased = self.phased
      erhRefmac.proSMARTParams = True
      erhRefmac.proSMARTFile = restraintsFileName
      if len(self.tls) > 1:
        erhRefmac.tls = self.tls
      if len(self.labin) > 1:
        erhRefmac.labin = self.labin
      erhRefmac.free_1 = self.free_1
      erhRefmac.molprobity = self.molprobity
      erhRefmac.isWindows = isWindows

      protocol.refmacs.append(erhRefmac)
      protocol.containerCmd.append('%s \n' % erhRefmac.comFileName)


      erhRefmac2 = Refmac(xyzin=protocol.pdbToRefine.pdbFileName,
                         xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_11ExtRestr.pdb',
                         hklin=protocol.pdbToRefine.mtzFileName,
                         hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_11ExtRestr.mtz',
                         ncyc=40)
      if not erhRefmac2.successfullyLoaded:
        self.die('Refmac run initialisation failed')

      erhRefmac2.twin = protocol.pdbToRefine.twin
      erhRefmac2.scalingLSQ = protocol.pdbToRefine.scalingLSQ
      erhRefmac2.solvent = protocol.pdbToRefine.solvent
      erhRefmac2.phased = self.phased
      erhRefmac2.proSMARTParams = False
      erhRefmac2.proSMARTFile = restraintsFileName
      if len(self.tls) > 1:
        erhRefmac2.tls = self.tls
      if len(self.labin) > 1:
        erhRefmac2.labin = self.labin
      erhRefmac2.free_1 = self.free_1
      erhRefmac2.molprobity = self.molprobity
      erhRefmac2.isWindows = isWindows

      protocol.refmacs.append(erhRefmac2)
      protocol.containerCmd.append('%s \n' % erhRefmac2.comFileName)


      refmac = Refmac(xyzin=erhRefmac.xyzout,
                      xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_2jelly.pdb',
                      hklin=protocol.pdbToRefine.mtzFileName,
                      hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_2jelly.mtz',
                      ncyc=20)
      if not refmac.successfullyLoaded:
        self.die('Refmac run initialisation failed')

      refmac.twin = protocol.pdbToRefine.twin
      refmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ
      refmac.solvent = protocol.pdbToRefine.solvent
      refmac.phased = self.phased
      refmac.jellyBody = True
      refmac.ncs = True
      if len(self.tls) > 1:
        refmac.tls = self.tls
      if len(self.labin) > 1:
        refmac.labin = self.labin
      refmac.free_1 = self.free_1
      refmac.molprobity = self.molprobity
      refmac.isWindows = isWindows

      protocol.refmacs.append(refmac)

      protocol.ss = self.ss
      protocol.containerCmd.append('%s \n' % refmac.comFileName)
      protocol.containerCmd.append('\n')
      protocol.started = True
      #protocol.runContainer()

      p = multiprocessing.Process(target=protocol.run)
      p.start()

    elif protocol.type == 'AllHomologs':

      restraintsFileName = os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_restraints.txt'

      prosmart = ProSMART(pdbToRefine=protocol.pdbToRefine, allHomologs=True, allHomologsList=protocol.homologsFilesList ,
                          restraint=protocol.restraint)
      prosmart.isWindows = isWindows
      prosmart.prepareRun()

      protocol.prosmarts.append(prosmart)
      protocol.containerCmd.append('%s \n' % prosmart.comFileName)

      if not isWindows:
        protocol.containerCmd.append('cat %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName))
      else:
        protocol.containerCmd.append('type %s >> .%s%s\n' % (prosmart.restraintsFileName, os.sep, restraintsFileName))
      protocol.restraintsFileName = restraintsFileName

      if os.path.exists(dnaRestraintsFileName):
        if not isWindows:
          protocol.containerCmd.append('cat %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName))
        else:
          protocol.containerCmd.append('type %s >> .%s%s\n' % (dnaRestraintsFileName, os.sep, restraintsFileName))
        protocol.dnaRestraintsFileName = dnaRestraintsFileName

      #sys.stdout.write('Running Refmac5 with external restrains... ')
      erhRefmac = Refmac(xyzin=protocol.pdbToRefine.pdbFileName,
                         xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_1ExtRestr.pdb',
                         hklin=protocol.pdbToRefine.mtzFileName,
                         hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_1ExtRestr.mtz',
                         ncyc=40)
      if not erhRefmac.successfullyLoaded:
        self.die('Refmac run initialisation failed')

      erhRefmac.twin = protocol.pdbToRefine.twin
      erhRefmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ
      erhRefmac.solvent = protocol.pdbToRefine.solvent
      erhRefmac.phased = self.phased
      erhRefmac.proSMARTParams = True
      erhRefmac.proSMARTFile = restraintsFileName
      if len(self.tls) > 1:
        erhRefmac.tls = self.tls
      if len(self.labin) > 1:
        erhRefmac.labin = self.labin
      erhRefmac.free_1 = self.free_1
      erhRefmac.molprobity = self.molprobity
      erhRefmac.isWindows = isWindows

      protocol.refmacs.append(erhRefmac)
      protocol.containerCmd.append('%s \n' % erhRefmac.comFileName)


      erhRefmac2 = Refmac(xyzin=protocol.pdbToRefine.pdbFileName,
                         xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_11ExtRestr.pdb',
                         hklin=protocol.pdbToRefine.mtzFileName,
                         hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_11ExtRestr.mtz',
                         ncyc=40)
      if not erhRefmac2.successfullyLoaded:
        self.die('Refmac run initialisation failed')

      erhRefmac2.twin = protocol.pdbToRefine.twin
      erhRefmac2.scalingLSQ = protocol.pdbToRefine.scalingLSQ
      erhRefmac2.solvent = protocol.pdbToRefine.solvent
      erhRefmac2.phased = self.phased
      erhRefmac2.proSMARTParams = False
      erhRefmac2.proSMARTFile = restraintsFileName
      if len(self.tls) > 1:
        erhRefmac2.tls = self.tls
      if len(self.labin) > 1:
        erhRefmac2.labin = self.labin
      erhRefmac2.free_1 = self.free_1
      erhRefmac2.molprobity = self.molprobity
      erhRefmac2.isWindows = isWindows

      protocol.refmacs.append(erhRefmac2)
      protocol.containerCmd.append('%s \n' % erhRefmac2.comFileName)


      refmac = Refmac(xyzin=erhRefmac.xyzout,
                      xyzout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.pdbFileName)[:-4] + '_2jelly.pdb',
                      hklin=erhRefmac.hklin,
                      hklout= os.getcwd() + os.sep + os.path.basename(protocol.pdbToRefine.mtzFileName)[:-4] + '_2jelly.mtz',
                      ncyc=20)
      if not refmac.successfullyLoaded:
        self.die('Refmac run initialisation failed')

      refmac.twin = protocol.pdbToRefine.twin
      refmac.scalingLSQ = protocol.pdbToRefine.scalingLSQ
      refmac.solvent = protocol.pdbToRefine.solvent
      refmac.phased = self.phased
      refmac.jellyBody = True
      refmac.ncs = True
      if len(self.tls) > 1:
        refmac.tls = self.tls
      if len(self.labin) > 1:
        refmac.labin = self.labin
      refmac.free_1 = self.free_1
      refmac.molprobity = self.molprobity
      refmac.isWindows = isWindows

      protocol.ss = self.ss
      protocol.refmacs.append(refmac)
      protocol.containerCmd.append('%s \n' % refmac.comFileName)
      protocol.containerCmd.append('\n')
      protocol.started = True
      #protocol.runContainer()
      protocol.isWindows = isWindows

      p = multiprocessing.Process(target=protocol.run)
      p.start()

    else:
      self.die('Unknown protocol type %s - normally this should not happen; terminating.' % protocol.type)

    try:
      os.chdir(originalDirectory)
    except OSError:
      self.die('Can`t return back to original directory ' + originalDirectory + ' ; terminating.')


  def runProtocols(self):
    sys.stdout.write('\nRunning refinement protocol(s)...\n')

    dnaRestraintsFile = ''

    if self.dna:
      prosmart = ProSMART(pdbToRefine=self.pdbToRefine, allHomologsList=self.homologsFilesList ,
                          dna=True, outputDirectoryBase=self.outputDirectory+'InputAnalysis' + os.sep)
      prosmart.prepareRun()
      prosmart.run()
      if prosmart.executed:
        if os.path.exists(prosmart.restraintsFileName):
          if os.path.getsize(prosmart.restraintsFileName) > 1:
            sys.stdout.write('DNA/RNA detected; restraints from homologues generated.\n')
            dnaRestraintsFile = prosmart.restraintsFileName
          else:
            prosmart = ProSMART(pdbToRefine=self.pdbToRefine, allHomologsList=None,
                            dna=True, outputDirectoryBase=self.outputDirectory+'InputAnalysis' + os.sep)
            prosmart.prepareRun()
            prosmart.run()
            if prosmart.executed:
              if os.path.exists(prosmart.restraintsFileName):
                if os.path.getsize(prosmart.restraintsFileName) > 1:
                  sys.stdout.write('DNA/RNA detected; self-restraints generated.\n')
                  dnaRestraintsFile = prosmart.restraintsFileName

        else:
          prosmart = ProSMART(pdbToRefine=self.pdbToRefine, allHomologsList=None,
                          dna=True, outputDirectoryBase=self.outputDirectory+'InputAnalysis' + os.sep)
          prosmart.prepareRun()
          prosmart.run()
          if prosmart.executed:
            if os.path.exists(prosmart.restraintsFileName):
              if os.path.getsize(prosmart.restraintsFileName) > 1:
                sys.stdout.write('DNA/RNA detected; self-restraints generated.\n')
                dnaRestraintsFile = prosmart.restraintsFileName


    runningProtocols = []
    self.freeCPUs = self.nCPU

    for protocol in list(self.protocols):

      self.startProtocol(protocol=protocol, dnaRestraintsFileName=dnaRestraintsFile)
      runningProtocols.append(protocol)
      self.freeCPUs -= 1

      if self.freeCPUs < 1: # no more free CPUs left
        while True: # infinite waiting loop until one of the protocols is finished
          time.sleep(timeQuant)

          for protocolPoll in list(runningProtocols): # polling all running protocols
            if not protocolPoll.isRunning():
              if protocolPoll.executed:
                sys.stdout.write('\nProtocol %d out of %d (%s) finished.\n' % (self.protocols.index(protocolPoll) + 1, len(self.protocols), protocolPoll.description))
                sys.stdout.write('Rfact: %0.3f, Rfree: %0.3f \n' %
                                   (protocolPoll.refmacs[-1].log.getFinalRfact(), protocolPoll.refmacs[-1].log.getFinalRfree() ))
                if self.molprobity:
                  sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' %
                         (protocolPoll.refmacs[-1].log.getRamaOutliers(), protocolPoll.refmacs[-1].log.getRamaFavoured(), protocolPoll.refmacs[-1].log.getClashscore_percentile(), protocolPoll.refmacs[-1].log.getMolprobity_percentile() ) )

                runningProtocols.remove(protocolPoll)
                self.freeCPUs += 1
                break # breaking polling of running protocols since one protocol is finished

              else: # protocol is not running and is not executed means either refmac5 or ProSMART is hanging - need to kill it...
                sys.stdout.write('\nProtocol "%s" is not responding; terminating it and proceeding to the next\n' % protocolPoll.description)
                protocolPoll.terminate()
                runningProtocols.remove(protocolPoll)
                self.protocols.remove(protocolPoll)
                self.freeCPUs += 1
                break # breaking polling of running protocols since one protocol was just killed

          if self.freeCPUs >= 1:
            break # breaking infinite waiting loop to start next protocol


    while runningProtocols: # let all running protocols to finish
      time.sleep(timeQuant)

      for protocolPoll in list(runningProtocols): # polling all running protocols
        if not protocolPoll.isRunning():
          if protocolPoll.executed:
            sys.stdout.write('\nProtocol %d out of %d (%s) finished.\n' % (self.protocols.index(protocolPoll) + 1, len(self.protocols), protocolPoll.description))
            sys.stdout.write('Rfact: %0.3f, Rfree: %0.3f \n' %
                               (protocolPoll.refmacs[-1].log.getFinalRfact(), protocolPoll.refmacs[-1].log.getFinalRfree() ))
            if self.molprobity:
              sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' %
                     (protocolPoll.refmacs[-1].log.getRamaOutliers(), protocolPoll.refmacs[-1].log.getRamaFavoured(), protocolPoll.refmacs[-1].log.getClashscore_percentile(), protocolPoll.refmacs[-1].log.getMolprobity_percentile() ) )

            runningProtocols.remove(protocolPoll)
            self.freeCPUs += 1
            break # breaking polling of running protocols since one protocol is finished

          else: # protocol is not running and is not executed means either refmac5 or ProSMART is hanging - need to kill it...
            sys.stdout.write('Protocol "%s" is not responding; terminating it and proceeding to the next\n' % protocolPoll.description)
            protocolPoll.terminate()
            runningProtocols.remove(protocolPoll)
            self.protocols.remove(protocolPoll)
            self.freeCPUs += 1
            break # breaking polling of running protocols since one protocol was just killed


    #
    # No sensible code below this line in this method
    #
    #
    # while self.freeCPU < self.nCPU:
    #   busyCPU = 0
    #   for protocolPoll in self.protocols:
    #     if protocolPoll.started:
    #       time.sleep(timeQuant)
    #       if protocolPoll.isRunning():
    #         busyCPU += 1
    #       else:
    #         if not protocolPoll.executed:
    #           p = subprocess.Popen(['ps', 'aux'], stdout=subprocess.PIPE)
    #           out, err = p.communicate()
    #
    #           for line in out.splitlines():
    #             if ('refmac5' in line) and (self.outputDirectory[1:] in line):
    #               pid = int(line.split()[1])
    #               os.kill(pid, signal.SIGKILL)
    #             if ('prosmart' in line) and (self.outputDirectory[1:] in line):
    #               pid = int(line.split()[1])
    #               os.kill(pid, signal.SIGKILL)
    #
    #           self.die('Protocol "%s" is not responding; terminating' % protocolPoll.description)
    #         else:
    #           sys.stdout.write('Protocol %d out of %d (%s) finished.\n' % (self.protocols.index(protocolPoll) + 1, len(self.protocols), protocolPoll.description))
    #           sys.stdout.write('Rfact: %0.3f, Rfree: %0.3f \n' %
    #                            (protocolPoll.refmacs[-1].log.getFinalRfact(), protocolPoll.refmacs[-1].log.getFinalRfree() ))
    #           if self.molprobity:
    #             sys.stdout.write('Ramachandran outliers: %0.2f%%\nRamachandran favoured: %0.2f%%\nClashscore percentile: %0.2f\nMolprobity percentile: %0.2f\n\n' %
    #               (protocolPoll.refmacs[-1].log.getRamaOutliers(), protocolPoll.refmacs[-1].log.getRamaFavoured(), protocolPoll.refmacs[-1].log.getClashscore_percentile(), protocolPoll.refmacs[-1].log.getMolprobity_percentile() ) )
    #
    #           protocolPoll.started = False
    #   self.freeCPU = self.nCPU - busyCPU


  def returnBest(self):

    if self.molprobity:

      sys.stdout.write('\n\nStatistics on all protocols:\n')

      qfactDict = {}
      qfactProtocols = []

      rFreeMax = max(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree()
      rFreeMin = min(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree()
      molpPrcntMax = max(self.protocols, key=lambda y: y.refmacs[-1].log.getMolprobity_percentile()).refmacs[-1].log.getMolprobity_percentile()
      molpPrcntMin = min(self.protocols, key=lambda y: y.refmacs[-1].log.getMolprobity_percentile()).refmacs[-1].log.getMolprobity_percentile()

      sys.stdout.write('\nWorst Rfree value: %0.3f, Best Rfree value: %0.3f \nWorst Molprobity percentile: %0.2f, Best Molprobity percentile: %0.2f\n\n' % (rFreeMax, rFreeMin, molpPrcntMin, molpPrcntMax) )

      if (molpPrcntMax - molpPrcntMin) >= 0.1:
        weightTerm = (rFreeMax - rFreeMin) / (molpPrcntMax - molpPrcntMin)
      else:
        weightTerm = 0

      for protocol in self.protocols:
        nProtocol = self.protocols.index(protocol) + 1
        rfree = protocol.refmacs[-1].log.getFinalRfree()
        molPercentile = protocol.refmacs[-1].log.getMolprobity_percentile()
        qfact = rfree + weightTerm * (molpPrcntMax - molPercentile)

        qfactDict['qfact'] = qfact
        qfactDict['rfree'] = rfree
        qfactDict['index'] = nProtocol
        qfactDict['molPercentile'] = molPercentile
        sys.stdout.write('Protocol %d : Rfree %0.3f, Molprobity percentile %0.2f, Qscore %0.2f\n' % (nProtocol, rfree, molPercentile, qfact*100))
        qfactProtocols.append(copy.deepcopy(qfactDict))

      sortedQscoreProtocols = sorted(qfactProtocols,
                               key=lambda x: (x['qfact'], 100 - x['molPercentile']))

#      sortedProtocols = sorted(self.protocols,
#                             key=lambda x: min(x.refmacs, key=lambda y: y.log.getMolprobity()).log.getMolprobity())
      self.bestProtocol = self.protocols[sortedQscoreProtocols[0]['index'] - 1]

      #self.bestRefmacRun = min(self.bestProtocol.refmacs, key=lambda y: y.log.getMolprobity())
      self.bestRefmacRun = self.bestProtocol.refmacs[-1]
      sys.stdout.write('\n\nBest performing protocol (basing on Qscore):\n %d - %s\n' %
                    (self.protocols.index(self.bestProtocol) + 1, self.bestProtocol.description))

      if self.bestRefmacRun.log.getFinalRfree() > self.pdbToRefine.startingValues['Rfree']:
        sys.stdout.write('Sorry, this program failed to improve Rfree value for your structure.\n')

      sys.stdout.write('Rfact (before/after): %0.3f / %0.3f \nRfree (before/after): %0.3f / %0.3f\n' %
                     ( self.pdbToRefine.startingValues['Rfact'], self.bestRefmacRun.log.getFinalRfact(),
                       self.pdbToRefine.startingValues['Rfree'], self.bestRefmacRun.log.getFinalRfree() ))

      sys.stdout.write('Ramachandran outliers (before/after): %0.2f%% / %0.2f%%\nRamachandran favoured (before/after): %0.2f%% / %0.2f%%\nClashscore percentile (before/after): %0.2f / %0.2f\nMolprobity percentile (before/after): %0.2f / %0.2f\n\n' %
                   (self.pdbToRefine.startingValues['ramaOutliers'], self.bestRefmacRun.log.getRamaOutliers(),
                    self.pdbToRefine.startingValues['ramaFavoured'], self.bestRefmacRun.log.getRamaFavoured(),
                    self.pdbToRefine.startingValues['clash_percentile'], self.bestRefmacRun.log.getClashscore_percentile(),
                    self.pdbToRefine.startingValues['molprobity_percentile'], self.bestRefmacRun.log.getMolprobity_percentile()  ) )


    else:

      sys.stdout.write('\n\nStatistics on all protocols:\n')

      rFreeMax = max(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree()
      rFreeMin = min(self.protocols, key=lambda y: y.refmacs[-1].log.getFinalRfree()).refmacs[-1].log.getFinalRfree()

      sys.stdout.write('\nWorst Rfree value: %0.3f, Best Rfree value: %0.3f \n\n' % (rFreeMax, rFreeMin) )

      for protocol in self.protocols:
        nProtocol = self.protocols.index(protocol) + 1
        rfree = protocol.refmacs[-1].log.getFinalRfree()
        sys.stdout.write('Protocol %d : Rfree %0.3f\n' % (nProtocol, rfree))

      sortedProtocols = sorted(self.protocols, key=lambda x: x.refmacs[-1].log.getFinalRfree() )
      self.bestProtocol = sortedProtocols[0]
      self.bestRefmacRun = self.bestProtocol.refmacs[-1]

      sys.stdout.write('\n\nBest performing protocol (basing on Rfree value):\n %d - %s\n' %
                    (self.protocols.index(self.bestProtocol) + 1, self.bestProtocol.description))

      if self.bestRefmacRun.log.getFinalRfree() > self.pdbToRefine.startingValues['Rfree']:
        sys.stdout.write('Sorry, this program failed to improve Rfree value for your structure.\n')

      sys.stdout.write('Rfact (before/after): %0.3f / %0.3f \nRfree (before/after): %0.3f / %0.3f\n' %
                     ( self.pdbToRefine.startingValues['Rfact'], self.bestRefmacRun.log.getFinalRfact(),
                       self.pdbToRefine.startingValues['Rfree'], self.bestRefmacRun.log.getFinalRfree() ))


    xmlRoot = ET.Element('Best_protocols_root')
    xmlBestProtocol = ET.SubElement(xmlRoot, 'Best_protocol')
    xmlBestProtocol.set('Type', self.bestProtocol.type)
    xmlBestProtocol.set('Description', self.bestProtocol.description)

    if self.bestProtocol.type == 'AllHomologs':
      xmlBPhomologsFilesList = ET.SubElement(xmlBestProtocol, 'homologsFilesList')
      nextElement = ET.SubElement(xmlBPhomologsFilesList, 'restrain')
      nextElement.text = '"%s"' % self.bestProtocol.restraint
      for homolog in self.bestProtocol.homologsFilesList:
        nextElement = ET.SubElement(xmlBPhomologsFilesList, "id%d" % self.bestProtocol.homologsFilesList.index(homolog))
        nextElement.text = '"%s"'% homolog

    else:
      xmlBPpdbChainRestraints = ET.SubElement(xmlBestProtocol, 'pdbChainRestraints')
      for pdbChainRestraint in self.bestProtocol.pdbChainRestraints:
        nextElement = ET.SubElement(xmlBPpdbChainRestraints, pdbChainRestraint.type)
        nextElement.text = pdbChainRestraint.chain
        i = 1
        for (pdbFileName, chain) in pdbChainRestraint.restraintList:
          nextElement.set("id%d" % i, '%s;@;%s' % (pdbFileName, chain))
          i += 1

    indent(xmlRoot)
    xmlBestProtocolsTree = ET.ElementTree(xmlRoot)
    xmlBestProtocolsTree.write(self.outputDirectory + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_best.protocol')

    try:
      if self.xyzout:
        self.bestPDB = self.xyzout
      else:
        self.bestPDB = self.outputDirectory + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_best.pdb'

      if self.hklout:
        self.bestMTZ = self.hklout
      else:
        self.bestMTZ = self.outputDirectory + os.path.basename(self.pdbToRefine.pdbFileName)[:-4] + '_best.mtz'

      shutil.copyfile(self.bestRefmacRun.xyzout, self.bestPDB)
      shutil.copyfile(self.bestRefmacRun.hklout, self.bestMTZ)

      shutil.copytree(self.bestProtocol.directory, self.outputDirectory + 'protocol_best'+ os.sep)

    except IOError:
      self.die('Can`t copy pdb and mtz files of the best performing protocol; terminating.')

    sys.stdout.write('\nFinal PDB file: %s \n' % os.path.abspath(self.bestPDB) )
    sys.stdout.write('Final MTZ file: %s \n\n' % os.path.abspath(self.bestMTZ) )


if __name__ == "__main__":

#######################################
####  Adjustable global parameters ####
#######################################

  minimalLengthOfProtein = 30 #amino acids
  rmsdTreshold = 0.1 # structures with RMSD to the target structure lower than this treshold will be rejected as almost identical
  MinimalSequenceIdentityForHomologs = 0.75
  MinimalPercentAligned = 0.75

# This waiting time could be increased for unusually large complexes (up to 30-60 seconds, if not more)
  timeQuant = 10 # seconds, sleeping time before re-polling all running jobs

# Windows compatibility
  shellHeader = '#!/bin/sh\n\n' # for Linux/MacOS
  comFileExtension = 'com'
  isWindows = False

#######################################

  if 'Windows' in platform.system():
    isWindows = True
    shellHeader = ''
    comFileExtension = 'bat'

  pipeline = Pipeline()
  pipeline.initialise(sys.argv[1:])
  pipeline.runProtocols()
  pipeline.returnBest()