#!/usr/bin/python
# Python script 
#
# This script will be called from the web server 
# to start balbes on the linux cluster

import os, os.path, sys
import glob, re, shutil
import math
import string
import fpformat
import time
import random
import select, fcntl, subprocess
import socket

# BALBES_ROOT is required by everything below; without it nothing can be located.
if "BALBES_ROOT" not in os.environ:
    print("balbes.setup is not sourced. Re-install BALBES and don't ")
    print("forget do what setup.py reminds you to do")
    # This is a failure: report it through a non-zero exit status so the
    # caller does not mistake it for a successful run.
    sys.exit(1)

# The XML DOM modules imported below are taken from the PyXML copy found under
# BALBES_ROOT: make its dom, dom/ext and dom/ext/reader directories importable.
xml_path1 = os.path.join(os.getenv("BALBES_ROOT"), "PyXML-0.8.4", "xml", "dom")
xml_path2 = os.path.join(xml_path1, "ext")
xml_path3 = os.path.join(xml_path2, "reader")
sys.path.extend([xml_path1, xml_path2, xml_path3])

from StripXml import StripXml
import PyExpat
from xml.parsers.expat  import ExpatError


# Make the modules kept in $BALBES_ROOT/bin_py importable.
py_path = os.path.join(os.environ.get("BALBES_ROOT"), "bin_py")
sys.path += [py_path]


class CheckSG:
    """Prepare re-indexed MTZ files for the space groups related to the user's one.

    The work flow, driven by mtzGenerator(), is:
      1. get_userSG()  - read the space group from the input reflection file
                         (a cif/ent input is first converted to MTZ);
      2. alt_sg_dict() - run $BALBES_ROOT/bin/alt_sg_list and collect the
                         candidate space groups from its XML output;
      3. getANewMTZ()  - run reindex once per candidate space group.

    External programs started from here: mtzdump, reindex, sfcheck, cif2mtz
    and alt_sg_list.  Apart from alt_sg_list (which is driven by a generated
    batch script) they are started without a shell, so file names containing
    blanks or shell meta-characters are passed through unchanged.
    """

    # rwxr-xr-x for the generated batch script.  Built with int() because the
    # octal literal syntax differs between Python 2 and Python 3.
    _BATCH_MODE = int("755", 8)

    def __init__(self, t_mtz_name, u_dir):
        """Remember the input file and make sure the working directory exists.

        t_mtz_name -- path of the reflection file supplied by the user
        u_dir      -- working directory; created when it does not exist

        Exits with status 1 when t_mtz_name does not exist.
        """
        self.symInfo_name = ""
        self.u_dir = u_dir
        if not os.path.exists(self.u_dir):
            os.mkdir(self.u_dir)

        self.hkl_infile_name = ""
        self.in_file_format = ""
        if not os.path.exists(t_mtz_name):
            print("can not find the input mtz file for reading")
            sys.exit(1)

        self.hkl_infile_name = t_mtz_name
        # File format = extension without the dot ("" when there is none, so
        # a file merely *named* "cif" is not mistaken for a cif file).
        base_name = os.path.basename(t_mtz_name.strip())
        self.in_file_format = os.path.splitext(base_name)[1][1:].strip()

        self.user_sg = ""

        self.sg_dict = {}
        self.new_hkl_file_names = []

    def _run_tool(self, argv, stdin_text, log_name):
        """Run an external program without a shell, logging its stdout.

        argv       -- program name followed by its arguments (a list)
        stdin_text -- text fed to the program's standard input, or None to
                      leave standard input untouched
        log_name   -- file that receives the program's standard output

        Returns True when the program was started, False when the log file
        could not be opened or the program could not be run (the reason is
        printed).  The program's exit status is deliberately not inspected:
        callers check for the expected output files instead.
        """
        try:
            log_file = open(log_name, "w")
        except IOError:
            print("can not open %s for write " % log_name)
            return False

        try:
            try:
                if stdin_text is None:
                    subprocess.call(argv, stdout=log_file)
                else:
                    # universal_newlines lets communicate() take a plain
                    # string on both Python 2 and Python 3.
                    proc = subprocess.Popen(argv, stdin=subprocess.PIPE,
                                            stdout=log_file,
                                            universal_newlines=True)
                    proc.communicate(stdin_text)
            except (IOError, OSError):
                print("can not run %s: %s" % (argv[0], sys.exc_info()[1]))
                return False
        finally:
            log_file.close()
        return True

    def _space_group_from_line(self, line):
        """Extract the space-group name from a "Space group = 'X' (...)" line.

        Takes the text after the last "=", cuts it at the first "(" and
        drops the first and last character (the surrounding quotes).
        """
        quoted = line.strip().split("=")[-1].strip().split("(")[0].strip()
        return quoted[1:-1]

    def get_userSG(self):
        """Read the space group of the input file into self.user_sg.

        A cif/ent input is converted to MTZ first.  mtzdump's output is kept
        in <u_dir>/input_mtz.para.  Exits with status 1 when the conversion
        fails or when no "Space group" line is found.
        """
        if self.in_file_format in ("cif", "ent"):
            self.hkl_infile_name = self.cifToMtz()
            if not self.hkl_infile_name:
                print("The input cif file can not be transfered into a mtz file ")
                print("Progarm stopped ")
                # Really stop: going on would hand a missing file to mtzdump.
                sys.exit(1)

        self.user_mtz_para_name = self.u_dir + "/input_mtz.para"

        self._run_tool(["mtzdump", "HKLin", self.hkl_infile_name],
                       "Run\n", self.user_mtz_para_name)

        time.sleep(1)

        if os.path.exists(self.user_mtz_para_name):
            user_mtz_para = open(self.user_mtz_para_name, "r")
            try:
                for line in user_mtz_para:
                    if "Space group" in line:
                        self.user_sg = self._space_group_from_line(line)
                        break
            finally:
                user_mtz_para.close()

        if not self.user_sg:
            print("Can't find what the space group is in the input mtz file")
            sys.exit(1)

    def _node_text(self, node):
        """Return the text of an XML element, or "" when the element is empty."""
        child = node.firstChild
        if child is None or child.nodeValue is None:
            return ""
        return child.nodeValue

    def alt_sg_dict(self):
        """Run alt_sg_list for self.user_sg and fill self.sg_dict.

        A batch script (<u_dir>/alt_sg_list1.bat) is written and executed.
        alt_sg_list.xml is looked for in the current working directory; when
        found, every alt_sg_list.* file there is moved into u_dir and the XML
        is parsed.  Each <SG_name> whose name (blanks removed) does not
        contain "(a)" becomes an entry
            sg_dict[name] = {'spacegroup': name,
                             'new_hkl': <u_dir>/re<name>.mtz}
        err_level / err_message / n_sg are stored in self.alt_sg_list_sup.

        Exits with status 1 when the batch script cannot be written or the
        XML cannot be parsed; a missing XML file is only reported.
        """
        self.alt_sg_list_exe = os.getenv("BALBES_ROOT") + "/bin/alt_sg_list"

        self.alt_sg_list_log_name = os.path.join(self.u_dir, "alt_sg_list.log")
        # create the batch file
        self.alt_sg_list_bat_name = os.path.join(self.u_dir, "alt_sg_list1.bat")

        try:
            alt_sg_list_bat = open(self.alt_sg_list_bat_name, "w")
        except IOError:
            print("%s  could not be opened for write" % self.alt_sg_list_bat_name)
            sys.exit(1)

        batch_text = (
            " %s << stop > %s \n" % (self.alt_sg_list_exe, self.alt_sg_list_log_name)
            + "_DOC Y> %s \n" % (self.u_dir)
            + "_PATH_SCR %s \n" % (self.u_dir + "/")
            + "_SG %s\n" % self.user_sg
            + "_END  \n"
            + "stop \n\n\n"
        )
        try:
            alt_sg_list_bat.write(batch_text)
        finally:
            alt_sg_list_bat.close()

        # TEMP, CHANGE BACK TO THE STANDARD BALBES WAY LATER
        os.chmod(self.alt_sg_list_bat_name, self._BATCH_MODE)

        time.sleep(1)
        os.system(self.alt_sg_list_bat_name)

        self.alt_sg_list_sup = {
            'err_level': 0,
            'err_message': "",
            'num_sg': 0,
        }

        if os.path.exists("alt_sg_list.xml"):
            self.symInfo_name = self.u_dir + "/alt_sg_list.xml"
            # Same effect as "mv alt_sg_list.* <u_dir>", without a shell.
            for produced in glob.glob("alt_sg_list.*"):
                try:
                    shutil.move(produced, os.path.join(self.u_dir, produced))
                except EnvironmentError:
                    print("can not move %s to %s" % (produced, self.u_dir))

        time.sleep(1)

        # get possible space group candidates from the XML file
        try:
            self.symInfo = open(self.symInfo_name, "r")
        except IOError:
            print(" could not find %s for reading " % self.symInfo_name)
            return

        # parse contents of XML file
        try:
            try:
                symInfo_reader = PyExpat.Reader()
                symInfo_document = symInfo_reader.fromStream(self.symInfo)
            except ExpatError:
                print("parse alt_sg_list.xml failed")
                sys.exit(1)
        finally:
            self.symInfo.close()

        rootElement = StripXml(symInfo_document.documentElement)
        for node in rootElement.childNodes:
            name = node.nodeName
            text = self._node_text(node)
            if name == "err_level":
                if text.strip():
                    self.alt_sg_list_sup['err_level'] = int(text)
            elif name == "err_message":
                self.alt_sg_list_sup['err_message'] = text
                if self.alt_sg_list_sup['err_level'] != 0:
                    print("Error message from alt_sg_list:")
                    print(self.alt_sg_list_sup['err_message'])
            elif name == "n_sg":
                if text.strip():
                    self.alt_sg_list_sup['num_sg'] = int(text)
            elif name == "SG_name":
                # "P 21 21 21" -> "P212121"
                sg_now = "".join(text.split())
                if sg_now and "(a)" not in sg_now:
                    self.sg_dict[sg_now] = {
                        'spacegroup': sg_now,
                        'new_hkl': self.u_dir + "/re" + sg_now + ".mtz",
                    }

    def getANewMTZ(self, t_sg):
        """Use REINDEX to write the MTZ file for space group t_sg.

        Reads self.hkl_infile_name, writes self.sg_dict[t_sg]['new_hkl'] and
        keeps reindex's output in <u_dir>/<t_sg>_mtz.para.
        """
        new_hkl_para_name = self.u_dir + "/" + t_sg + "_mtz.para"

        self._run_tool(
            ["reindex",
             "hklin", self.hkl_infile_name,
             "hklout", self.sg_dict[t_sg]['new_hkl']],
            "symm %s \nend\n" % t_sg,
            new_hkl_para_name)

    def cifToMtz(self):
        """Convert the cif input into <u_dir>/mtz_from_cif.mtz.

        sfcheck is run first and is expected to write <u_dir>/sfcheck.hkl,
        which cif2mtz then turns into the MTZ file.

        Returns the MTZ path, or None when cif2mtz produced no file.
        Exits with status 1 when sfcheck produced no sfcheck.hkl.
        """
        t_outpath = self.u_dir + "/"
        new_mtz_name = t_outpath + "mtz_from_cif.mtz"
        sf_cif_name = t_outpath + "sfcheck.hkl"
        sf_cif_log = t_outpath + "sfcheck_cif.log"

        # generate a new cif by sfcheck
        self._run_tool(
            ["sfcheck", "-f", self.hkl_infile_name,
             "-po", t_outpath, "-ps", t_outpath, "-out", "a"],
            None,
            sf_cif_log)

        if not os.path.exists(sf_cif_name):
            print("sfcheck does not produce a cif file.")
            sys.exit(1)

        ciftomtz_log_name = self.u_dir + "/" + "inputcif_to_mtz.log"
        self._run_tool(
            ["cif2mtz", "hklin", sf_cif_name, "hklout", new_mtz_name],
            "end\n",
            ciftomtz_log_name)

        if os.path.exists(new_mtz_name):
            return new_mtz_name
        return None

    def mtzGenerator(self):
        """Run the whole work flow and return self.sg_dict."""
        self.get_userSG()
        if self.user_sg:
            self.alt_sg_dict()
            for a_sg in list(self.sg_dict):
                self.getANewMTZ(a_sg)

        return self.sg_dict


class ClusterManager :
    
     def __init__(self):
         
         pass   

     def SetBalJobBatch(self,para_dict) :
     
         if not para_dict :
             return None
         else :
             baf_name = os.path.join(para_dict['new_dir'], para_dict['name_root']+ ".csh")
             try :
                 baf = open(baf_name, "w")
             except IOError :
                 print "can not open %s for write " % baf_name
                 return None 
      
             baf.write("#!/bin/csh\n")
             baf.write("#$ -S /bin/csh\n")
             baf.write("#$ -cwd\n")
             baf.write("#$ -o %s \n" %para_dict['new_dir'])
             baf.write("#$ -e %s \n" %para_dict['new_dir'])
             t_na = "job_" + para_dict['name_root']
             baf.write("#$ -N %s \n" % ("job_" + para_dict['name_root']))
             baf.write("source %s/balbes.setup \n"%os.getenv("BALBES_ROOT"))
             baf.write("source %s/setup-scripts/csh/ccp4.setup\n"%os.getenv("CCP4_MASTER"))
             baf.write("setenv exeBalbes %s/bin_py/balbes_cluster\n"%os.getenv("BALBES_ROOT"))
             baf.write("set in_mtz=%s \n" %para_dict['in_rsf'])
             baf.write("set in_seq=%s \n" %para_dict['in_seq'])
             t_out = para_dict['new_dir'][1:].strip()
             baf.write("set path_ro=%s \n" %t_out )
   
             if para_dict.has_key('in_sol'):
                 baf.write("set in_sol=%s \n" %para_dict['in_sol'])
                 baf.write("/usr/bin/env python $exeBalbes OUT_ROOTDIR /$path_ro HKLIN /$in_mtz SEQIN /$in_seq SOLIN /$in_sol ") 
             else :  
                 baf.write("/usr/bin/env python $exeBalbes OUT_ROOTDIR /$path_ro HKLIN $in_mtz SEQIN $in_seq\n\n")

             baf.close()

             return baf_name

     def submitOneJob(self, para_dict, a_sg = None):
      
         # submit one job to the cluster and retrieve all the process info
         batch_name = self.SetBalJobBatch(para_dict)
         job_name = batch_name.split("/")[-1].split(".")[0]
         if batch_name:
             exeCmd = "qsub " + batch_name
             # os.system(exeCmd) 
             # using a simpler mechanism, not that in balbes 
             job_out = os.popen(exeCmd, "r")
             job_info = job_out.readlines()
      
             job_out.close()

             job_id = 0
             for line in job_info:
                 if line.find("Your") != -1:
                     job_id = int(line.strip().split()[2])
      
             if job_id:
                 if a_sg:
                     # one of multiple SG Jobs
                     para_dict['job_list'][a_sg] = job_id
                 else:
                     para_dict['only_job'] = job_id

     def checkJobCompletion(self, para_dict):

        jobs_active     = []
        jobs_active_pre = []
    
        # keep para_dict['job_list'] unchange because we need it later
        if para_dict.has_key('only_job'):
            jobs_active_pre.append(para_dict['only_job'])
        else:
            for a_sg in para_dict['job_list'].keys():
                jobs_active_pre.append(para_dict['job_list'][a_sg])

        while jobs_active_pre:
       
            time.sleep(60)
            jobs_active = []
            # only for the web server at YSBL
            jobs = os.popen("/opt/sge6/bin/lx24-amd64/qstat", "r")
            # in general
            #jobs = os.popen("qstat", "r")
            i_line = 0
            for line in jobs.readlines():
                i_line = i_line + 1
                if line and i_line > 2:
                    a_job_id = int(line.strip().split()[0])
                    # cjecke if the job_id belong to the current user
                    if a_job_id in jobs_active_pre:
                        jobs_active.append(a_job_id)

            # check which job finished
            for a_job_id in jobs_active_pre:
                if not (a_job_id in jobs_active):
                    if para_dict.has_key('only_job'):
                        self.getOneJobInfo(para_dict)
                        break
                    else:
                        self.writeJobInfo(a_job_id, para_dict)              

            jobs_active_pre = []
            for a_job_id in jobs_active:
                jobs_active_pre.append(a_job_id)
            jobs.close()
  
     def getOneJobInfo(self, para_dict) :
        job_file_name = para_dict['new_dir'] + "/results/Process_information.txt"
        if glob.glob(job_file_name):
             job_file = open(job_file_name, "r")
             l = 0
             s = 0
             err_str = ""
             para_dict['best_sol'] = {}
             para_dict['best_sol']['n_res'] = 0
             for line in job_file.readlines():
                 if line.find("RESOLUTIN_MAX") != -1 :
                     line_strs = line.strip().split("|")
                     para_dict['best_sol']['resol_high'] = float(line_strs[2].strip())
                 if line.find("RESOLUTIN_MIN") != -1 :
                     line_strs = line.strip().split("|")
                     para_dict['best_sol']['resol_low'] = float(line_strs[2].strip())
                 if line.find("SPACE GROUP") != -1 :
                     line_strs = line.strip().split("|")
                     para_dict['best_sol']['sg'] = line_strs[2].strip()
                 if line.find("SOLUTION SUMMARY") != -1 :
                     s = 1
                 if line.find("ITS PDB FILE") != -1 :
                     line_strs = line.strip().split("|") 
                     para_dict['best_sol']['pdb'] =  para_dict['new_dir'] + "/" + line_strs[2].strip()
                     if glob.glob(para_dict['best_sol']['pdb']):
                         para_dict['best_sol']['n_res'] = getResNum(para_dict['best_sol']['pdb'], err_str)
                     else :
                         err_str+= "unable to find the file %s \n"%para_dict['best_sol']['pdb']
                      
                 if line.find("ITS MTZ FILE") != -1 :
                     line_strs = line.strip().split("|") 
                     para_dict['best_sol']['mtz'] =  para_dict['new_dir'] + "/" + line_strs[2].strip()
                 if s == 1 and line.find("R_ini") != -1 :
                     line_strs = line.strip().split("|") 
                     Rs     = line_strs[2].strip().split("/")
                     Rfrees = line_strs[4].strip().split("/") 
                     para_dict['best_sol']['R_ini'] = float(Rs[0])
                     para_dict['best_sol']['R_fin'] = float(Rs[-1])
                     para_dict['best_sol']['Rf_ini'] = float(Rfrees[0])
                     para_dict['best_sol']['Rf_fin'] = float(Rfrees[-1])
             job_file.close()

             if err_str :      
                f1 =open(job_file_name, "a")
                f1.write(err_str)
                f1.close()

             if s==1:
                 return para_dict['best_sol']
             else:
                 return None

     def writeJobInfo(self, a_job_id, para_dict) :
    
        t_file = open(para_dict['SGJobInfo'], "a") 
        err_str = ""
        for a_sg in para_dict['job_list'].keys():
            if a_job_id == para_dict['job_list'][a_sg]:
                job_file_name = para_dict['new_dir'] + "/" + a_sg + "/results/Process_information.txt"
                if glob.glob(job_file_name):
                    job_file = open(job_file_name, "r")
                    l = 0
                    s = 0
                    
                    para_dict['solutions'] [a_sg] = {}
                    for line in job_file.readlines():
                        if l == 1 or s == 1 :
                            t_file.write(line)
                        if line.find("RESOLUTIN_MAX") != -1 :
                            line_strs = line.strip().split("|")
                            para_dict['solutions'] [a_sg]['resol_high'] = float(line_strs[2].strip())
                        if line.find("RESOLUTIN_MIN") != -1 :
                            line_strs = line.strip().split("|")
                            para_dict['solutions'] [a_sg]['resol_low'] = float(line_strs[2].strip())
                        if s == 1:
                            line_strs = line.strip().split("|") 
                            if line.find("ITS PDB FILE") != -1 :
                                para_dict['solutions'] [a_sg]['pdb'] = a_sg + "/" + line_strs[2].strip()
                                if glob.glob(para_dict['solutions'] [a_sg]['pdb']):
                                    para_dict['solutions'] [a_sg]['n_res'] = getResNum(para_dict['solutions'][a_sg]['pdb'], err_str)
                                else :
                                    err_str+= "unable to find the file %s \n"%para_dictpara_dict['solutions'] [a_sg]['pdb']
                            if line.find("ITS MTZ FILE") != -1 :
                                para_dict['solutions'] [a_sg]['mtz'] = a_sg + "/" + line_strs[2].strip()
                            if line.find("R_ini") != -1 :
                                Rs     = line_strs[2].strip().split("/") 
                                Rfrees = line_strs[4].strip().split("/") 
                                para_dict['solutions'] [a_sg]['R_ini'] = float(Rs[0])
                                para_dict['solutions'] [a_sg]['R_fin'] = float(Rs[-1])
                                para_dict['solutions'] [a_sg]['Rf_ini'] = float(Rfrees[0])
                                para_dict['solutions'] [a_sg]['Rf_fin'] = float(Rfrees[-1])
                        if line.find("TRIED") != -1:
                            t_file.write("ALL JOBS ON %s FINISHED==SUMMARY OF THE MR MODELS TRIED\n"%a_sg)
                            l =1
                        if line.find("SOLUTION SUMMARY") != -1 :
                            s =1
                            if l !=1 :
                                t_file.write("ALL JOBS ON %s FINISHED\n"%a_sg) 
                            # pick up the solution uf exists
                        if line.find("no MR template structure") != -1 :
                            t_file.write("ALL JOBS ON %s FINISHED\n"%a_sg) 
                            t_file.write("#-------------------------------------------------------------------------------------------#\n")
                            t_file.write("#%s#\n" %"No solution is found".center(91))
                            t_file.write("|-------------------------------------------------------------------------------------------|\n")
                        if line.find("No solution is found") != -1 :
                            t_file.write("ALL JOBS ON %s FINISHED\n"%a_sg) 
                            t_file.write("#-------------------------------------------------------------------------------------------#\n")
                            t_file.write("#%s#\n" %"No solution is found".center(91))
                            t_file.write("|-------------------------------------------------------------------------------------------|\n")
                            break

                    job_file.close()

                break
        if err_str:
           t_file.write(err_str)
                                  
        t_file.write("\n\n")
        t_file.close()
                          
     def writeSummaryFile(self, para_dict) :
 
        # output the best solution from all sg solutions and make some suggestion
    
        t_file = open(para_dict['SGJobInfo'], "a")

        sol_best = {}

        Rf_lowest = 1.0
        sg_best   = ""

        for a_sg in para_dict['solutions'].keys():
            if para_dict['solutions'][a_sg].has_key('Rf_fin'):
                if para_dict['solutions'][a_sg]['Rf_fin'] < Rf_lowest:
                    Rf_lowest = para_dict['solutions'][a_sg]['Rf_fin']
                    sol_best  = para_dict['solutions'] [a_sg]
                    sol_best['sg'] = a_sg

        t_file.write("\nFINAL SOLUTION SUMMARY\n")
        if sol_best.has_key('sg') :
            t_file.write("#-------------------------------------------------------------------------------------------#\n")
            t_file.write("#%s#\n" %"The best solution found is".center(91))
            t_file.write("#-------------------------------------------------------------------------------------------#\n")
            t_file.write("| ITS SPACE GROUP    |%s|\n" %sol_best['sg'].center(70))
            t_file.write("|-------------------------------------------------------------------------------------------|\n")
            t_file.write("| ITS PDB FILE       |%s|\n" %sol_best['pdb'].center(70))
            t_file.write("|-------------------------------------------------------------------------------------------|\n")
            t_file.write("| ITS MTZ FILE       |%s|\n" %sol_best['mtz'].center(70))
            t_file.write("|-------------------------------------------------------------------------------------------|\n")
            t_file.write("| R_ini/R_fin        |  %8.4f/%-8.4f  |    Rfree_ini/Rfree_fin     | %8.4f/%-8.4f |\n" \
                     %(sol_best['R_ini'], sol_best['R_fin'], sol_best['Rf_ini'], sol_best['Rf_fin']))
            t_file.write("|-------------------------------------------------------------------------------------------|\n")
            t_file.close()
            return sol_best
        else :
            t_file.write("#-------------------------------------------------------------------------------------------#\n")
            t_file.write("#%s#\n" %"No solution is found".center(91))
            t_file.write("|-------------------------------------------------------------------------------------------|\n")
            return None