#! /usr/bin/env python # -*- coding: utf-8 -*- import warnings warnings.simplefilter("ignore", DeprecationWarning) warnings.simplefilter("ignore", UserWarning) import sys import numpy import os import shutil import time import re import datetime import SystemUtility import subprocess import Quaternions import Grid import tarfile import stat import ADT import scipy.stats # import scipy.cluster.vq import threading import Bioinformatics import copy import traceback import itertools import pickle import json import cStringIO import BORGES_MATRIX import getpass # import matplotlib.pyplot as plt # import matplotlib.colors as mcolors # from matplotlib.backends.backend_pdf import PdfPages from Bio.PDB import * from termcolor import colored import xml.etree.ElementTree as ET # Auto manipulated from the library BRFfor = -1 listaEva = {} # To be set up from the binaries call LOCAL = True SHELXE_REQUIREMENTS = "" PHASER_REQUIREMENTS = "" BORGES_REQUIREMENTS = "" GRID_TYPE_L = "" GRID_TYPE_R = "" PATH_NEW_PHASER = "" PATH_NEW_SHELXE = "" PATH_NEW_BORGESCLIENT = "" PATH_NEW_ARCIFIRE = "" # INTERNAL VARIABLES TO CHANGE ONLY IF PHASER OR SHELXE CHANGES# PHASER_V1 = "2.6" PHASER_V2 = "2.7" SHELXE_V1 = "2014/4" SHELXE_V2 = "2016/1" SHELXE_V3 = "2016/1E" SHELXE_V4 = "2016" PYTHON_V = (2, 7, 9) GCC_V = "[GCC 4.4.3]" SHELXE_LST_END_CONDITION = """grep "finished" *.lst | wc -l""" SHELXE_LST_FAILURE_CONDITION = """grep "giving up" *.lst | wc -l""" SHELXE_PDB_PHS_END_CONDITION = """ls *.lst | wc -l""" SHELXE_LST_END_CONDITION_LOCAL = """finished""" SHELXE_LST_FAILURE_CONDITION_LOCAL = """giving up""" PHASER_OUT_END_CONDITION = """grep "EXIT STATUS: SUCCESS" *.out | wc -l""" PHASER_OUT_FAILURE_CONDITION = """grep "EXIT STATUS: FAILURE" *.out | wc -l""" PHASER_OUT_ANY_CASE = """grep "EXIT STATUS:" *.out | wc -l""" PHASER_NMA_END_CONDITION = PHASER_OUT_END_CONDITION PHASER_RLIST_SOL_PDB_END_CONDITION = """ls *.out | wc -l""" PHASER_OUT_END_CONDITION_LOCAL = """EXIT STATUS: SUCCESS""" PHASER_OUT_FAILURE_CONDITION_LOCAL = """EXIT STATUS: FAILURE""" PHASER_NMA_END_CONDITION_LOCAL = PHASER_OUT_END_CONDITION_LOCAL PHASER_OUT_END_TEST = 1 PHASER_NMA_END_TEST = PHASER_OUT_END_TEST PHASER_RLIST_SOL_PDB_END_TEST = 0 SHELXE_LST_END_TEST = 1 SHELXE_PDB_PHS_END_TEST = 0 NUMBER_OF_FILES_PER_DIRECTORY = 1000 BASE_SUM_FROM_WD = True LAST_AVAILABLE_ROTID = 0 MAP_OF_ROT_COMB = {} POSTMORTEM = False STOP_IF_SOLVED = True ############################################################### ####################################################################################################### # FUNCTIONS # ####################################################################################################### def mergeRotClusterObjects(Clu1, Clu2, suffix="merged",reset_euler="none"): for clu2 in Clu2: if len(clu2["heapSolutions"].asList()) > 0: for clu1 in Clu1: if len(clu1["heapSolutions"].asList()) > 0: if clu1["heapSolutions"].asList()[0][1]["n_prev_cluster"] == clu2["heapSolutions"].asList()[0][1][ "n_prev_cluster"]: kd = ADT.Heap() for item in clu1["heapSolutions"].asList(): prio, rota = item rota["name"] = rota["name"] + suffix if reset_euler.lower() in ["first","both"]: rota["euler"] = [0.0,0.0,0.0] kd.push(prio, rota) for item in clu2["heapSolutions"].asList(): prio, rota = item rota["name"] = rota["name"] if reset_euler.lower() in ["second","both"]: rota["euler"] = [0.0,0.0,0.0] kd.push(prio, rota) clu2["heapSolutions"] = kd break return Clu2 def generateFakeMRSum_sols(model_file, initlocations, mode, single_cluster, output_direc, namesum, arcimboldo=False): Clu = [] convn = {} counter_pdbs = 0 dic = {"heapSolutions": ADT.Heap()} for solu in initlocations: counter_pdbs += 1 prio = (1.0, 1.0) namerota = "" if not arcimboldo: namerota = 'ensemble' + str(counter_pdbs) else: namerota = "ensembleIDxx0FR" + str(counter_pdbs) + "_1" rota = {'rotationMatrices': [[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]], 'frac': [solu[3], solu[4], solu[5]], 'name': namerota, 'bfactor': 0.0, 'zscore': 1.0, 'numInRlist': counter_pdbs, 'quaternion': [0.0, 0.0, 0.0, 0.0], 'elong': 0.0, 'llg': 1.0, 'euler': [solu[0], solu[1], solu[2]]} if mode == "TRA" and arcimboldo: rota["name"] = rota["name"] + "-1" convn[rota['name']] = model_file if arcimboldo: if single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) else: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) else: if mode in ["ROT", "TRA"] and single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) elif mode == "ROT" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(dic) elif mode == "TRA" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) Clu = [] dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) writeSumClusters(Clu, os.path.join(output_direc, str(rota['n_prev_cluster'])), namesum, convn) if mode in ["ROT", "TRA"] and single_cluster: Clu.append(dic) if arcimboldo: writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "ROT": writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "TRA" and single_cluster: writeSumClusters(Clu, os.path.join(output_direc, str(0)), namesum, convn) def generateFakeMRSum(models_dir, mode, single_cluster, output_direc, namesum, arcimboldo=False): Clu = [] convn = {} counter_pdbs = 0 dic = {"heapSolutions": ADT.Heap()} for root, subFolders, files in os.walk(models_dir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): counter_pdbs += 1 prio = (1.0, 1.0) namerota = "" if not arcimboldo: namerota = 'ensemble' + str(counter_pdbs) else: namerota = "ensembleIDxx0FR" + str(counter_pdbs) + "_1" rota = {'rotationMatrices': [[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]], 'frac': [0.0, 0.0, 0.0], 'name': namerota, 'bfactor': 0.0, 'zscore': 1.0, 'numInRlist': counter_pdbs, 'quaternion': [0.0, 0.0, 0.0, 0.0], 'elong': 0.0, 'llg': 1.0, 'euler': [0.0, 0.0, 0.0]} if mode == "TRA" and arcimboldo: rota["name"] = rota["name"] + "-1" convn[rota['name']] = pdbf if arcimboldo: if single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) else: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) else: if mode in ["ROT", "TRA"] and single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) elif mode == "ROT" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(dic) elif mode == "TRA" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) Clu = [] dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) writeSumClusters(Clu, os.path.join(output_direc, str(rota['n_prev_cluster'])), namesum, convn) if mode in ["ROT", "TRA"] and single_cluster: Clu.append(dic) if arcimboldo: writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "ROT": writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "TRA" and single_cluster: writeSumClusters(Clu, os.path.join(output_direc, str(0)), namesum, convn) def mergeZSCOREinRNP(DicParameters, sumPACK, CluAll, convNames, isARCIMBOLDO_LITE=True): convPACK, CluPACK, RotClu, encn = readClustersFromSUMToDB(DicParameters, sumPACK, "ROTSOL") inter = [item[1] for sublist in map(lambda y: y["heapSolutions"].asList(), CluPACK) for item in sublist] zscored = {(dizio["name"], tuple(sorted(dizio["original_rotcluster"].split("_"))) if isARCIMBOLDO_LITE else dizio[ "n_prev_cluster"]): dizio["zscore"] for dizio in inter} Clu = [] for clun in CluAll: dic = {"heapSolutions": ADT.Heap()} for item in clun["heapSolutions"].asList(): prio, rota = item rota["zscore"] = zscored[(rota["name"], tuple(sorted(rota["original_rotcluster"].split("_"))) if isARCIMBOLDO_LITE else rota["n_prev_cluster"])] prio = (-1 * rota["llg"], -1 * rota["zscore"]) dic["heapSolutions"].push(prio, rota) Clu.append(dic) return Clu, convNames def readPMGYREsum(gyreSUMPath): f = open(gyreSUMPath, "r") alllines = f.readlines() f.close() CCVAL = [] for line in alllines: riga1 = line.split() model = getNewPathFromMerging(gyreSUMPath,riga1[1]) size = int(riga1[3]) rmsd_gyre = float(riga1[5]) rmsd_init = float(riga1[7]) core_init = int(riga1[9]) rmsd_fin = float(riga1[11]) core_fin = int(riga1[13]) rmsd_init_common = float(riga1[15]) rmsd_fin_common = float(riga1[17]) rmsd_diff = float(riga1[19]) core_common = int(riga1[21]) llg = float(riga1[23]) dizio = {"model": model, "size": size, "rmsd_gyre": rmsd_gyre, "rmsd_init": rmsd_init, "core_init": core_init, "rmsd_fin": rmsd_fin, "core_fin": core_fin, "rmsd_init_common": rmsd_init_common, "rmsd_fin_common": rmsd_fin_common, "rmsd_diff": rmsd_diff, "core_common": core_common, "llg": llg} CCVAL.append(dizio) return CCVAL def generateStatisticsPMGyre(CC_sol, outputDir, gyrePath, cycle_ref): if not os.path.exists(gyrePath): return if not os.path.exists(outputDir): os.makedirs(outputDir) CC_valuta = {} for ele in CC_sol: CC_valuta[os.path.basename(ele["corresp"])] = ele for q in range(cycle_ref - 1): sumg = os.path.join(gyrePath, str(q) + "/pm_gyre.sum") val = readPMGYREsum(sumg) graph1 = os.path.join(outputDir, "rmsdgyre_size_" + str(q)) graph1a = os.path.join(outputDir, "rmsdgyre_size_filtered_" + str(q)) qe = open(graph1 + ".scri", "w") qe.write("set terminal png size 800,1400\nset output \"" + graph1 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set ylabel "RMSD after Gyre"\n') qe.write('set xlabel "Size of the model"\n') qe.write("unset key\n") qc = open(graph1a + ".scri", "w") qc.write("set terminal png size 800,1400\nset output \"" + graph1a + ".png\"\n") qc.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qc.write("unset key\n") qc.write('set ylabel "RMSD after Gyre"\n') qc.write('set xlabel "Size of the model"\n') qn = open(graph1 + ".data", "w") qn.write("#\tX\tY\tC\n") qf = open(graph1a + ".data", "w") qf.write("#\tX\tY\tC\n") for ele in sorted(val, lambda x, y: cmp(x["size"], y["size"])): if ele["model"] in CC_valuta: qn.write(str(ele["size"]) + "\t" + str("%.2f" % ele["rmsd_gyre"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") qf.write(str(ele["size"]) + "\t" + str("%.2f" % ele["rmsd_gyre"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") else: qn.write(str(ele["size"]) + "\t" + str("%.2f" % ele["rmsd_gyre"]) + "\t" + str("%.2f" % 100) + "\n") qe.write("plot \"" + graph1 + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, 4 notitle with lines\n") qn.close() qe.close() qc.write("plot \"" + graph1a + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, 4 notitle with lines\n") qf.close() qc.close() graph2 = os.path.join(outputDir, "rmsdgyre_hyst_" + str(q)) qe = open(graph2 + ".scri", "w") qe.write("set terminal png size 2400,800\nset output \"" + graph2 + ".png\"\n") qe.write('set xlabel "RMSD after Gyre"\n') qe.write('set ylabel "Number of models"\n') qe.write("unset key\n") qe.write("""set xtics rotate out set style data histograms set style fill solid border set style histogram clustered """) qn = open(graph2 + ".data", "w") qn.write("#\tX\tY\n") dicr = {} for ele in val: if str("%.2f" % ele["rmsd_gyre"]) not in dicr: dicr[str("%.2f" % ele["rmsd_gyre"])] = 1 else: dicr[str("%.2f" % ele["rmsd_gyre"])] += 1 for key in sorted(dicr.keys(), lambda x, y: cmp(float(x), float(y))): hw = dicr[key] qn.write(key + "\t" + str(hw) + "\n") qe.write("plot \"" + graph2 + ".data\" using 2:xticlabels(1) title \"Frequency rmsd_gyre\"\n") qn.close() qe.close() graph3 = os.path.join(outputDir, "init_fin_" + str(q)) graph3a = os.path.join(outputDir, "init_fin_filtered_no_tr_" + str(q)) graph3b = os.path.join(outputDir, "init_fin_filtered_no_tr_bad_core_" + str(q)) qe = open(graph3 + ".scri", "w") qea = open(graph3a + ".scri", "w") qeb = open(graph3b + ".scri", "w") qe.write("set terminal png size 1000,1000\nset output \"" + graph3 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set xlabel "RMSD initial model vs final structure"\n') qe.write('set ylabel "RMSD gyred model vs final structure"\n') qe.write("unset key\n") qea.write("set terminal png size 1000,1000\nset output \"" + graph3a + ".png\"\n") qea.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qea.write('set xlabel "RMSD initial model vs final structure"\n') qea.write('set ylabel "RMSD gyred model vs final structure"\n') qea.write("unset key\n") qeb.write("set terminal png size 1000,1000\nset output \"" + graph3b + ".png\"\n") qeb.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qeb.write("unset key\n") qeb.write('set xlabel "RMSD initial model vs final structure"\n') qeb.write('set ylabel "RMSD gyred model vs final structure"\n') qn = open(graph3 + ".data", "w") qn.write("#\tX\tY\tC\n") qna = open(graph3a + ".data", "w") qna.write("#\tX\tY\tC\n") qnb = open(graph3b + ".data", "w") qnb.write("#\tX\tY\tC\n") for ele in sorted(val, lambda x, y: cmp(x["rmsd_init_common"], y["rmsd_init_common"])): if ele["model"] in CC_valuta: qn.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") qna.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") if ele["core_common"] >= (ele["size"] * 60) / 100.0: qnb.write(str("%.2f" % ele["rmsd_init_common"]) + "\t" + str( "%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") else: qn.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % 100) + "\n") qe.write("plot \"" + graph3 + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qn.close() qe.close() qea.write("plot \"" + graph3a + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qna.close() qea.close() qeb.write("plot \"" + graph3b + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qnb.close() qeb.close() graph4 = os.path.join(outputDir, "rmsd_diffn_" + str(q)) graph4a = os.path.join(outputDir, "rmsd_diffn_filtered_no_tr_" + str(q)) graph4b = os.path.join(outputDir, "rmsd_diffn_filtered_no_tr_bad_core_" + str(q)) qe = open(graph4 + ".scri", "w") qea = open(graph4a + ".scri", "w") qeb = open(graph4b + ".scri", "w") qe.write("set terminal png size 4000,500\nset output \"" + graph4 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n') qe.write('set xlabel "Model"\n') qe.write('set ylabel "Rmsd diff"\n') qe.write('set xtics rotate out\n') qe.write("unset key\n") qea.write("set terminal png size 3000,500\nset output \"" + graph4a + ".png\"\n") qea.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qea.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n') qea.write('set xlabel "Model"\n') qea.write('set ylabel "Rmsd diff"\n') qea.write('set xtics rotate out\n') qea.write("unset key\n") qeb.write("set terminal png size 2000,500\nset output \"" + graph4b + ".png\"\n") qeb.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qeb.write("unset key\n") qeb.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n') qeb.write('set xlabel "Model"\n') qeb.write('set ylabel "Rmsd diff"\n') qeb.write('set xtics rotate out\n') qn = open(graph4 + ".data", "w") qn.write("#\tN\tX\tY\tC\n") qna = open(graph4a + ".data", "w") qna.write("#\tN\tX\tY\tC\n") qnb = open(graph4b + ".data", "w") qnb.write("#\tN\tX\tY\tC\n") index = 1 indexa = 1 indexb = 1 for ele in sorted(val, lambda x, y: cmp(x["model"], y["model"])): if ele["model"] in CC_valuta: qn.write(str(index) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") qna.write(str(indexa) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") indexa += 1 if ele["core_common"] >= (ele["size"] * 60) / 100.0: qnb.write( str(indexb) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") indexb += 1 else: qn.write(str(index) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % 100) + "\n") index += 1 qe.write( "plot \"" + graph4 + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n") qn.close() qe.close() qea.write( "plot \"" + graph4a + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n") qna.close() qea.close() qeb.write( "plot \"" + graph4b + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n") qnb.close() qeb.close() def fillClusters(DicParameters, CluAll, merged_list, unmerged_list, convNames, quate, laue, ncs, cell_dim, clusteringAlg, threshold_alg): global LAST_AVAILABLE_ROTID # NOTE: fill the cluster of the merged_list i = 1 for merged in merged_list: while 1: if not os.path.exists(merged): time.sleep(1) continue break # C,e = readClustersFromSUM(merged) e, C, Rc, er = readClustersFromSUMToDB(DicParameters, merged, "ROTSOL") npt = os.path.join(os.path.split(merged)[0], "./0") try: shutil.rmtree(npt) except: "Cannot remove", npt for c in C: for item in c["heapSolutions"]: prio, rota = item blabla = rota["name"] rota["name"] = rota["name"] + "_" + str(i) CluAll[rota["n_prev_cluster"]]["heapSolutions"].push(prio, rota) convNames[rota["name"]] = e[blabla] # print "Merging",rota["name"],"in Cluster",rota["n_prev_cluster"] i += 1 # NOTE: for the unmerged clustering between them should be done and then add those clusters to CluAll list_rot_unmerged = [] for unmerged in unmerged_list: while 1: if not os.path.exists(unmerged): time.sleep(1) continue break C, e = readClustersFromSUM(unmerged) for c in C: for item in c["heapSolutions"]: prio, rota = item blabla = rota["name"] rota["name"] = rota["name"] + "_" + str(i) rota["n_prev_cluster"] = LAST_AVAILABLE_ROTID rota["original_rotcluster"] = str(LAST_AVAILABLE_ROTID) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, rota) list_rot_unmerged.append(dic) convNames[rota["name"]] = e[blabla] LAST_AVAILABLE_ROTID += 1 # print "Unmerged",rota["name"],"assigned cluster",LAST_AVAILABLE_ROTID i += 1 print "Start clustering unmerged rotations #:", len(list_rot_unmerged) performed, Clud = unifyClustersEquivalent(list_rot_unmerged, convNames, quate, laue, ncs, cell_dim, clusteringAlg, threshold_alg) print "...Done! Merged clusters", len(Clud) for c in Clud: CluAll.append(c) # print "Final number of clusters: ",CluAll print "Clusters before trying unify...", len(CluAll) performed, CluAll = unifyClustersEquivalent(CluAll, convNames, quate, laue, ncs, cell_dim, clusteringAlg, threshold_alg) print "Clusters after having unified", len(CluAll) CluAll, convNames = applyFilterName(CluAll, convNames) return CluAll, convNames def localizeModelFragmentsInRealStructure(mosedDir, predictedDir, outDir): dictRes = {} if not os.path.exists(outDir): os.makedirs(outDir) for root, subFolders, files in os.walk(predictedDir): for fileu in files: pdbf = os.path.join(root, fileu) if not pdbf.endswith(".pdb"): continue tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(pdbf, False) # print pdbf # print tupleResult # print "--------------------------" lisBigSS = tupleResult[1] strucc = tupleResult[0] cont = 0 idname = (os.path.basename(pdbf))[:-4] dictRes[idname] = [10000, ""] for root2, subFolders2, files2 in os.walk(mosedDir): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue tupleResult2 = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(pdbf2, False) lisBigSS2 = tupleResult2[1] strucc2 = tupleResult2[0] i = 0 idname2 = (os.path.basename(pdbf2))[:-4] translateStructurebyCentroidMass(cont, i, strucc, strucc2, outDir) tupleResultB = Bioinformatics.getFragmentListFromPDBUsingAllAtoms( outDir + str(cont) + "_" + str(i) + "_rottra.pdb", False) struFinal = tupleResultB[0] BlistFrags = tupleResultB[1] nWindows, comp_windows, anyWay = Bioinformatics.compareDistributionAccordingOrientation(lisBigSS2, BlistFrags, 10, 0, "A") result = False for t in range(len(nWindows)): prop = numpy.ceil((nWindows[t] * 40) / 100) if len(comp_windows[t]) >= (nWindows[t] - prop): result = True # TODO:mettere l'info dei residui riguardo tutti i frammenti non solo quelli del primo nella list preS = (((lisBigSS[0])["resIdList"])[0])[1] preF = (((lisBigSS[0])["resIdList"])[-1])[1] realS = (((lisBigSS2[0])["resIdList"])[0])[1] realF = (((lisBigSS2[0])["resIdList"])[-1])[1] rmsd = Bioinformatics.getRMSD(pdbf2, outDir + str(cont) + "_" + str(i) + "_rottra.pdb", "PDB", listmodel=lisBigSS2, doNotMove=True) rmsd = rmsd[0] if rmsd < dictRes[idname][0]: if dictRes[idname][1] != "": os.remove(dictRes[idname][1]) shutil.copyfile(outDir + str(cont) + "_" + str(i) + "_rottra.pdb", outDir + "best_" + str(idname) + ".pdb") dictRes[idname] = [rmsd, outDir + "best_" + str(idname) + ".pdb", result, comp_windows, nWindows, idname2, anyWay, preS, preF, realS, realF] elif result: print "Parallel but not best rmsd:" print "idname1", idname, preS, preF print "idname2", idname2, realS, realF print "actual rmsd", dictRes[idname][0] print "FOUND RMSD:", rmsd, "FILE", outDir + "best_" + str(idname) print comp_windows os.remove(outDir + str(cont) + "_" + str(i) + "_rottra.pdb") cont += 1 i += 1 return dictRes def anisotropyCorrection_and_test(cm, sym, DicGridConn, DicParameters, current_dir, mtz, F, SIGF, Intensities, Aniso, nice, pda, hkl, ent, shelxe_line): global NUMBER_OF_FILES_PER_DIRECTORY global PATH_NEW_PHASER global PATH_NEW_SHELXE # CONFIGURE NUMBER OF PARALLEL FILES if cm != None and cm.type_grid == "torque": NUMBER_OF_FILES_PER_DIRECTORY = cm.parallel_jobs # TEST PYTHON VERSION info_p = sys.version_info info_g = (sys.version).splitlines() print "Your Python version is: ", info_g[0] if info_p[0] == PYTHON_V[0] and info_p[1] == PYTHON_V[1]: if info_p[2] != PYTHON_V[2]: print colored("OK:", "green"), "Your python version is compatible with this standalone" else: print colored("OK:", "green"), "Your python version is compatible with this standalone" elif info_p[0] == PYTHON_V[0] and info_p[1] > PYTHON_V[1]: print colored("WARNING:", "yellow"), "Your python installation is newer than required,\nno issues have been encountered with this version.\nIf you experience an error, please, try the version " + str( PYTHON_V[0]) + "." + str(PYTHON_V[1]) + "." + str(PYTHON_V[2]) + " and report the bug." else: print colored("FATAL:", "red"), "Your python version is not updated and it is incompatible with this standalone.\nPlease, install the Python " + str( PYTHON_V[0]) + "." + str(PYTHON_V[1]) + "." + str(PYTHON_V[2]) + " version." sys.exit(1) # TEST DIRECTORY and MTZ if not os.path.exists(current_dir): print colored("FATAL:", "red"), "The working directory: " + str( current_dir) + " does not exist or it is not accessible for the user: " + str(getpass.getuser()) sys.exit(1) if not os.path.exists(mtz): print colored("FATAL:", "red"), "The mtz: " + str( os.path.abspath(mtz)) + " does not exist or it is not readable by the user: " + str(getpass.getuser()) sys.exit(1) # TEST CONNECTION try: SystemUtility.open_connection(DicGridConn, DicParameters, cm) except: print colored("FATAL", "red"), print "remote connection cannot be established. You were trying to connect to the remote grid at:" print colored("host", "blue"), print DicGridConn["host"] print colored("with port", "blue"), print DicGridConn["port"] print colored("as user", "blue"), print DicGridConn["username"] print "Please, check again that your credentials (including your key or password) are correct,\nand if the problem persists contact your local administrator to report this error." sys.exit(1) # TEST PHASER PATH and VERSION outlines = [] if (cm is None) or (not hasattr(cm, "channel")): # multiprocessing and local_grid if len(os.path.split(PATH_NEW_PHASER)[0]) == 0: p = subprocess.Popen(["which", PATH_NEW_PHASER], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() PATH_NEW_PHASER = out.strip() if not os.path.exists(PATH_NEW_PHASER): print colored("FATAL:", "red"), "The path given for phaser: " + str( os.path.abspath(PATH_NEW_PHASER)) + " does not exist or it is not accessible to the user: " + str( getpass.getuser()) sys.exit(1) f = open(os.path.join(current_dir, "th12323.sh"), "w") f.close() f = open(os.path.join(current_dir, "th12323.sh"), "r") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() f.close() # PULIZIA FILES INUTILI os.remove(os.path.join(current_dir, "th12323.sh")) if os.path.exists(os.path.join(current_dir, "PHASER.sum")): os.remove(os.path.join(current_dir, "PHASER.sum")) outlines = out.splitlines() else: # remote_grid cm.create_remote_file("th12323.sh") out = cm.connection.send_command_to_channel(cm.channel, '' + str(PATH_NEW_PHASER) + ' < th12323.sh', cm.promptB) outlines = out.splitlines() cm.remove_remote_file("th12323.sh") cm.remove_remote_file("PHASER.sum") version_ok = False for lin in outlines: if "Command not found" in lin: print colored("FATAL:", "red"), "The path given for phaser: " + str(os.path.abspath( PATH_NEW_PHASER)) + " does not exist in the remote workstation or it is not accessible for the remote user: " + \ DicGridConn["username"] SystemUtility.close_connection(DicGridConn, DicParameters, cm) version_ok = False sys.exit(1) elif PHASER_V1 in lin or PHASER_V2 in lin: version_ok = True # print "AAAAAAAAAAAAAAAAAAAAAAAAAA",out if not version_ok: print colored("FATAL:", "red"), "The Phaser version is not compatible with this standalone. Please, update your version to ", str( PHASER_V1), "or", str(PHASER_V2) # NOTE: This is temporary deactivated for developing purpose sys.exit(1) # TEST GRID current_dir2 = "" if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir("testx1245") print cm.change_remote_dir("testx1245") cm.copy_local_file(mtz, os.path.basename(mtz), send_now=True) # copy the file not create the link cm.copy_local_file(hkl, "test12345.hkl", send_now=True) if os.path.exists(ent): cm.copy_local_file(ent, "test12345.ent", send_now=True) # TEST PHASER f = open(os.path.join(current_dir, "anis.sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NCS" + "\n") f.write("MR_NCS" + "\n") if cm is not None or hasattr(cm, "channel"): f.write('HKLIN ' + str(os.path.basename(mtz)) + "\n") else: f.write('HKLIN ' + str(os.path.abspath(mtz)) + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Anisotropy and TNCS Correction" + "\n") f.write("TNCS EPSFAC WRITE anis.tncs" + "\n") f.write("NORM EPSFAC WRITE anis.norm" + "\n") f.write('ROOT anis\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(current_dir, "anis.sh"), "anis.sh", send_now=True) listaDirec = "" if hasattr(cm, "channel"): listaDirec = cm.get_remote_pwd() else: listaDirec = os.path.abspath(current_dir) def startANISO(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice -n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err if cm is None: try: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startANISO, current_dir, "anis") p.start() break else: print "FATAL ERROR: ARCIMBOLDO cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) except KeyboardInterrupt: print "The user requires ARCIMBOLDO to exit." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob("anis" + DicParameters["nameExecution"]) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = "anis.sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile(mtz, False) job.addInputFile("anis.sh", False) job.addOutputFile("anis.out", False) # job.addOutputFile("anis.mtz",False) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJob(job, isthelast=True, forcesubmit=True) if nq < 0: print colored("FATAL", "red"), "Phaser Job could not be queued in your cluster.\nPlease, check configuration or contact the Administrator." if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) trial = 10 sleep = 1 if cm is None: # multiprocessing trial = 60 # sleep = 3 sleep = 10 # testing, now the anis takes much longer elif not hasattr(cm, "channel"): # local grid trial = 90 sleep = 4 else: # remote grid trial = 150 sleep = 5 nt = 0 error = False while True: nt += 1 print "Trying PHASER test ", nt, "/", trial if nt >= trial: if hasattr(cm, "channel"): # print "Checking stat of the file" out = cm.connection.send_command_to_channel(cm.channel, 'stat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("anis.out"))), cm.promptB) outlines = out.splitlines() ok_stat = False for word in outlines: if word.strip().startswith("File:"): ok_stat = True break ok_size = False for word in outlines: if word.strip().startswith("Size:"): if float(word.strip().split()[1]) > 0: ok_size = True break if ok_stat and ok_size: print colored("FATAL", "red"), "It is impossible to perform Phaser jobs. Maybe the Label for F and SIGF are incorrect or the mtz is not standard.\n If Phaser has produced some output, it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True if ok_stat: out = cm.connection.send_command_to_channel(cm.channel, 'cat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("anis.out"))), cm.promptB) print out print "============================================================================================" print print "If no Phaser output is displayed, the Phaser job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration. Please, check again the relative configuration or contact your Administrator." print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue else: if os.path.exists(os.path.join(current_dir, "anis.out")) and os.stat( os.path.join(current_dir, "anis.out")).st_size > 0: f = open(os.path.join(current_dir, "anis.out"), "r") allf = f.read() f.close() print colored("FATAL", "red"), "It is impossible to perform Phaser jobs. Maybe the Label for F and SIGF are incorrect or the mtz is not standard.\n If Phaser has produced some output, it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True print allf print "============================================================================================" print print "If no Phaser output is displayed, the Phaser job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration. Please, check again the relative configuration or contact your Administrator." break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue if hasattr(cm, "channel"): wse = cm.get_remote_file("anis.out", os.path.join(current_dir, "anis.out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST, only_get_this=True, tryonetime=True) if isinstance(wse, bool) and not wse: time.sleep(sleep) continue # print cm.remove_remote_file("anis.out") # print cm.remove_remote_file("anis.sum") # print cm.remove_remote_file("anis.sh") wse = cm.get_remote_file("anis.mtz", os.path.join(current_dir, "anis.mtz"), only_get_this=True) wse = cm.get_remote_file("anis.tncs", os.path.join(current_dir, "anis.tncs"), only_get_this=True) wse = cm.get_remote_file("anis.norm", os.path.join(current_dir, "anis.norm"), only_get_this=True) break elif os.path.exists(os.path.join(current_dir, "anis.out")): wse = checkYOURoutput(os.path.join(current_dir, "anis.out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, sleep_ifnot_ready=False) if isinstance(wse, bool) and not wse: time.sleep(sleep) continue break else: # print "File ",os.path.join(outputDic,str(fromIn)+".out"),os.path.exists(os.path.join(outputDic,str(fromIn)+".out")) time.sleep(sleep) continue if os.path.exists(os.path.join(current_dir, "anis.sh")): os.remove(os.path.join(current_dir, "anis.sh")) if os.path.exists(os.path.join(current_dir, "anis.sum")): os.remove(os.path.join(current_dir, "anis.sum")) out_phaser = "" err_phaser = "" if os.path.exists(os.path.join(current_dir, "anis.out")): f = open(os.path.join(current_dir, "anis.out"), "r") out_phaser = f.read() f.close() if not error: print out_phaser os.remove(os.path.join(current_dir, "anis.out")) if not os.path.exists(os.path.join(current_dir, "anis.mtz")): print colored("FATAL", "red"), "It was not possible to generate the anisotropy scaled mtz of your data. The program will end now." error = True if error: sys.exit(1) # READING THE SPACEGROUP FROM PHASER OUT spaceGroup = readSpaceGroupFromOut(out_phaser) # TODO: check if the space group read is valid. I guess phaser would not work if it is not shoncke but yes if non standard # READING THE CELL DIMENSIONS FROM PHASER OUT cell_dim = cellDimensionFromOut(out_phaser) # READING THE RESOLUTION FROM PHASER OUT resolution = resolutionFromOut(out_phaser) # READING THE NUMBER OF UNIQUE REFLECTIONS FROM PHASER OUT unique_refl = uniqueReflectionsFromOut(out_phaser) # GENERATING PDA foc = open(os.path.join(current_dir, "test12345.pdb"), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) foc.write(CRD.format(**data) + "\n") foc.write(pda) foc.close() # NOTE: Test SHELXE if os.path.exists(os.path.join(current_dir, "test12345.pda")): os.remove(os.path.join(current_dir, "test12345.pda")) shutil.move(os.path.join(current_dir, "test12345.pdb"), os.path.join(current_dir, "test12345.pda")) if not os.path.exists(hkl): print colored("FATAL:", "red"), "The hkl: " + str( os.path.abspath(hkl)) + " does not exist or it is not readable by the user: " + str(getpass.getuser()) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) outlines = [] if (cm is None) or (not hasattr(cm, "channel")): # multiprocessing and local_grid if len(os.path.split(PATH_NEW_SHELXE)[0]) == 0: p = subprocess.Popen(["which", PATH_NEW_SHELXE], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() PATH_NEW_SHELXE = out.strip() if not os.path.exists(PATH_NEW_SHELXE): print colored("FATAL:", "red"), "The path given for shelxe: " + str( os.path.abspath(PATH_NEW_SHELXE)) + " does not exist or it is not accessible by the user: " + str( getpass.getuser()) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() outlines = out.splitlines() else: # remote_grid out = cm.connection.send_command_to_channel(cm.channel, '' + str(PATH_NEW_SHELXE), cm.promptB) outlines = out.splitlines() version_ok = False # print "============DEBUG=====================" # print out, err # print "======================================" for lin in outlines: if "Command not found" in lin: print colored("FATAL:", "red"), "The path given for shelxe: " + str(os.path.abspath( PATH_NEW_SHELXE)) + " does not exist in the remote workstation or it is not accessible by the remote user: " + \ DicGridConn["username"] SystemUtility.close_connection(DicGridConn, DicParameters, cm) version_ok = False if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) elif SHELXE_V1 in lin or SHELXE_V2 in lin or SHELXE_V3 in lin or SHELXE_V4 in lin: version_ok = True if not version_ok: print colored("FATAL:", "red"), "The SHELXE version is not compatible with this standalone. Please, update your version to ", str( SHELXE_V2) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(current_dir, "test12345.pda"), "test12345.pda", send_now=True) else: shutil.copyfile(hkl, os.path.join(current_dir, "test12345.hkl")) if os.path.exists(ent): shutil.copyfile(ent, os.path.join(current_dir, "test12345.ent")) def startExpJob(outputDirectory, op, lineargs): print "Executing..." print PATH_NEW_SHELXE, os.path.join(outputDirectory, str(op) + ".pda"), " ".join(lineargs) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE, str(op) + ".pda"] + lineargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() print err listaDirec = "" if hasattr(cm, "channel"): listaDirec = cm.get_remote_pwd() else: listaDirec = os.path.abspath(current_dir) nq = 1 lia = shelxe_line.split() print "Expansion of " + str(nq) + " models with:" print "hkl: " + str(hkl) print "Arguments: " + str(shelxe_line) nl = 0 if cm is None: try: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startExpJob, current_dir, "test12345", lia) p.start() break else: print "FATAL ERROR: Arcimboldo cannot load correctly information on CPUs." sym.couldIClose = True sys.exit(0) except KeyboardInterrupt: print "The user requires Arcimboldo to exit." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob("shelt" + DicParameters["nameExecution"]) if hasattr(cm, "channel"): job.setExecutable(os.path.join(cm.get_remote_pwd(), PATH_NEW_SHELXE)) else: job.setExecutable(PATH_NEW_SHELXE) job.setInitialDir(listaDirec) job.addInputFile("test12345.hkl", False) job.addInputFile("test12345.pda", False) if os.path.exists(ent): job.addInputFile("test12345.ent", False) lia = shelxe_line.split() lio = ["test12345.pda"] lei = lio + lia job.setArguments(lei) cm.setRequirements(SHELXE_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJob(job, isthelast=True, forcesubmit=True) if nq < 0: print colored("FATAL", "red"), "Shelxe Job could not be queued in your cluster.\nPlease, check configuration or contact the Administrator." if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) nt = 0 error = False while True: nt += 1 print "Trying SHELXE test ", nt, "/", trial if nt >= trial: if hasattr(cm, "channel"): out = cm.connection.send_command_to_channel(cm.channel, 'stat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("test12345.lst"))), cm.promptB) outlines = out.splitlines() ok_stat = False for word in outlines: if word.strip().startswith("File:"): ok_stat = True break ok_size = False for word in outlines: if word.strip().startswith("Size:"): if float(word.strip().split()[1]) > 0: ok_size = True break if ok_stat and ok_size: print colored("FATAL", "red"), "It is impossible to perform Shelxe jobs. Maybe the .hkl is not correct or shelxe arguments are incorrect.\n If Shelxe has produced some output it will be printed now. Please check for errors:\n\n" print "===========================================================================================" error = True if ok_stat: out = cm.connection.send_command_to_channel(cm.channel, 'cat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("test12345.lst"))), cm.promptB) print out print "============================================================================================" print print "If no Shelxe output is displayed, the Shelxe job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration or Shelxe version has expired. Please, check again the relative configuration or contact your Administrator." print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue else: if os.path.exists(os.path.join(current_dir, "test12345.lst")) and os.stat( os.path.join(current_dir, "test12345.lst")).st_size > 0: f = open(os.path.join(current_dir, "test12345.lst"), "r") allf = f.read() f.close() print colored("FATAL", "red"), "It is impossible to perform Shelxe jobs. Maybe the .hkl is not correct or shelxe arguments are incorrect.\n If Shelxe has produced some output it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True print allf print "============================================================================================" print print "If no Shelxe output is displayed, the Shelxe job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration or the Shelxe version has expired. Please, check again the relative configuration or contact your Administrator." break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue if hasattr(cm, "channel"): wse = cm.get_remote_file("test12345.lst", os.path.join(current_dir, "test12345.lst"), conditioEND=SHELXE_LST_END_CONDITION, testEND=SHELXE_LST_END_TEST, only_get_this=True, tryonetime=True) if isinstance(wse, bool) and not wse: time.sleep(sleep) continue break elif os.path.exists(os.path.join(current_dir, "test12345.lst")): wse = checkYOURoutput(os.path.join(current_dir, "test12345.lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST, sleep_ifnot_ready=False) if isinstance(wse, bool) and not wse: time.sleep(20) continue break else: time.sleep(sleep) continue initCC = None fneed = False if os.path.exists(os.path.join(current_dir, "test12345.lst")): f = open(os.path.join(current_dir, "test12345.lst"), "r") allshe = f.read() f.close() lines = allshe.split("\n") for up in range(len(lines)): line = lines[up] if (line.strip()).startswith("Overall CC between"): line3 = line.strip() line3L = line3.split() initCC = float((line3L[-1])[:-1]) if (line.strip()).startswith("<|E^2-1|>"): line3 = line.strip() line3L = line3.split() if len(line3L) > 3: fneed = (line3L[4] == "-f" and line3L[5] == "missing") else: fneed = False if initCC == None: print print allshe print print colored("FATAL", "red"), "The Shelxe test job has failed. Please, read the shelxe output for errors. Check shelxe line arguments, shelxe version and hkl format correctness." error = True if os.path.exists(os.path.join(current_dir, "test12345.pda")): os.remove(os.path.join(current_dir, "test12345.pda")) if os.path.exists(os.path.join(current_dir, "test12345.hkl")): os.remove(os.path.join(current_dir, "test12345.hkl")) if os.path.exists(os.path.join(current_dir, "test12345.phs")): os.remove(os.path.join(current_dir, "test12345.phs")) if os.path.exists(os.path.join(current_dir, "test12345.ent")): os.remove(os.path.join(current_dir, "test12345.ent")) if os.path.exists(os.path.join(current_dir, "test12345.pdo")): os.remove(os.path.join(current_dir, "test12345.pdo")) if os.path.exists(os.path.join(current_dir, "test12345_trace.ps")): os.remove(os.path.join(current_dir, "test12345_trace.ps")) if os.path.exists(os.path.join(current_dir, "test12345.lst")): os.remove(os.path.join(current_dir, "test12345.lst")) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) if error: sys.exit(1) return os.path.join(current_dir, "anis.mtz"), os.path.join(current_dir, "anis.norm"), os.path.join(current_dir, "anis.tncs"), F, SIGF, spaceGroup, cell_dim, resolution, unique_refl, out_phaser, err_phaser, fneed def anisotropyCorrection(current_dir, mtz, F, SIGF, Intensities, Aniso, nice): f = open(os.path.join(current_dir, "anis.sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NCS" + "\n") f.write("MR_NCS" + "\n") if cm is not None or hasattr(cm, "channel"): f.write('HKLIN ' + str(os.path.basename(mtz)) + "\n") else: f.write('HKLIN ' + str(os.path.abspath(mtz)) + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Anisotropy and TNCS Correction" + "\n") f.write("TNCS EPSFAC WRITE anis.tncs" + "\n") f.write("NORM EPSFAC WRITE anis.norm" + "\n") f.write('ROOT "' + os.path.join(current_dir, "anis") + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() f = open(os.path.join(current_dir, "anis.sh"), "r") p = None p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() f.close() print out print err # PULIZIA FILES INUTILI os.remove(os.path.join(current_dir, "anis.sh")) os.remove(os.path.join(current_dir, "anis.sum")) # NOTE: if to be used with non corrected mtz, change the labels return os.path.join(current_dir, "anis.mtz"), os.path.join(current_dir, "anis.norm"), os.path.join(current_dir, "anis.tncs"), F + "_ISO", SIGF + "_ISO", out, err def executePicasso(rootdir, sym, nameJob, model_file, quate, cell_dim, laue, ncs, Clusters, ensembles, ent, idealhelixpdb): dictRMSD = {} print "Initiliazing Picasso Post-Mortem Process..." dbdir = os.path.join(rootdir, "pmdb") if os.path.exists(dbdir): shutil.rmtree(dbdir) os.makedirs(dbdir) shutil.copyfile(ent, os.path.join(dbdir, os.path.basename(ent)[:-4] + ".pdb")) tupleResult = Bioinformatics.getFragmentListFromPDB(model_file, True, False) listaFra = tupleResult[1] old_ens_path = None pathide = None if len(listaFra) == 1 and listaFra[0]["sstype"] in ["ah", "ch"]: old_ens_path = ensembles.values()[0] new_ens_path = old_ens_path + ".bak" shutil.copyfile(old_ens_path, new_ens_path) print "Computing Gyre LLG for all possible rotations of helices of", listaFra[0]["fragLength"], "aa" print "Generating models..." print "Starting BORGES to find", model_file, "into", ent, "..." Parameters = {} Parameters["model"] = model_file Parameters["dir"] = dbdir Parameters["wdir"] = rootdir Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 1 Parameters["nilges"] = 5 Parameters["enhance_fold"] = True pars, opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars, opt, doCluster=False, superpose=False, sym=sym, process_join=True) if not os.path.exists(os.path.join(rootdir, "./library/")): print "BORGES cannot detect the fold in the real structure. Picasso will end!" print "Skipping Gyre LLG test! " else: howmany = [] pathide = os.path.join(rootdir, nameJob + "_idealhelix/") if os.path.exists(pathide): shutil.rmtree(pathide) shutil.move(os.path.join(rootdir, "./library/"), pathide) f2 = open(os.path.join(rootdir, nameJob + "_idealrot_pm.txt"), "w") for root2, subFolders2, files2 in os.walk(pathide): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue (rmsd, nref, ncom, allAtoms, compStru, pda, dictiocorresp) = Bioinformatics.getSuperimp(pdbf2, model_file, "PDB", getDictioCorresp=True) if rmsd < 0 or rmsd > 1.0: print pdbf2, "excluded..." continue f2.write(pdbf2 + "\tRMSD:" + str(rmsd) + "\n") howmany.append(pdbf2) pdapp = pda[0].splitlines() fc = open(pdbf2, "r") linc = fc.readlines() fc.close() pda = "" reference = Bioinformatics.getStructure("ref", pdbf2) natd = 1 for l in range(len(pdapp)): posi = pdapp[l][17:27] newposi = dictiocorresp[posi] + ' ' linat = str('%-5s' % (natd)) linea_stringa = pdapp[l][:6] + linat + pdapp[l][11:17] + newposi + pdapp[l][30:54] + pdapp[l][ 54:] natd += 1 pda += linea_stringa + "\n" f = open(pdbf2, "w") f.write(pda) f.close() f2.close() dipl = {} C = [] hp = ADT.Heap() brncv = {} rota = {"name": "ensemble1", "llg": 1.0, "zscore": 1.0, "numInRlist": 1, "n_prev_cluster": 0, "original_rotcluster": "0", "euler": [0.0, 0.0, 0.0], "frac": [0.0, 0.0, 0.0], "quaternion": [0.0, 0.0, 0.0, 0.0], "elong": 0, "bfactor": 0.0, "rotationMatrices": [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]]} for y in range(len(howmany)): rol = copy.deepcopy(rota) rol["name"] = "ensemble" + str(y) brncv[rol["name"]] = howmany[y] rol["llg"] = 0.0 rol["zscore"] = 0.0 hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, pathide, "examine", brncv) if listaFra[0]["fragLength"] >= 13: new_length = listaFra[0]["fragLength"] - 4 helix_list = Bioinformatics.getIdealHelicesFromLenghts([new_length], idealhelixpdb) f = open(old_ens_path, "w") f.write(helix_list[0][0]) f.close() print "Generating all rotation clusters and rotational symmetry equivalent in ", os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), "..." getTheTOPNOfEachCluster({}, 1, os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), "matrix", quate, Clusters, ensembles, 1, True, False, None, True, cell_dim, laue, ncs, modeTra="Cmass") print "Starting BORGES to find", model_file, "into", ent, "..." Parameters = {} Parameters["model"] = model_file Parameters["dir"] = dbdir Parameters["wdir"] = rootdir Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 1 Parameters["nilges"] = 5 Parameters["enhance_fold"] = True pars, opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars, opt, doCluster=False, superpose=False, sym=sym, process_join=True) if not os.path.exists(os.path.join(rootdir, "./library/")): print "BORGES cannot detect the fold in the real structure. Picasso will end!" try: shutil.rmtree(dbdir) if old_ens_path != None: os.remove(old_ens_path) shutil.move(new_ens_path, old_ens_path) shutil.rmtree(os.path.join(rootdir, "Final_Frags_" + nameJob + "/")) shutil.rmtree(os.path.join(rootdir, nameJob + "_mosed")) shutil.rmtree(os.path.join(rootdir, "Frags_ROTTRA_" + nameJob + "/")) except: pass return {}, None if os.path.exists(os.path.join(rootdir, nameJob + "_mosed")): shutil.rmtree(os.path.join(rootdir, nameJob + "_mosed")) shutil.move(os.path.join(rootdir, "./library/"), os.path.join(rootdir, nameJob + "_mosed")) for root2, subFolders2, files2 in os.walk(os.path.join(rootdir, nameJob + "_mosed")): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue (rmsd, nref, ncom, allAtoms, compStru, pda, dictiocorresp) = Bioinformatics.getSuperimp(pdbf2, model_file, "PDB", getDictioCorresp=True) if rmsd < 0 or rmsd > 1.0: print pdbf2, "excluded..." continue pdapp = pda[0].splitlines() fc = open(pdbf2, "r") linc = fc.readlines() fc.close() pda = "" reference = Bioinformatics.getStructure("ref", pdbf2) natd = 1 for l in range(len(pdapp)): posi = pdapp[l][17:27] newposi = dictiocorresp[posi] + ' ' linat = str('%-5s' % (natd)) linea_stringa = pdapp[l][:6] + linat + pdapp[l][11:17] + newposi + pdapp[l][30:54] + pdapp[l][54:] natd += 1 pda += linea_stringa + "\n" f = open(pdbf2, "w") f.write(pda) f.close() print "Artificially computing translations for model", model_file dictRMSD = localizeModelFragmentsInRealStructure(os.path.join(rootdir, nameJob + "_mosed"), os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), os.path.join(rootdir, "Frags_ROTTRA_" + nameJob + "/")) f = open(os.path.join(rootdir, nameJob + "_rot_pm.txt"), "w") for key in sorted(dictRMSD.keys()): value = dictRMSD[key] linea = "PREDICTED: " + str(key) + " [" + str(value[7]) + ":" + str(value[8]) + "]" + "\t\t" + "RMSD: " + str( value[0]) + "\t" + "FILE: " + str(value[1]) + "\t" + "REAL: " + str(value[5]) + " [" + str( value[9]) + ":" + str(value[10]) + "]" f.write(linea + "\n") linea = "DIST_VAL: " + str(value[2]) f.write(linea + "\n") for i in range(len(value[6])): linea = "\t[" f.write(linea + "\n") for t in range(len((value[6])[i])): linea = "\t\t" + str(((value[6])[i])[t]) f.write(linea + "\n") linea = "\t]" f.write(linea + "\n") linea = "---------------------------------------------------------------" f.write(linea + "\n") f.close() print "End of Picasso Post-Mortem Process" # CLEANING UP shutil.rmtree(dbdir) if old_ens_path != None: os.remove(old_ens_path) shutil.move(new_ens_path, old_ens_path) # shutil.rmtree(os.path.join(rootdir,nameJob+"_mosed")) # shutil.rmtree(os.path.join(rootdir,"Final_Frags_"+nameJob+"/")) # shutil.rmtree(os.path.join(rootdir,"Frags_ROTTRA_"+nameJob+"/")) if pathide is not None: return dictRMSD, os.path.join(pathide, "examine.sum") else: return dictRMSD, None def analyze_all_solutions(sym, i, DicParameters, CCV, cnv, hkl, cell_dim, spaceGroup, shlxLinea0, mosed_dir, outputDire): if os.path.exists(outputDire): shutil.rmtree(outputDire) os.makedirs(outputDire) for item in CCV: ent = item["corresp"] structure = Bioinformatics.getStructure("ent", ent) atomi = [] for model in structure: for chain in model: for resi in chain: for aty in resi: atomi += [aty] listatm = translateListByFrac(0, item["model"], atomi, item["shift_origin"], cell_dim, return_atoms=True) pdball = Bioinformatics.getPDBFromListOfAtom(listatm) CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f = open(os.path.join(outputDire, os.path.basename(ent)), "w") f.write(CRD.format(**data) + "\n") f.write(pdball[0]) f.close() """ ent = os.path.join(outputDire,os.path.basename(ent)) (nqueue6,convNames6) = startExpansion(sym,"20_EXPVAL_"+str(i), os.path.join(outputDire,"6_EXPVAL_LIBRARY/"),hkl,ent,cell_dim,spaceGroup,shlxLinea0,mosed_dir) CC_Val1 = evaluateExp_CC(DicParameters, sym, "6_EXPVAL_"+str(i), os.path.join(outputDire,"6_EXPVAL_LIBRARY/"),nqueue6,convNames6,isArcimboldo=True) shutil.move(os.path.join(outputDire,"6_EXPVAL_LIBRARY/solCC.sum"),os.path.join(outputDire,"sol_"+os.path.basename(ent)[:-4]+".sum")) shutil.rmtree(os.path.join(outputDire,"6_EXPVAL_LIBRARY/")) """ def trim_small_chains(pdbf, minRes): struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > minRes: chn.append(ch) chains = chn how = 0 allAtm = [] for cha in chains: how += len(cha.get_list()) for resi in cha: for atm in resi: allAtm.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm)[0] f = open(pdbf, "w") f.write(pdball) f.close() return how def generate_chunks_byChain_progressive(pdbf, min_res, direc): listPDBS = [] struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > 3: chn.append(ch) else: print "Pruning chain", ch.get_id(), len(ch.get_list()) chains = len(chn) print "====", min_res, chains - 1 for t in range(min_res, chains): print "evaluating", t listP = generate_chunks_byChain(pdbf, t, direc, addnum=len(listPDBS)) # print "=================================" # print listP # print "=================================" listPDBS += listP return listPDBS def generate_chunks_byChain(pdbf, omit_ss, direc, addnum=0): if not (os.path.exists(direc)): os.makedirs(direc) struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > 3: chn.append(ch) else: print "Pruning chain", ch.get_id(), len(ch.get_list()) chains = chn listPDBS = [] for t in range(len(chains)): piece = None if t == 0: piece = chains[t + omit_ss:] elif t == len(chains) - omit_ss: piece = chains[:t] else: if t + omit_ss < len(chains): piece = chains[:t] + chains[t + omit_ss:] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_ss) - len(chains) piece = chains[adn:t] # print adn,"-",t,len(piece) print "---", piece allAtm = [] how = 0 for cha in piece: how += len(cha.get_list()) for resi in cha: for atm in resi: allAtm.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm)[0] pdbid, a, b = os.path.basename(pdbf)[:-4].split("_") nomefi = pdbid + "_" + str(t + addnum + 1) + "_0.pdb" filename = os.path.join(direc, nomefi) f = open(filename, "w") f.write(pdball) f.close() listPDBS.append((filename, how)) return listPDBS def generate_chunks_bySS(pdbf, omit_ss, direc): if not (os.path.exists(direc)): os.makedirs(direc) tupleResult = Bioinformatics.getFragmentListFromPDB(pdbf, False, False) struc = tupleResult[0] listaFra = tupleResult[1] listPDBS = [] if not isinstance(omit_ss, tuple): for t in range(len(listaFra)): piece = None if t == 0: piece = listaFra[t + omit_ss:] elif t == len(listaFra) - omit_ss: piece = listaFra[:t] else: if t + omit_ss < len(listaFra): piece = listaFra[:t] + listaFra[t + omit_ss:] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_ss) - len(listaFra) piece = listaFra[adn:t] # print adn,"-",t,len(piece) pdball = Bioinformatics.getPDBFormattedAsString("0", piece, struc, "", useDizioConv=False)[1] qsa = os.path.basename(pdbf).split("_") filename = os.path.join(direc, qsa[0] + "-" + str(t) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb") f = open(filename, "w") f.write(pdball) f.close() how = 0 for fre in piece: how += fre["fragLength"] listPDBS.append((filename, how)) else: lista_ah = [] lista_bs = [] for fra in listaFra: if fra["sstype"] in ["ah", "ch"]: lista_ah.append(fra) elif fra["sstype"] in ["bs", "cbs"]: lista_bs.append(fra) num_ah = omit_ss[0] num_bs = omit_ss[1] piece_ah = [] for t in range(len(lista_ah)): piece = None if t == 0: piece = lista_ah[t + num_ah:] elif t == len(lista_ah) - num_ah: piece = lista_ah[:t] else: if t + num_ah < len(lista_ah): piece = lista_ah[:t] + lista_ah[t + num_ah:] else: adn = (t + num_ah) - len(lista_ah) piece = lista_ah[adn:t] piece_ah.append(piece) piece_bs = [] for t in range(len(lista_bs)): piece = None if t == 0: piece = lista_bs[t + num_bs:] elif t == len(lista_bs) - num_bs: piece = lista_bs[:t] else: if t + num_bs < len(lista_bs): piece = lista_bs[:t] + lista_bs[t + num_bs:] else: adn = (t + num_bs) - len(lista_bs) piece = lista_bs[adn:t] piece_bs.append(piece) for pie_ah in piece_ah: for pie_bs in piece_bs: piece = pie_ah + piece_bs pdball = Bioinformatics.getPDBFormattedAsString("0", piece, struc, "", useDizioConv=False)[1] qsa = os.path.basename(pdbf).split("_") filename = os.path.join(direc, qsa[0] + "-" + str(t) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb") f = open(filename, "w") f.write(pdball) f.close() how = 0 for fre in piece: how += fre["fragLength"] listPDBS.append((filename, how)) return listPDBS def generate_chunks_progressive(pdbf, from_omit_res, to_omit_res, step_size, mode_type, polyBack, mantCys, fix_ss, remove_coil, direc): listPDBS = [] indicators = {} for t in range(from_omit_res, to_omit_res): indicator_A = len(listPDBS) listPDBS += generate_chunks(pdbf, t, step_size, mode_type, polyBack, mantCys, fix_ss, remove_coil, direc, addnum=len(listPDBS)) indicator_B = len(listPDBS) indicators[t] = (indicator_A, indicator_B) return listPDBS, indicators def generate_chunks(pdbf, omit_res, step_size, mode_type, polyBack, mantCys, fix_ss, remove_coil, direc, addnum=0): pathname = copy.deepcopy(pdbf) if not (os.path.exists(direc)): os.makedirs(direc) listPDBS = [] limitis = 0 if fix_ss != None and remove_coil != None: tupleResult = Bioinformatics.getFragmentListFromPDB(pathname, False, False) struc = tupleResult[0] listaFra = tupleResult[1] listAtoms = [] for fra in listaFra: if (fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_ah") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_bs"): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: limitis += 1 for atm in residue: listAtoms.append(atm) if remove_coil == "maintain_coil": if not ((fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_ah") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_bs")): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: for atm in residue: listAtoms.append(atm) elif remove_coil == "remove_coil": if ((fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_bs") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_ah")): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: for atm in residue: listAtoms.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(listAtoms, renumber=True, uniqueChain=True)[0] f = open(pathname, "w") f.write(pdball) f.close() pdbf = cStringIO.StringIO(pdball) listAll = Bioinformatics.getListCA("model", pdbf, "PDB", backbone=True, allInList=True, minResInChain=3) # print "Residui",len(listAll[0][0]) workList = listAll[0][0] fixed_list = [] if limitis > 0: workList = listAll[0][0][:limitis] fixed_list = listAll[0][0][limitis:] for t in range(0, len(workList), step_size): piece = None omitted = None if t == 0: piece = workList[t + omit_res:] omitted = workList[:t + omit_res] elif t == len(workList) - omit_res: piece = workList[:t] omitted = workList[t:] else: if t + omit_res < len(workList): piece = workList[:t] + workList[t + omit_res:] omitted = workList[t:t + omit_res] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_res) - len(workList) piece = workList[adn:t] omitted = workList[:adn] + workList[t + 1:] # print adn,"-",t,len(piece) allAtm = [] newlip2 = [] for resi in piece: for atm in resi: allAtm.append(atm) vald = atm.get_parent().get_id()[1] if vald not in newlip2: newlip2.append(vald) newlip = [] omitatm = [] for resi in omitted: for atm in resi: vald = atm.get_parent().get_id()[1] omitatm.append(atm) if vald not in newlip: newlip.append(vald) if mode_type == "omit": allAtm += [item for sublist in fixed_list for item in sublist] pdball = \ Bioinformatics.getPDBFromListOfAtom(allAtm, chainFragment=True, polyala=polyBack, maintainCys=mantCys)[0] elif mode_type == "fragment": omitatm += [item for sublist in fixed_list for item in sublist] pdball = \ Bioinformatics.getPDBFromListOfAtom(omitatm, chainFragment=True, polyala=polyBack, maintainCys=mantCys)[0] qsa = os.path.basename(pathname).split("_") filename = os.path.join(direc, qsa[0] + "-" + str(t + addnum) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb") f = open(filename, "w") f.write(pdball) f.close() if mode_type == "omit": listPDBS.append([filename, len(piece), newlip]) elif mode_type == "fragment": listPDBS.append([filename, len(omitatm), newlip2]) """ diffch = [] for resi in omitted: for atm in resi: diffch.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm,chainFragment=True,diffchain=diffch)[0] filename = os.path.join(direc,os.path.basename(pathname)[:-4]+"-"+str(t+addnum)+"B.pdb") f = open(filename,"w") f.write(pdball) f.close() #listPDBS.append((filename,len(piece))) """ return listPDBS def startOMITllg_nogyre(DicParameters, cm, sym, DicGridConn, model_file, sizes, nameJob, outputDire, sumPath, howmany, indic, number_cluster, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False): convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, sumPath, "ROTSOL") Clusel = [] prio = None rota = None if number_cluster != None: for clu in CluAll: if clu["heapSolutions"].asList()[0][1]["n_prev_cluster"] == number_cluster: Clusel = clu prio, rota = clu["heapSolutions"].asList()[0] break liall = sorted(Clusel["heapSolutions"].asList(), __cmp_rota2, reverse=True) if not (os.path.exists(outputDire)): os.makedirs(outputDire) fildname = os.path.join(outputDire, str(1) + "_graph") if not os.path.exists( os.path.join(outputDire, "../../../library/peaks_" + str(number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/pklat_" + str(number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")): qe = open(fildname + ".scri", "w") qc = open(fildname + "_var.scri", "w") qe.write("set terminal png size 2400,2400\nset output \"" + fildname + ".png\"\n") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_var.png\"\n") totl = (sizes[1] - sizes[0]) / 2 qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qe.write("set multiplot layout " + str(totl) + ",2 columnsfirst\n") res_wllg = {} qn = open(fildname + "_var.data", "w") qn.write("#\tX\tY\n") for key in sorted(indic.keys()): qo = open(fildname + "_" + str(key) + ".data", "w") qo.write("#\tX\tY\n") qot = open(fildname + "_" + str(key) + "TOP.data", "w") qot.write("#\tX\tY\n") start, fine = indic[key] all_var = [] for jk in range(start, fine): svd = [] pr = None rk = None for item in liall: pr, rk = item if os.path.basename(convNames[rk["name"]]).split("-")[-1][:-4] == str(jk): break # NOTE: to be honest rk cannot be None because it will be the last rotation of the cluster if it is not found the correct # one, so llg will never be 0. To have rk == None a boolean flag or something else must be used, but in this way # like it is now maybe is even better because we put the lowest llg observed in the cluster for the models that # are not present in the cluster if rk == None: rk = {"llg": 0} else: qo.write(str(jk) + "\t" + str(rk["llg"]) + "\t\t#" + str(howmany[jk][2]) + "\n") svd.append(rk["llg"]) all_var.append(rk["llg"]) howmany[jk].append(rk["llg"]) for rendr in howmany[jk][2]: if rendr in res_wllg.keys(): res_wllg[rendr] = [res_wllg[rendr][0] + ( rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0]))), res_wllg[rendr][1] + 1] else: res_wllg[rendr] = [rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0])), 1] svd = sorted(svd, reverse=True) # print "---",jk,svd qot.write(str(jk) + "\t" + str(svd[0]) + "\t\t#" + str(howmany[jk][2]) + "\n") qn.write(str(key) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() qot.close() qe.write("plot \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"p" + str( key) + "\" with points, \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"l" + str( key) + "\" smooth csplines, \"" + fildname + "_" + str(key) + "TOP.data\" using 1:2 title \"t" + str( key) + "\" with lines\n") qe.write("unset multiplot\n") qc.write( "plot \"" + fildname + "_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qn.close() qe.close() qc = open(fildname + "_wllg_var.scri", "w") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_wllg_var.png\"\n") qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qc.write( "plot \"" + fildname + "_wllg_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qo = open(fildname + "_wllg.data", "w") qo.write("#\tX\tY\n") chiavi = [] valori = [] for key in sorted(res_wllg.keys()): # qo.write(str(key)+"\t"+str(-1*numpy.log(res_wllg[key][0]/float(res_wllg[key][1])))+"\t\t#"+str(res_wllg[key])+"\n") qo.write( str(key) + "\t" + str(res_wllg[key][0] / float(res_wllg[key][1])) + "\t\t#" + str(res_wllg[key]) + "\n") chiavi.append(key) valori.append(res_wllg[key][0] / float(res_wllg[key][1])) qo.close() # IMPORTANT: Find the peaks def __cmp_peaks(a, b): return cmp(a[1], b[1]) max_peaks = ADT.top_max_peaks(valori, chiavi, 4) minp = (sorted(max_peaks, __cmp_peaks))[0] flat_regions = ADT.flat_regions(valori, chiavi, minp[1]) # FIND THE RESIDUES RANGES TO OMIT FOR EACH PEAK del_resi = [] for peak in max_peaks: all_kurtosis = [] res_center = peak[0] for key in indic.keys(): llgs = [] llg_resi = [] start, fine = indic[key] for blur in range(start, fine): if res_center in howmany[blur][2]: llgs.append(howmany[blur][3]) llg_resi.append((howmany[blur][2], howmany[blur][3])) topllgresi = (sorted(llg_resi, __cmp_peaks, reverse=True))[0] all_kurtosis.append((key, scipy.stats.kurtosis(llgs), topllgresi)) print "ANALYSIS PEAK", peak print "....................................KURTOSIS.................................." for kurto in all_kurtosis: print kurto print ".............................................................................." topkurtosis = (sorted(all_kurtosis, __cmp_peaks, reverse=True))[0] del_resi.append(topkurtosis[2]) res_list_40, safe_llg_40 = ADT.get_percentile_llg(valori, chiavi, 0.40) res_list_50, safe_llg_50 = ADT.get_percentile_llg(valori, chiavi, 0.50) res_list_55, safe_llg_55 = ADT.get_percentile_llg(valori, chiavi, 0.55) res_list_60, safe_llg_60 = ADT.get_percentile_llg(valori, chiavi, 0.60) res_list_65, safe_llg_65 = ADT.get_percentile_llg(valori, chiavi, 0.65) res_list_70, safe_llg_70 = ADT.get_percentile_llg(valori, chiavi, 0.70) res_list_75, safe_llg_75 = ADT.get_percentile_llg(valori, chiavi, 0.75) res_list_80, safe_llg_80 = ADT.get_percentile_llg(valori, chiavi, 0.80) res_list_85, safe_llg_85 = ADT.get_percentile_llg(valori, chiavi, 0.85) print "===========RESIDUE TO OMIT=======" resi_list_1 = [] for lista in del_resi: print "--", lista, "--" resi_list_1 += lista[0] print "=================================" print print "===========FLAT REGIONS==========" resi_list_2 = copy.deepcopy(resi_list_1) for flat in flat_regions: print range(flat[0], flat[1] + 1) resi_list_2 += range(flat[0], flat[1] + 1) print "=================================" trimmed_portion = ADT.erase_bad_region(valori, chiavi, minp[1]) resi_list_3 = trimmed_portion print print "//////////TRIMMED REGIONS////////" print trimmed_portion print "/////////////////////////////////" print if not os.path.exists(fildname + "_wllg.scri"): qe = open(fildname + "_wllg.scri", "w") qe.write("set terminal png size 4200,1200\nset output \"" + fildname + "_wllg.png\"\n") qe.write("set xtics 1\n") for pek in max_peaks: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead" + "\n") for pek in flat_regions: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead lt 3 lc 3" + "\n") qe.write("set arrow from " + str(int(pek[1])) + ", graph 0 to " + str( int(pek[1])) + ", graph 1 nohead lt 3 lc 3" + "\n") # qe.write("plot \""+fildname+"_wllg.data\" using 1:2 title \"p\" with points, \""+fildname+"_wllg.data\" using 1:2 title \"l\" smooth csplines, "+str(minp[1])+" notitle with lines, "+str(safe_llg)+" notitle with lines\n") qe.write( "plot \"" + fildname + "_wllg.data\" using 1:2 title \"p\" with points, \"" + fildname + "_wllg.data\" using 1:2 title \"l\" smooth csplines, " + str( safe_llg_40) + " title \"p40\" with lines, " + str( safe_llg_50) + " title \"p50\" with lines, " + str( safe_llg_55) + " title \"p55\" with lines, " + str( safe_llg_60) + " title \"p60\" with lines, " + str( safe_llg_65) + " title \"p65\" with lines, " + str( safe_llg_70) + " title \"p70\" with lines, " + str( safe_llg_75) + " title \"p75\" with lines, " + str( safe_llg_80) + " title \"p80\" with lines, " + str(safe_llg_85) + " title \"p85\" with lines\n") qe.close() qo = open(fildname + "_wllg_var.data", "w") qo.write("#\tX\tY\n") # all_var = map(lambda x: -1*numpy.log(x[0]/float(x[1])),res_wllg.values()) all_var = map(lambda x: x[0] / float(x[1]), res_wllg.values()) qo.write(str(number_cluster) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() resi_1 = [] resi_2 = [] resi_3 = [] for resi in resi_list_1: if resi in res_list_75: continue else: resi_1.append(resi) for resi in resi_list_2: if resi in res_list_75: continue else: resi_2.append(resi) for resi in resi_list_3: if resi in res_list_75: continue else: resi_3.append(resi) Bioinformatics.generatePDBomitting(model_file, resi_list_1, os.path.join(outputDire, "../../../library/peaks_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDBomitting(model_file, resi_list_2, os.path.join(outputDire, "../../../library/pklat_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) # Bioinformatics.generatePDBomitting(model_file,resi_list_3,os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb"),trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, resi_list_3, os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_40, os.path.join(outputDire, "../../../library/percentile40_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_50, os.path.join(outputDire, "../../../library/percentile50_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_55, os.path.join(outputDire, "../../../library/percentile55_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_60, os.path.join(outputDire, "../../../library/percentile60_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_65, os.path.join(outputDire, "../../../library/percentile65_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_70, os.path.join(outputDire, "../../../library/percentile70_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_75, os.path.join(outputDire, "../../../library/percentile75_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_80, os.path.join(outputDire, "../../../library/percentile80_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_85, os.path.join(outputDire, "../../../library/percentile85_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) """ dipl = {} C = [] hp = ADT.Heap() brncv = {} models_final = [os.path.join(outputDire,"../../../library/peaks_"+str(number_cluster)+"_0.pdb"),os.path.join(outputDire,"../../../library/pklat_"+str(number_cluster)+"_0.pdb"),os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb")] for y in range(3): rol = copy.deepcopy(rota) rol["name"] = "ensemble"+str(y) brncv[rol["name"]] = models_final[y] hp.push((-1*rol["llg"],-1*rol["zscore"]),rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, outputDire, "final", brncv) convNames,CluAll,RotClu,encn = readClustersFromSUMToDB(DicParameters, os.path.join(outputDire,"final.sum"),"ROTSOL") """ """ for du in Dust: for item in du["heapSolutions"].asList(): pi,ri = item pdbf = cnv[ri["name"]] trim_small_chains(pdbf,3) """ return convNames def startOMITllg(DicParameters, cm, sym, DicGridConn, mode, sizes, nameJob, outputDire, model_file, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, spaceGroup, frag_fixed, convNames, quate, laue, ncs, clusteringAlg, cell_dim, thresholdCompare, evaLLONG, sumPath, howmany, indic, tops=None, USE_TNCS=True, LIMIT_CLUSTER=None, sampl=-1, VRMS=False, BFAC=False, trim_to_polyala=True, sigr=0.0, sigt=0.0, preserveChains=False, ent=None, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False): limit_times = 2 size_rnp = copy.deepcopy(sizes) if mode == "RNP": limit_times = len(sizes) Clust = [] cnv = {} if sumPath != None and os.path.exists(sumPath): Clust, cnv = readClustersFromSUM(sumPath) else: din = {"heapSolutions": ADT.Heap()} lo = {"name": "ensemble1", "llg": 1.0, "zscore": 1.0, "numInRlist": 1, "n_prev_cluster": 0, "original_rotcluster": "0", "euler": [0.0, 0.0, 0.0], "frac": [0.0, 0.0, 0.0], "quaternion": [0.0, 0.0, 0.0, 0.0], "elong": 0, "bfactor": 0.0, "rotationMatrices": [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]]} din["heapSolutions"].push((-1 * lo["llg"], -1 * lo["zscore"]), lo) Clust.append(din) cnv["ensemble1"] = model_file mode = "FRF" Dust = [] for tru in Clust: sol = tru["heapSolutions"].pop() Dust.append({"heapSolutions": ADT.Heap()}) while sol != None: prio, rota = sol try: sol = tru["heapSolutions"].pop() except: sol = None if LIMIT_CLUSTER != None and rota["n_prev_cluster"] != LIMIT_CLUSTER: continue # print "---------",rota["name"],cnv[rota["name"]] number_cluster = rota["n_prev_cluster"] times = 0 if True: times += 1 if not os.path.exists( os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum")): dipl = {} C = [] hp = ADT.Heap() brncv = {} tdncv = {} for y in range(len(howmany)): rol = copy.deepcopy(rota) rol["name"] = "ensemble" + str(y) brncv[rol["name"]] = howmany[y][0] tdncv[rol["name"]] = howmany[y][1] hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, outputDire, "examine", brncv) convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, os.path.join(outputDire, "examine.sum"), "ROTSOL") SystemUtility.open_connection(DicGridConn, DicParameters, cm) if mode == "RNP": ######METHOD RNP######## (nqueue10, convino) = startRNP(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times), CluAll, brncv, os.path.join(outputDire, str(times)), mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, spaceGroup, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER, sampl=sampl, tops=tops, VRMS=VRMS, USE_TNCS=USE_TNCS, USE_RGR=USE_RGR, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL, RNP_GYRE=RNP_GYRE) CluAll, convNames, tolose = evaluateFTF(DicParameters, cm, sym, DicGridConn, nameJob + rota["name"] + "_" + str(times), os.path.join(outputDire, str(times)), nqueue10, brncv, -10, frag_fixed, quate, "RNP", laue, ncs, clusteringAlg, cell_dim, thresholdCompare, evaLLONG, LIMIT_CLUSTER=LIMIT_CLUSTER, convNames=convino, tops=1) sumPACK = os.path.join(outputDire, "examine.sum") CluAll, convNames = mergeZSCOREinRNP(DicParameters, sumPACK, CluAll, convNames) writeSumClusters(CluAll, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", convNames, LIMIT_CLUSTER=LIMIT_CLUSTER) elif mode == "FRF": (nqueue, convNames) = startFRF(DicParameters=DicParameters, cm=cm, sym=sym, nameJob=nameJob + rota["name"] + "_" + str(times), dir_o_liFile=os.path.join(outputDire, rota["name"] + "_" + str( times) + "_models"), outputDire=os.path.join(outputDire, str(times)), mtz=mtz, MW=MW, NC=NC, F=F, SIGF=SIGF, Intensities=Intensities, Aniso=Aniso, normfactors=normfactors, tncsfactors=tncsfactors, nice=nice, RMSD=RMSD, lowR=lowR, highR=highR, final_rot=75, save_rot=75, frag_fixed=frag_fixed, spaceGroup=spaceGroup, sampl=sampl, VRMS=VRMS, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL) CluAll, Rotclu = evaluateFRF_clusterOnce(DicParameters, cm, sym, DicGridConn, [], nameJob + rota["name"] + "_" + str(times), os.path.join(outputDire, str(times)), nqueue, quate, laue, ncs, spaceGroup, convNames, clusteringAlg, -100.0, frag_fixed, cell_dim, thresholdCompare, evaLLONG, applyNameFilter=False, tops=None) writeSumClusters(CluAll, outputDire, "clus", convNames) Clulu, cnvu = readClustersFromSUM(os.path.join(outputDire, "clus.sum")) Dur = [{"heapSolutions": ADT.Heap()}] cndn = {} for cr in Clulu: for item in cr["heapSolutions"].asList(): pr, ty = item pdf = cnvu[ty["name"]] vks = os.path.basename(pdf).split("-")[1][:-4] ty["name"] = "ensemble" + str(vks) cndn[ty["name"]] = pdf ty["n_prev_cluster"] = 0 ty["original_rotcluster"] = "0" Dur[0]["heapSolutions"].push((-1 * ty["llg"], -1 * ty["zscore"]), ty) writeSumClusters(Dur, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", cndn) # convNames = readClustersFromSUMToDB(DicParameters,os.path.join(outputDire, "clus_merged.sum"),"ROTSOL") elif mode == "BRF": nq, conv2 = startBRF(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times) + "_A", CluAll, brncv, os.path.join(outputDire, str(times) + "_A"), mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, 1, spaceGroup, 75, sampl=sampl, LIMIT_CLUSTER=LIMIT_CLUSTER, USE_TNCS=USE_TNCS, isOMIT=True, VRMS=VRMS, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL) SystemUtility.endCheckQueue() convNames, Clud = evaluateBRF(DicParameters, cm, sym, DicGridConn, nameJob + rota["name"] + "_" + str(times) + "_A", os.path.join(outputDire, str(times) + "_A"), True, cell_dim, quate, conv2, None, brncv, LIMIT_CLUSTER=LIMIT_CLUSTER, isOMIT=True) writeSumClusters(Clud, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", convNames) else: # if mode == "RGR": ######METHOD RGR######## nq, conv2 = startRGR(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times) + "_A", CluAll, brncv, os.path.join(outputDire, str(times) + "_A"), mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, 1, spaceGroup, 75, sampl=sampl, LIMIT_CLUSTER=LIMIT_CLUSTER, USE_TNCS=USE_TNCS, isOMIT=True, VRMS=VRMS, BFAC=BFAC, sigr=sigr, sigt=sigt, preserveChains=preserveChains, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL) SystemUtility.endCheckQueue() convNames, Clud = evaluateRGR(DicParameters, cm, sym, DicGridConn, nameJob + rota["name"] + "_" + str(times) + "_A", os.path.join(outputDire, str(times) + "_A"), True, cell_dim, quate, conv2, None, brncv, LIMIT_CLUSTER=LIMIT_CLUSTER, isOMIT=True, ent=ent) writeSumClusters(Clud, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", convNames) Cnew, cnvnew = readClustersFromSUM( os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum")) os.remove(os.path.join(outputDire, "examine.sum")) if os.path.exists(os.path.join(outputDire, "examine.sum")): os.remove(os.path.join(outputDire, "examine.sum")) Cnew, cnvnew = readClustersFromSUM( os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum")) liall = Cnew[0]["heapSolutions"].asList() fildname = os.path.join(outputDire, str(times) + "_graph") if not os.path.exists(os.path.join(outputDire, "../../../library/peaks_" + str( number_cluster) + "_0.pdb")) or not os.path.exists(os.path.join(outputDire, "../../../library/pklat_" + str( number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")): qe = open(fildname + ".scri", "w") qc = open(fildname + "_var.scri", "w") qe.write("set terminal png size 2400,2400\nset output \"" + fildname + ".png\"\n") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_var.png\"\n") totl = (sizes[1] - sizes[0]) / 2 qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qe.write("set multiplot layout " + str(totl) + ",2 columnsfirst\n") res_wllg = {} qn = open(fildname + "_var.data", "w") qn.write("#\tX\tY\n") for key in sorted(indic.keys()): qo = open(fildname + "_" + str(key) + ".data", "w") qo.write("#\tX\tY\n") qot = open(fildname + "_" + str(key) + "TOP.data", "w") qot.write("#\tX\tY\n") start, fine = indic[key] all_var = [] for jk in range(start, fine): svd = [] for item in liall: pr, rk = item if rk["name"] == "ensemble" + str(jk): qo.write(str(jk) + "\t" + str(rk["llg"]) + "\t\t#" + str(howmany[jk][2]) + "\n") svd.append(rk["llg"]) all_var.append(rk["llg"]) howmany[jk].append(rk["llg"]) for rendr in howmany[jk][2]: if rendr in res_wllg.keys(): res_wllg[rendr] = [res_wllg[rendr][0] + ( rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0]))), res_wllg[rendr][1] + 1] else: res_wllg[rendr] = [ rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0])), 1] svd = sorted(svd, reverse=True) # print "---",jk,svd qot.write(str(jk) + "\t" + str(svd[0]) + "\t\t#" + str(howmany[jk][2]) + "\n") qn.write(str(key) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() qot.close() qe.write("plot \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"p" + str( key) + "\" with points, \"" + fildname + "_" + str( key) + ".data\" using 1:2 title \"l" + str( key) + "\" smooth csplines, \"" + fildname + "_" + str( key) + "TOP.data\" using 1:2 title \"t" + str(key) + "\" with lines\n") qe.write("unset multiplot\n") qc.write( "plot \"" + fildname + "_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qn.close() qe.close() qc = open(fildname + "_wllg_var.scri", "w") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_wllg_var.png\"\n") qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qc.write( "plot \"" + fildname + "_wllg_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qo = open(fildname + "_wllg.data", "w") qo.write("#\tX\tY\n") chiavi = [] valori = [] for key in sorted(res_wllg.keys()): # qo.write(str(key)+"\t"+str(-1*numpy.log(res_wllg[key][0]/float(res_wllg[key][1])))+"\t\t#"+str(res_wllg[key])+"\n") qo.write(str(key) + "\t" + str(res_wllg[key][0] / float(res_wllg[key][1])) + "\t\t#" + str( res_wllg[key]) + "\n") chiavi.append(key) valori.append(res_wllg[key][0] / float(res_wllg[key][1])) qo.close() # IMPORTANT: Find the peaks def __cmp_peaks(a, b): return cmp(a[1], b[1]) max_peaks = ADT.top_max_peaks(valori, chiavi, 4) minp = (sorted(max_peaks, __cmp_peaks))[0] flat_regions = ADT.flat_regions(valori, chiavi, minp[1]) # FIND THE RESIDUES RANGES TO OMIT FOR EACH PEAK del_resi = [] for peak in max_peaks: all_kurtosis = [] res_center = peak[0] for key in indic.keys(): llgs = [] llg_resi = [] start, fine = indic[key] for blur in range(start, fine): if res_center in howmany[blur][2]: llgs.append(howmany[blur][3]) llg_resi.append((howmany[blur][2], howmany[blur][3])) topllgresi = (sorted(llg_resi, __cmp_peaks, reverse=True))[0] all_kurtosis.append((key, scipy.stats.kurtosis(llgs), topllgresi)) print "ANALYSIS PEAK", peak print "....................................KURTOSIS.................................." for kurto in all_kurtosis: print kurto print ".............................................................................." topkurtosis = (sorted(all_kurtosis, __cmp_peaks, reverse=True))[0] del_resi.append(topkurtosis[2]) res_list_40, safe_llg_40 = ADT.get_percentile_llg(valori, chiavi, 0.40) res_list_50, safe_llg_50 = ADT.get_percentile_llg(valori, chiavi, 0.50) res_list_55, safe_llg_55 = ADT.get_percentile_llg(valori, chiavi, 0.55) res_list_60, safe_llg_60 = ADT.get_percentile_llg(valori, chiavi, 0.60) res_list_65, safe_llg_65 = ADT.get_percentile_llg(valori, chiavi, 0.65) res_list_70, safe_llg_70 = ADT.get_percentile_llg(valori, chiavi, 0.70) res_list_75, safe_llg_75 = ADT.get_percentile_llg(valori, chiavi, 0.75) res_list_80, safe_llg_80 = ADT.get_percentile_llg(valori, chiavi, 0.80) res_list_85, safe_llg_85 = ADT.get_percentile_llg(valori, chiavi, 0.85) print "===========RESIDUE TO OMIT=======" resi_list_1 = [] for lista in del_resi: print "--", lista, "--" resi_list_1 += lista[0] print "=================================" print print "===========FLAT REGIONS==========" resi_list_2 = copy.deepcopy(resi_list_1) for flat in flat_regions: print range(flat[0], flat[1] + 1) resi_list_2 += range(flat[0], flat[1] + 1) print "=================================" trimmed_portion = ADT.erase_bad_region(valori, chiavi, minp[1]) resi_list_3 = trimmed_portion print print "//////////TRIMMED REGIONS////////" print trimmed_portion print "/////////////////////////////////" print if not os.path.exists(fildname + "_wllg.scri"): qe = open(fildname + "_wllg.scri", "w") qe.write("set terminal png size 4200,1200\nset output \"" + fildname + "_wllg.png\"\n") qe.write("set xtics 1\n") for pek in max_peaks: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead" + "\n") for pek in flat_regions: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead lt 3 lc 3" + "\n") qe.write("set arrow from " + str(int(pek[1])) + ", graph 0 to " + str( int(pek[1])) + ", graph 1 nohead lt 3 lc 3" + "\n") # qe.write("plot \""+fildname+"_wllg.data\" using 1:2 title \"p\" with points, \""+fildname+"_wllg.data\" using 1:2 title \"l\" smooth csplines, "+str(minp[1])+" notitle with lines, "+str(safe_llg)+" notitle with lines\n") qe.write( "plot \"" + fildname + "_wllg.data\" using 1:2 title \"p\" with points, \"" + fildname + "_wllg.data\" using 1:2 title \"l\" smooth csplines, " + str( safe_llg_40) + " title \"p40\" with lines, " + str( safe_llg_50) + " title \"p50\" with lines, " + str( safe_llg_55) + " title \"p55\" with lines, " + str( safe_llg_60) + " title \"p60\" with lines, " + str( safe_llg_65) + " title \"p65\" with lines, " + str( safe_llg_70) + " title \"p70\" with lines, " + str( safe_llg_75) + " title \"p75\" with lines, " + str( safe_llg_80) + " title \"p80\" with lines, " + str( safe_llg_85) + " title \"p85\" with lines\n") qe.close() qo = open(fildname + "_wllg_var.data", "w") qo.write("#\tX\tY\n") # all_var = map(lambda x: -1*numpy.log(x[0]/float(x[1])),res_wllg.values()) all_var = map(lambda x: x[0] / float(x[1]), res_wllg.values()) qo.write(str(LIMIT_CLUSTER) + "\t" + str(numpy.mean(all_var)) + "\t" + str( numpy.std(all_var)) + "\n") qo.close() resi_1 = [] resi_2 = [] resi_3 = [] for resi in resi_list_1: if resi in res_list_75: continue else: resi_1.append(resi) for resi in resi_list_2: if resi in res_list_75: continue else: resi_2.append(resi) for resi in resi_list_3: if resi in res_list_75: continue else: resi_3.append(resi) Bioinformatics.generatePDBomitting(model_file, resi_list_1, os.path.join(outputDire, "../../../library/peaks_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDBomitting(model_file, resi_list_2, os.path.join(outputDire, "../../../library/pklat_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) # Bioinformatics.generatePDBomitting(model_file,resi_list_3,os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb"),trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, resi_list_3, os.path.join(outputDire, "../../../library/overt_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_40, os.path.join(outputDire, "../../../library/percentile40_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_50, os.path.join(outputDire, "../../../library/percentile50_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_55, os.path.join(outputDire, "../../../library/percentile55_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_60, os.path.join(outputDire, "../../../library/percentile60_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_65, os.path.join(outputDire, "../../../library/percentile65_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_70, os.path.join(outputDire, "../../../library/percentile70_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_75, os.path.join(outputDire, "../../../library/percentile75_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_80, os.path.join(outputDire, "../../../library/percentile80_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_85, os.path.join(outputDire, "../../../library/percentile85_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) dipl = {} C = [] hp = ADT.Heap() brncv = {} models_final = [os.path.join(outputDire, "../../../library/peaks_" + str(number_cluster) + "_0.pdb"), os.path.join(outputDire, "../../../library/pklat_" + str(number_cluster) + "_0.pdb"), os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")] for y in range(3): rol = copy.deepcopy(rota) rol["name"] = "ensemble" + str(y) brncv[rol["name"]] = models_final[y] hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, outputDire, "final", brncv) convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, os.path.join(outputDire, "final.sum"), "ROTSOL") for du in Dust: for item in du["heapSolutions"].asList(): pi, ri = item pdbf = cnv[ri["name"]] trim_small_chains(pdbf, 3) return convNames def startMR_ELLG(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, frag_fixed, ensembles={}, fromN=0, toN=None, ellg_target=30): ''' Given a list of pdbs and their rmsd, computes both the eLLG with the model and the number of residues to get a target eLLG Keyword arguments: ellg_target: integer with the target ellg ''' dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): # Remote grid current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 # First fragment case if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile hwm = -1 for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): print "Preparing: " + str(pdbf) + " as model " + str(counter) hwm += 1 if fromN >= 0 and toN != None and hwm < fromN: continue elif fromN >= 0 and toN != None and hwm >= toN: break if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "ellg.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "ellg.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") ensembles["ensemble" + str(counter + 1)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_ELLG" + "\n") f.write('HKLIN "ellg.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE eLLG computation" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("ELLG TARG " + str(ellg_target) + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) elif frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile, list): # N fragment case print "Sorry, multiple fragments case is not yet implemented" sys.exit(0) def startMR_ELLGJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".rlist")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): # if frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile,list): # listaDirec.append((cm.get_remote_pwd(),numero)) # else: listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "MR_ELLG of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startMR_ELLGJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("ellg.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return (nq, ensembles) def startFRF(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, ensembles={}, tops=None, LIMIT_CLUSTER=None, justpdb=None, fromN=0, toN=None, VRMS=False, BFAC=True, BULK_FSOL=-1, BULK_BSOL=-1): if VRMS: VRMS = "ON" else: VRMS = "OFF" if BFAC: BFAC = "ON" else: BFAC = "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile hwm = -1 for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): if justpdb != None and pdbf != os.path.abspath(justpdb): continue # print "Preparing: "+str(pdbf)+" as model "+str(counter) hwm += 1 if fromN >= 0 and toN != None and hwm < fromN: continue elif fromN >= 0 and toN != None and hwm >= toN: break if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rot.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rot.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") ensembles["ensemble" + str(counter + 1)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_FRF" + "\n") f.write('HKLIN "rot.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test rotation for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT OFF" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(counter + 1) + "\n") f.write("SEARCH METHOD FAST" + "\n") f.write("SEARCH DEEP OFF" + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) elif frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile, list): # even if frag_fixed is 2 we still do not have computed anything for the second fragment search so we have to recollect data # as they would be at the first fragment, then when we evaluateFRF we will change the content of the dataase and the FTF could # be computed taking the data as 2 fragment. ensels = [] tovalu = frag_fixed if frag_fixed == 2: tovalu = 1 cou = 0 ClusAll = None if LIMIT_CLUSTER != None: ClusAll = dir_o_liFile[LIMIT_CLUSTER] else: ClusAll = {} nh = ADT.Heap() for clun in dir_o_liFile: for item in clun["heapSolutions"]: prio, rota = item nh.push(prio, rota) ClusAll["heapSolutions"] = nh for sol in ClusAll["heapSolutions"]: if tops != None and cou >= tops: break rota = sol[1] prio = sol[0] pdbf = "" list_pdbs = {} ct = 0 enselines = "" if frag_fixed > 1 and "ensemble" + str(frag_fixed) in ensembles: pdbf = ensembles["ensemble" + str(frag_fixed)] if pdbf not in list_pdbs.keys(): list_pdbs[pdbf] = ("ensemble" + str(frag_fixed), ct, True) ct += 1 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, False) ct += 1 elif list_pdbs[ensembles[rota["name"]]][0] != rota["name"]: dfr = list_pdbs[ensembles[rota["name"]]][1] snc = "ENSEMBLE " + rota["name"] + " PDBFILE " + str(dfr) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n" if snc not in ensels: enselines += snc ensels.append(snc) else: pdbf = ensembles[rota["name"]] # ensembles["ensemble"+str(frag_fixed)] = pdbf if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 # rota["name"] = "ensemble"+str(frag_fixed) if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 elif list_pdbs[ensembles[rotafi["name"]]][0] != rotafi["name"]: dfr = list_pdbs[ensembles[rotafi["name"]]][1] snc = "ENSEMBLE " + rotafi["name"] + " PDBFILE " + str(dfr) + "_" + str( counter) + ".pdb RMS " + str(RMSD) + "\n" if snc not in ensels: enselines += snc ensels.append(snc) numall = ct # if usePDB != None: # pdbf = usePDB # print "Preparing job for model: "+str(pdbf)+" in cluster "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rot.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rot.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") headlines = "" taillines = "" headlines += "#!/bin/tcsh" + "\n" headlines += "MODE MR_FRF" + "\n" headlines += 'HKLIN "rot.mtz"' + "\n" if not Intensities: headlines += 'LABIN F=' + F + ' SIGF=' + SIGF + '\n' else: headlines += 'LABIN I=' + F + ' SIGI=' + SIGF + '\n' headlines += "TITLE Test rotation for Grid" + "\n" headlines += "JOBS 1" + "\n" headlines += "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n" headlines += "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n" headlines += "MACANO PROTOCOL OFF" + "\n" headlines += "MACTNCS PROTOCOL OFF" + "\n" headlines += "TNCS EPSFAC READ anis.tncs" + "\n" headlines += "NORM EPSFAC READ anis.norm" + "\n" headlines += "RESOLUTION " + str(lowR) + " " + str(highR) + "\n" headlines += "XYZOUT OFF" + "\n" for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] snc = "ENSEMBLE " + val[0] + " PDBFILE " + str(val[1]) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n" if snc not in ensels: enselines += snc ensels.append(snc) if val[2]: enselines += "SEARCH ENSEMBLE " + val[0] + "\n" if frag_fixed > 1: taillines += "SOLU SET" + "\n" taillines += "SOLU SPAC " + str(spaceGroup) + "\n" if frag_fixed > 2: for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) taillines += "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n" taillines += "SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(rota["llg"]) + " ZSCORE: " + str( rota["zscore"]) + "\n" taillines += "SEARCH METHOD FAST" + "\n" taillines += "SEARCH DEEP OFF" + "\n" taillines += "PEAKS ROT CLUSTER ON" + "\n" if BULK_BSOL >= 0 and BULK_FSOL >= 0: taillines += "SOLPARAMETERS BULK USE ON" + "\n" taillines += "SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n" else: taillines += "SOLPARAMETERS BULK USE OFF" + "\n" if sampl != -1: taillines += "SAMPLING ROT " + str(sampl) + "\n" taillines += "PEAKS ROT SELECT PERCENT" + "\n" taillines += "PEAKS ROT CUTOFF " + str(save_rot) + "\n" taillines += 'ROOT "' + str(counter) + '"\n' taillines += "END\n" taillines += "EOF-phaser" f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write(headlines) f.write(enselines) f.write(taillines) f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),"./"+str(counter)+".sh") counter += 1 # print "checking",counter,counter%NUMBER_OF_FILES_PER_DIRECTORY,ClusAll["heapSolutions"].len(),counter >= 1 and ClusAll["heapSolutions"].len() == 0 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (counter >= 1 and ClusAll["heapSolutions"].len() == 0): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) cou += 1 if counter <= 0: print "WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE ROTATION SEARCH. MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFUALT IS POSITIVE VALUES)...ENDING NOW..." # sys.exit(0) return counter def startFRFJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".rlist")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): # if frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile,list): # listaDirec.append((cm.get_remote_pwd(),numero)) # else: listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "FRF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) print "Picking all rotations with a peak >= " + str(final_rot) print "Saving all rotations with a peak >= " + str(save_rot) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startFRFJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rot.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return (nq, ensembles) def startPlaneElongation(inputDirectory, outputDirectory, n_res, n_trials, n_copies): import Data if n_copies > 3: print "Cannot use more than 3 copies to exapand the partial solution. Quit." sys.exit(1) if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) for root, subFolders, files in os.walk(inputDirectory): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): for q in range(n_trials): rmsd = None allAtoms = [] atoms_add = [] pdball = None f = open(pdbf, "r") allpdb = f.read() f.close() struc = Bioinformatics.getStructure(os.path.basename(pdbf), pdbf) list_res = [] for model in struc: for chain in model: list_res += chain.get_unpacked_list() for resi in list_res: for atm in resi: atoms_add.append(atm) charch = ["W", "Y", "Z"] sx = 0 for key in Data.frequent_folds.keys(): pdball = "" sx += 1 struttura = Data.frequent_folds[key] struc_c = Bioinformatics.getStructure(key, cStringIO.StringIO(struttura)) list_res_c = [] atoms_add_c = [] for model_c in struc_c: for chain_c in model_c: list_res_c += chain_c.get_unpacked_list() for resi_c in list_res_c: for atm_c in resi_c: atoms_add_c.append(atm_c) for nd in range(n_copies): indo = numpy.random.randint(len(list_res) - n_res) reference = [] for z in range(indo, indo + n_res): atomCA = list_res[z]["CA"] atomC = list_res[z]["C"] atomO = list_res[z]["O"] atomN = list_res[z]["N"] reference.append(atomCA.get_coord()) reference.append(atomC.get_coord()) reference.append(atomO.get_coord()) reference.append(atomN.get_coord()) best_rt = (None, None) best_rmsd = 100 rmsd = 100 times = 0 while times < 5000: times += 1 compare = [] indi = numpy.random.randint(len(list_res_c) - n_res) for z in range(indi, indi + n_res): atomCA = list_res_c[z]["CA"] atomC = list_res_c[z]["C"] atomO = list_res_c[z]["O"] atomN = list_res_c[z]["N"] compare.append(atomCA.get_coord()) compare.append(atomC.get_coord()) compare.append(atomO.get_coord()) compare.append(atomN.get_coord()) transf, rmsd_list, rmsd = Bioinformatics.fit_wellordered(numpy.array(reference), numpy.array(compare), n_iter=1, full_output=True) R, t = transf if rmsd < best_rmsd: best_rmsd = rmsd best_rt = (R, t) # print "Selected best rmsd for",key,"is",best_rmsd,times allAtoms = Bioinformatics.transform_atoms(atoms_add_c, best_rt[0], best_rt[1]) pdball += Bioinformatics.getPDBFromListOfAtom(allAtoms, renumber=True, uniqueChain=True, chainId=charch[nd])[0] + "\n" filename = os.path.join(outputDirectory, os.path.basename(pdbf)[:-4] + "_" + str(q) + "-" + str(sx) + ".pdb") print "File", os.path.basename(filename), "rmsd:", best_rmsd f = open(filename, "w") f.write(allpdb + "\n") f.write(pdball + "\n") f.close() def startRandomlyExpand(inputDirectory, outputDirectory, ray, n_ca_pick, n_trials): ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) for root, subFolders, files in os.walk(inputDirectory): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): for q in range(n_trials): atoms_add = [] f = open(pdbf, "r") allpdb = f.read() f.close() struc = Bioinformatics.getStructure(os.path.basename(pdbf), pdbf) for model in struc: for chain in model: list_res = chain.get_unpacked_list() indices = numpy.random.randint(len(list_res) - 1, size=n_ca_pick) print "Picking random indices", indices for index in indices: atom_ca = list_res[index]["CA"] nx, ny, nz = atom_ca.get_coord() cz = numpy.random.randint(-1 * ray, high=ray) phi = numpy.random.uniform(low=0.0, high=2.0) nx += numpy.sqrt(ray ** 2 - cz ** 2) * numpy.cos(phi) ny += numpy.sqrt(ray ** 2 - cz ** 2) * numpy.sin(phi) nz += cz atoms_add.append([nx, ny, nz, atom_ca.get_parent().get_segid(), atom_ca.get_name(), atom_ca.get_fullname(), atom_ca.get_altloc()]) filename = os.path.join(outputDirectory, os.path.basename(pdbf)[:-4] + "_" + str(q) + ".pdb") f = open(filename, "w") f.write(allpdb + "\n") atom_number = 1 resseq = 1 line = "" for atom in atoms_add: args = ( "ATOM ", atom_number, atom[5], atom[6], 'ALA', 'Z', resseq, ' ', atom[0], atom[1], atom[2], 1.0, 25.0, atom[3], 'C', ' ') line = ATOM_FORMAT_STRING % args resseq += 1 atom_number += 1 f.write(line) f.close() def startPREPARE(cm, sym, nameJob, CC_Val, outputDirectory, cell_dim, spaceGroup, nTop, topNext=None): if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) counter = 0 convNames = {} if topNext != None: counter = 0 listaWork_key = map(lambda x: "ensembleIDxx" + os.path.basename(x["corresp"])[:-4].split("xx")[-1], CC_Val) listaWork_value = map(lambda x: x["initcc"], CC_Val) listaWork = dict(zip(listaWork_key, listaWork_value)) # print listaWork_key # print listaWork_value counter = 0 for clu in CC_Val: pdbf = clu["corresp"] if nTop != None and counter > nTop: break # print clu print "Preparing: " + str(pdbf) + " as model " + str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if "suffix" in clu.keys(): nomino = os.path.basename(pdbf)[:-4] + clu["suffix"] + ".pdb" else: nomino = os.path.basename(pdbf) foc = open(outputDirectory + "/" + nomino, "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) foc.write(CRD.format(**data) + "\n") foc2 = open(pdbf, "r") data2 = foc2.readlines() datas = "" for lin in data2: ler = lin.split() if ler[0] != "CRYST1": datas += lin foc2.close() foc.write(datas) foc.close() convNames["ensemble" + str(counter)] = outputDirectory + "/" + nomino counter += 1 return convNames def getTheTOPNOfEachCluster(DicParameters, frag_fixed, dirout, mode, quate, ClusAll, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, modeTra="frac", LIMIT_CLUSTER=None, renameWithConvNames=False, sufixSolPos=False, appendToName=""): if dirout != "" and not os.path.exists(dirout): os.makedirs(dirout) bests = [] foms = {"llg": [numpy.inf, 0.0], "zscore": [numpy.inf, 0.0]} for ci in range(len(ClusAll)): clu = ClusAll[ci] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != ci: continue s = 0 cou = 0 hp = ADT.Heap() liBest = [] i = 0 for sol in clu["heapSolutions"]: if ntop != None and i >= ntop: break bestRota = sol[1] prio = (bestRota["llg"], bestRota["zscore"]) i += 1 # print bestRota["name"],convNames[bestRota["name"]] liBest.append(bestRota) llg = bestRota["llg"] zscore = bestRota["zscore"] cluster_fin = bestRota["original_rotcluster"] if llg < (foms["llg"])[0]: (foms["llg"])[0] = llg if llg > (foms["llg"])[1]: (foms["llg"])[1] = llg if zscore < (foms["zscore"])[0]: (foms["zscore"])[0] = zscore if zscore > (foms["zscore"])[1]: (foms["zscore"])[1] = zscore if writePDB: pdbSt = [[]] if "fixed_frags" in bestRota: for frifr in bestRota["fixed_frags"]: allpdbli = __getStringOfRototranslatedStructure(DicParameters, frag_fixed, dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, frifr, ci, i, modeTra=modeTra, LIMIT_CLUSTER=LIMIT_CLUSTER, renameWithConvNames=renameWithConvNames, sufixSolPos=sufixSolPos) for pdbname in allpdbli: f = open(pdbname, "r") allpdb = f.read() f.close() os.remove(pdbname) pdbSt[-1].append(allpdb) pdbSt.append([]) allpdbli = __getStringOfRototranslatedStructure(DicParameters, frag_fixed, dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, bestRota, ci, i, modeTra=modeTra, LIMIT_CLUSTER=LIMIT_CLUSTER, renameWithConvNames=renameWithConvNames, sufixSolPos=sufixSolPos) for pdbname in allpdbli: f = open(pdbname, "r") allpdb = f.read() f.close() os.remove(pdbname) pdbSt[-1].append(allpdb) wow = numpy.array(pdbSt) for fileC in wow.transpose(): path_c, new_name_cond = os.path.split(os.path.normpath(allpdbli.pop(0))) new_name_cond = new_name_cond.split("_")[0] + appendToName + "_" + new_name_cond.split("_")[ 1] + "_" + new_name_cond.split("_")[2] f = open(os.path.join(path_c, new_name_cond), "w") druppo = str(cluster_fin) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo) + "\n") f.write(Bioinformatics.sequentialRenumberListOfPdbs(fileC)) f.close() bests.append(liBest) # for bestRota in liBest: # priority = (-1*bestRota["llg"], -1*bestRota["zscore"]) # clu["heapSolutions"].push(priority,bestRota) return bests, foms def filterClustersAndSolutionByCores(CluAll, sym): numeroClus = 2 * sym.PROCESSES numeroRot = 64 * sym.PROCESSES CluAll = sorted(CluAll, __cmp_cluster, reverse=True) print "Original clusters", len(CluAll) n = 0 Clu = [] for clu in CluAll: if n >= numeroClus: print "Break clusters because", n, numeroClus break s = 0 clun = {"heapSolutions": ADT.Heap()} for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): rota = item[1] prio = item[0] if s >= numeroRot: print "Break rotations because", s, numeroRot break clun["heapSolutions"].push(prio, rota) s += 1 Clu.append(clun) n += 1 print "Final Clusters", len(Clu) return Clu def filterAndCountClusters(ClusAll, ensembles, mode, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg, unify=False): condition = True while condition: nea = '' neb = '' numc = 0 for inde in range(len(ClusAll)): clu = ClusAll[inde] nrpdbs = {} for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): rota = item[1] prio = item[0] numc = rota["n_prev_cluster"] pdbname = ensembles[rota['name']] if ensembles[rota['name']] not in nrpdbs: nrpdbs[ensembles[rota['name']]] = rota else: r = nrpdbs[ensembles[rota['name']]] if mode == 'llg' and rota['llg'] > r['llg']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'llg' and rota['llg'] == r['llg'] and rota['zscore'] > r['zscore']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'zscore' and rota['zscore'] > r['zscore']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'zscore' and rota['zscore'] == r['zscore'] and rota['llg'] > r['llg']: nrpdbs[ensembles[rota['name']]] = rota newDic = {"heapSolutions": ADT.Heap()} for key in nrpdbs.keys(): rota = nrpdbs[key] newDic["heapSolutions"].push((-1 * rota["llg"], -1 * ["zscore"]), rota) ClusAll[inde] = newDic if unify: condition, ClusAll = unifyClustersEquivalent(ClusAll, ensembles, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg) else: condition = False return ClusAll def applyFilterName(CluAll, ensembles): convNames = {} for clu in CluAll: rotazioni = map(lambda x: x[1], clu["heapSolutions"].asList()) pdb_done = [] hp = ADT.Heap() for rotaz in sorted(rotazioni, __cmp_rota, reverse=True): if (rotaz["name"], rotaz["n_prev_cluster"]) in pdb_done: continue pdb_done.append((rotaz["name"], rotaz["n_prev_cluster"])) priority = (-1 * rotaz['llg'], -1 * rotaz['zscore']) hp.push(priority, rotaz) convNames[rotaz["name"]] = ensembles[rotaz["name"]] clu["heapSolutions"] = hp return CluAll, convNames # NOTE: this method is supported just for ARCIMBOLDO-BORGES.py and NOT!!!! for ARCIMBOLDO def unifyClustersEquivalent(ClusAll, ensembles, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg): performed = False visited = [] numc = 0 for a in range(len(ClusAll)): clu1 = ClusAll[a] if a in visited: continue visited.append(a) if len(clu1["heapSolutions"].asList()) == 0: continue item1 = clu1["heapSolutions"].pop() clu1q = item1[1] clu1["heapSolutions"].push(item1[0], item1[1]) numc = clu1q["n_prev_cluster"] for b in range(a + 1, len(ClusAll)): if b in visited: continue clu2 = ClusAll[b] if len(clu2["heapSolutions"].asList()) == 0: continue item2 = clu2["heapSolutions"].pop() clu2q = item2[1] clu2["heapSolutions"].push(item2[0], item2[1]) # threshold_alg = -1 # if clusteringAlg == 'quaternion': # threshold_alg = 0.06 # elif clusteringAlg == 'distributionCV': # threshold_alg = 10 # else: # return re, elo = compareRotation(clu1q, clu2q, threshold_alg, clusteringAlg, quate, laue, listNCS, ensembles, cell_dim, True) if re: performed = True visited.append(b) for ite in clu2["heapSolutions"]: prio2 = ite[0] rota2 = ite[1] rota2['n_prev_cluster'] = numc # NOTE: the following instruction imply we are always working with 1 fixed frag. This method thus # it is not supported for ARCIMBOLDO but just for ARCIMBOLDO-BORGES rota2['original_rotcluster'] = numc priority = (-1 * rota2['llg'], -1 * rota2['zscore']) clu1["heapSolutions"].push(priority, rota2) return performed, ClusAll def filterOutImprobableSols(ClusAll, minLLG): llg_all = [] zscore_all = [] for clu in ClusAll: for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): prio, rota = item llg_all.append(rota["llg"]) zscore_all.append(rota["zscore"]) llg_all = sorted(llg_all, reverse=True) zscore_all = sorted(zscore_all, reverse=True) zscore_thresh = 0 llg_thresh = None if zscore_all[0] >= 7.50: zscore_thresh = 7.50 print "Found promising solutions with Zscore at:", zscore_all[0] print "Pruning solutions with lower zscore." ClurAll = [] nall = 0 for clu in ClusAll: dic = {"heapSolutions": ADT.Heap()} for item in clu["heapSolutions"]: prio, rota = item if rota["zscore"] >= zscore_thresh and rota["llg"] >= minLLG: dic["heapSolutions"].push(prio, rota) nall += 1 ClurAll.append(dic) return ClurAll def filterAllSolsByTop(ClusAll, frag_fixed, percentage): llg_all = [] for clu in ClusAll: for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): prio, rota = item llg_all.append(rota["llg"]) llg_avg = float(numpy.mean(numpy.array(llg_all))) # limiteLLG = (llg_avg*(100-percentage))/50.0 # if llg_avg < 0: # limiteLLG += llg_avg # if llg_top < 0: # limiteLLG += llg_top print "Pruning solutions out of the mean. AVG LLG:", llg_avg ClurAll = [] nall = 0 for clu in ClusAll: dic = {"heapSolutions": ADT.Heap()} for item in clu["heapSolutions"]: prio, rota = item if rota["llg"] >= llg_avg: dic["heapSolutions"].push(prio, rota) nall += 1 ClurAll.append(dic) return ClurAll def startExpansion(cm, sym, nameJob, outputDire, hkl, ent, nice, cell_dim, spaceGroup, shlxLine, dirBase, limit_CC_data=None, treshold_CC=None, fragdomain=False, single=False, insfile=None): if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" convNames = {} rootdir = dirBase searchfor = ".pdb" startfile = ".pda" if insfile != None: limit_CC_data = None treshold_CC = None searchfor = ".phi" startfile = ".phi" for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(searchfor): if limit_CC_data != None and treshold_CC != None: toContinue = False for m in limit_CC_data: if m["corresp"] == pdbf: if float(m["initcc"]) < treshold_CC: toContinue = True else: toContinue = False break if toContinue: continue # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 if insfile == None: # shutil.copyfile(pdbf, outputDirectory+"/"+str(counter)+".pda") foc = open(outputDirectory + "/" + str(counter) + ".pda", "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) foc.write(CRD.format(**data) + "\n") foc2 = open(pdbf, "r") alls = foc2.readlines() foc2.close() countt = 0 scrivi = True previous = -1 lineaDascri = None if fragdomain: for linea in alls: if linea.startswith("REMARK"): foc.write(linea) elif linea.startswith("ATOM") or linea.startswith("HETATM"): if scrivi: foc.write("REMARK DOMAIN " + str(countt + 1) + "\n") if lineaDascri != None: foc.write(lineaDascri) lineaDascri = None countt += 1 scrivi = False residuo = int((linea[22:26]).strip()) if previous > 0 and (residuo == (previous + 1)) or (residuo == previous): foc.write(linea) previous = residuo elif previous > 0: scrivi = True lineaDascri = linea previous = residuo else: foc.write(linea) previous = residuo else: for linea in alls: foc.write(linea) foc.close() else: try: # os.symlink(pdbf,os.path.join(outputDirectory,str(counter)+".phi")) shutil.copyfile(pdbf, os.path.join(outputDirectory, str(counter) + ".phi")) except: print "", os.path.join(outputDirectory, str(counter) + ".phi"), "already exists!" try: os.symlink(hkl, os.path.join(outputDirectory, str(counter) + ".hkl")) except: print "", os.path.join(outputDirectory, str(counter) + ".hkl"), "already exists!" if insfile != None: if hasattr(cm, "channel"): shutil.copyfile(insfile, os.path.join(outputDirectory, str(counter) + ".ins")) else: try: os.symlink(insfile, os.path.join(outputDirectory, str(counter) + ".ins")) except: print "", os.path.join(outputDirectory, str(counter) + ".ins"), "already exists!" try: if os.path.exists(ent): os.symlink(ent, os.path.join(outputDirectory, str(counter) + ".ent")) except: print "", os.path.join(outputDirectory, str(counter) + ".ent"), "already exists!" if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) if insfile == None: cm.copy_local_file(outputDirectory + "/" + str(counter) + ".pda", "./" + str(ndir - 1) + "/" + str(counter) + ".pda") else: cm.copy_local_file(outputDirectory + "/" + str(counter) + ".phi", "./" + str(ndir - 1) + "/" + str(counter) + ".phi") cm.copy_local_file(outputDirectory + "/" + str(counter) + ".ins", "./" + str(ndir - 1) + "/" + str(counter) + ".ins") cm.create_remote_link(cm.remote_hkl_path, str(counter) + ".hkl") if os.path.exists(ent): cm.create_remote_link(cm.remote_ent_path, str(counter) + ".ent") convNames[str(counter) + startfile] = pdbf # conv2[rota["name"]] = str(counter)+".pda" counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startExpJob(outputDirectory, op, lineargs): if os.path.exists(os.path.join(outputDirectory, str(op) + ".phs")): return print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_SHELXE, os.path.join(outputDirectory, str(op) + startfile), " ".join( lineargs) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE, str(op) + startfile] + lineargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter lia = shlxLine.split() print "Expansion of " + str(nq) + " models with:" print "hkl: " + str(hkl) print "Arguments: " + str(shlxLine) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startExpJob, outputDirectory, op, lia) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_SHELXE + " " + os.path.join(outputDirectory, str(op) + startfile) + " " + " ".join( lia) + "/dev/null" SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST, single=single) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) if hasattr(cm, "channel"): job.setExecutable(os.path.join(cm.get_remote_pwd(), PATH_NEW_SHELXE)) else: job.setExecutable(PATH_NEW_SHELXE) job.setInitialDir(listaDirec) job.addInputFile(".hkl", True) job.addInputFile(startfile, True) if os.path.exists(ent): job.addInputFile(".ent", True) if insfile != None and os.path.exists(insfile): job.addInputFile(".ins", True) # job.addOutputFile(".out",True) # job.setMaxRuntime(172800) # job.setPeriodicRemove("JobStatus == 2 &&((CurrentTime - EnteredCurrentStatus) + RemoteWallClockTime - CumulativeSuspensionTime > $(maxRunTime))") lia = shlxLine.split() lio = [startfile] lei = lio + lia job.setArguments(lei) cm.setRequirements(SHELXE_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, convNames def startPACK(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, packSelect, cutoff, distance, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False): if not (os.path.exists(outputDire)): os.makedirs(outputDire) if VRMS: VRMS = "ON" else: VRMS = "OFF" if BFAC: BFAC = "ON" else: BFAC = "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and counter >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) list_pdbs = {} ct = 0 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct # if usePDB != None: # pdbf = usePDB if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "pack.mtz")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" print traceback.print_exc() pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "pack.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_PAK" + "\n") f.write('HKLIN "pack.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test packing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT OFF" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("PACK CUTOFF " + str(cutoff) + "\n") # NOTE: If PACK COMPACT ON, Pack ensembles into a compact association # (minimize distances between centres of mass for the addition of each component in a solution) f.write("PACK COMPACT OFF" + "\n") # OFF at the moment, check if it would be better ON f.write("XYZOUT OFF" + "\n") f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if frifr["name"] != rota["name"]: f.write("ENSEMBLE " + frifr["name"] + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prio[0]) + " ZSCORE: " + str( prio[1]) + "\n") else: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: NONE LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + "\n") cou += 1 f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startPACKJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "PACK of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startPACKJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("pack.mtz", False) job.addInputFile("anis.tncs", False) job.addInputFile("anis.norm", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def startPACKOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, packSelect, cutoff, distance, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, usePDO=False, VRMS=False, BFAC=False, randomize_trans_per_rot=0): if not (os.path.exists(outputDire)): os.makedirs(outputDire) # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 for cds in range(len(ClusAll)): clu = ClusAll[cds] nrts = len(clu["heapSolutions"].asList()) if nrts == 0: if current_dir2 != "" and cds == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue sol = clu["heapSolutions"].asList()[0] rota = sol[1] prio = sol[0] cou = 0 # while sol != None: # if tops != None and cou >= tops: # break list_pdbs = {} ct = 0 if usePDO: for key in ensembles: try: franumero = int(key.split("FR")[-1].split("_")[0]) except: continue if franumero == frag_fixed - 2: if ensembles[key] not in list_pdbs.keys(): list_pdbs[ensembles[key]] = (key, ct, False) ct += 1 # print "===================================TEST RAPIDO=================" # print rota["name"],ensembles[rota["name"]] # print rota["fixed_frags"][0]["name"],ensembles[rota["fixed_frags"][0]["name"]] # print "===================================TEST RAPIDO=================" if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct # if usePDB != None: # pdbf = usePDB """" nameRota = "" if rota["name"].split("xx") > 1: #if rota["name"] contains "xx" it means is an ARCIMBOLDO nameRota = "ensarci0" else: nameRota = rota["name"] """ if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "pack.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" print traceback.print_exc(file=sys.stdout) pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "pack.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") # print "B remote cwd",cm.get_remote_pwd() if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_PAK" + "\n") f.write('HKLIN "pack.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test packing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT OFF" + "\n") f.write("PACK CUTOFF " + str(cutoff) + "\n") f.write("PACK COMPACT OFF" + "\n") art = list_pdbs[ensembles[rota["name"]]][1] if not usePDO: f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if list_pdbs[ensembles[frifr["name"]]][1] != art: f.write("ENSEMBLE ensarci" + str( list_pdbs[ensembles[frifr["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n") else: for valok in list_pdbs: valo = list_pdbs[valok] f.write("ENSEMBLE ensarci" + str(valo[1]) + " PDBFILE " + str(valo[1]) + "_" + str( counter) + ".pdb RMS " + str(RMSD) + "\n") s = 0 for sol in clu["heapSolutions"]: if tops != None and s >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) lip = [] ros = [] lip.append(int(rota["original_rotcluster"].split("_")[-1])) if "fixed_frags" not in rota: rota["fixed_frags"] = [] for r in rota["fixed_frags"]: lip.append(int(r["original_rotcluster"].split("_")[-1])) ros.append(r) rota["fixed_frags"] = [] ros.append(rota) lip = sorted(lip) # print "!!!!!!!!!!",lip fixd = [] for ep in range(len( lip) - 1): # the last one would be excluded because is not the fixed but actual rotation ced = str(lip[ep]) initial = None # print "cambio de guardia",ced while True: rep = ros.pop(0) # print "initial,initial==rep",initial,initial==rep # if initial != None: # print rep["name"],rep["original_rot_cluster"],initial["name"],initial["original_rot_cluster"] if initial != None and initial == rep: ros.append(rep) break if initial == None: initial = rep # print rep["original_rotcluster"].split("_")[-1], ced if rep["original_rotcluster"].split("_")[-1] == ced: if len(fixd) == 0: rep["original_rotcluster"] = ced rep["n_prev_cluster"] = int(ced) else: rep["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + ced rep["n_prev_cluster"] = __getIDClusterFromDescription(rep["original_rotcluster"]) # if ep == len(lip)-1: # rep["fixed_frags"] = fixd # else: rep["fixed_frags"] = [] fixd.append(copy.deepcopy(rep)) # print "He entrado aqui" break else: # print "No es la que quiero paso a la proxima" ros.append(rep) # print "////////////////",len(ros) rota = ros.pop() # print rota # print "/////////////////",len(fixd) # print fixd # print "//////////////////" if len(fixd) > 0: rota["fixed_frags"] = fixd rota["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] rota["n_prev_cluster"] = __getIDClusterFromDescription(rota["original_rotcluster"]) else: rota["fixed_frags"] = [] # print rota["original_rotcluster"] if randomize_trans_per_rot <= 0: if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) # for keee in ensembles: # print keee,";;;;;;",ensembles[keee] # print rotafi["name"] # print "=================================",ensembles[rotafi["name"]] # print list_pdbs[ensembles[rotafi["name"]]] f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " " + rota["name"] + "\n") # cou += 1 else: for rnds in numpy.random.uniform(low=0.000, high=0.999, size=(randomize_trans_per_rot, 3)): if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) # for keee in ensembles: # print keee,";;;;;;",ensembles[keee] # print rotafi["name"] # print "=================================",ensembles[rotafi["name"]] # print list_pdbs[ensembles[rotafi["name"]]] f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str(rnds[0]) + " " + str( rnds[1]) + " " + str(rnds[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str(rnds[0]) + " " + str( rnds[1]) + " " + str(rnds[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " " + rota[ "name"] + "\n") # cou += 1 s += 1 # nuovoPath = pdbf[:-4]+rota["name"].split("ensembleID")[1]+".pdb" # conv2[str(counter)+"."+str(s)+".pdb"] = (nuovoPath,rota["name"]) f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),str(counter)+".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startPACKJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "PACK of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startPACKJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("pack.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def startOCC(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, ellg=None, nres=None, rangeocc=None, merge=None, occfrac=None, occoffset=None, ncycles=None, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1): dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if VRMS: VRMS = "ON" else: VRMS = "OFF" if BFAC: BFAC = "ON" else: BFAC = "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "occ.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) if hasattr(cm, "channel"): cm.create_remote_link(cm.remote_mtz_path, "occ.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") ensembles["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_OCC" + "\n") f.write('HKLIN "occ.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test OCC for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("OCCUPANCY WINDOW NRES 1" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") f.write("SOLU 6DIM ENSE ensemble" + str(counter + 1) + " EULER 0.0 0.0 0.0 FRAC 0.0 0.0 0.0\n") if ellg != None and isinstance(ellg, float): f.write("OCCUPANCY WINDOW ELLG " + str(ellg) + "\n") if nres != None and isinstance(nres, int): f.write("OCCUPANCY WINDOW NRES " + str(nres) + "\n") if rangeocc != None and isinstance(rangeocc, list) and len(rangeocc) == 2: f.write("OCCUPANCY MIN " + str(rangeocc[0]) + " MAX " + str(rangeocc[1]) + "\n") if merge != None and isinstance(merge, bool): if merge: f.write("OCCUPANCY MERGE ON" + "\n") else: f.write("OCCUPANCY MERGE OFF" + "\n") if occfrac != None and isinstance(occfrac, float): f.write("OCCUPANCY FRAC " + str(ellg) + "\n") if occoffset != None and isinstance(occoffset, int): f.write("OCCUPANCY OFFSET " + str(occoffset) + "\n") if ncycles != None and isinstance(ncycles, int): f.write("OCCUPANCY NCYCLES " + str(ncycles) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startOCCJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "OCC of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) if cm is None: nl = 0 for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startOCCJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("occ.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, ensembles def startNMA(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1): dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "nma.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) if hasattr(cm, "channel"): cm.create_remote_link(cm.remote_mtz_path, "nma.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") ensembles["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NMA" + "\n") f.write('HKLIN "nma.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test NMA for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES 2" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startNMAJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "NMA of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) if cm is None: nl = 0 for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startNMAJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("nma.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, ensembles def startNMAFromClusters(DicParameters, cm, sym, ClusAll, ensembles, nameJob, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1): dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 conv2 = {} for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and cou >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) pdbf = ensembles[rota["name"]] # if usePDB != None: # pdbf = usePDB if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "nma.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "nma.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") conv2["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NMA" + "\n") f.write('HKLIN "nma.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test NMA for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES 2" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startNMAP1Job(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "NMA of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startNMAP1Job, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("nma.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def shredFromEnt(ent, length): listAll = Bioinformatics.getListCA("ent", ent, "PDB", backbone=True, allInList=True) fullength = len(listAll[0][0]) trozos = fullength - length piecesPDBs = [] for t in range(trozos): piece = listAll[1][t:length] allAtm = [] for resi in piece: for atm in resi: allAtm.append(atm) piecesPDBs.append(getPDBFromListOfAtom(allAtm)[0]) return piecesPDBs def startBRF(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, save_rot, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True, isOMIT=False, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1): global listaEva if VRMS: VRMS = "ON" else: VRMS = "OFF" if BFAC: BFAC = "ON" else: BFAC = "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or (LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde): # print "Checking2 is",inde,len(ClusAll)-1,len(ClusAll) if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "brf.mtz")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "brf.mtz") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] # print "Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: stru = Bioinformatics.getStructure("ref", pdbf) pdbsearch = "" for model in stru.get_list(): reference = [] for chain in model.get_list(): for residue in chain.get_list(): reference += residue.get_unpacked_list() pdbmod, cnv = Bioinformatics.getPDBFromListOfAtom(reference, renumber=isOMIT, uniqueChain=isOMIT, chainFragment=not isOMIT) pdbmod = "MODEL " + str(model.get_id()) + "\n" + pdbmod + "\n\n" pdbsearch += pdbmod pdbsearch += "ENDMDL\n\n" fds = open(os.path.join(outputDirectory, str(counter) + ".pdb"), "w") fds.write(pdbsearch) fds.close() if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".pdb"), "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_ROT" + "\n") f.write("TARGET ROT BRUTE" + "\n") f.write('HKLIN "brf.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE refinement vs Rotation fucntion " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT OFF" + "\n") f.write("#SPACEGROUP " + spaceGroup + "\n") # f.write("XYZOUT ON"+"\n") # f.write("TOPFILES "+str(1)+"\n") # if not USE_TNCS: f.write("TNCS USE OFF\n") # else: # f.write("TNCS USE ON\n") f.write("ENSEMBLE ensemble" + str(1) + " PDBFILE " + str(counter) + ".pdb RMS " + str(RMSD) + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") """ else: f.write("SOLU SET"+"\n") f.write("SOLU SPAC "+spaceGroup+"\n") if "n_prev_cluster" in rota: f.write("SOLU TRIAL ENSEMBLE ensemble"+str(1)+" EULER \t"+str((rota["euler"])[0])+" "+str((rota["euler"])[1])+" "+str((rota["euler"])[2])+"\t"+"RFZ "+str(rota["zscore"])+" #CLUSTER: "+str(rota["original_rotcluster"].split("_")[-1])+"\n") else: f.write("SOLU TRIAL ENSEMBLE ensemble"+str(1)+" EULER \t"+str((rota["euler"])[0])+" "+str((rota["euler"])[1])+" "+str((rota["euler"])[2])+"\t"+"RFZ "+str(rota["zscore"])+"\n") """ if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(1) + "\n") f.write("SEARCH METHOD FULL" + "\n") f.write("SEARCH DEEP OFF" + "\n") f.write( "ROTATE VOLUME AROUND EULER " + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + " RANGE " + str(5) + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, 0, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRGRJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "BRF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRGRJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("brf.mtz", False) job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) # job.addOutputFile("re_.pdb",True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") # if sol == None: # print "ho raggiunto ultimo" # (nc,nq) = cm.submitJob(job,isthelast=True) # else: (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def startRGR(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, save_rot, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True, isOMIT=False, VRMS=False, BFAC=False, sigr=0.0, sigt=0.0, preserveChains=False, BULK_FSOL=-1, BULK_BSOL=-1): global listaEva # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or (LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde): # print "Checking2 is",inde,len(ClusAll)-1,len(ClusAll) if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rgr.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rgr.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] # print "Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: stru = Bioinformatics.getStructure("ref", pdbf) pdbsearch = "" for model in stru.get_list(): reference = [] for chain in model.get_list(): for residue in chain.get_list(): reference += residue.get_unpacked_list() pdbmod, cnv = Bioinformatics.getPDBFromListOfAtom(reference, renumber=isOMIT, uniqueChain=isOMIT, chainFragment=(not isOMIT and not preserveChains)) pdbmod = "MODEL " + str(model.get_id()) + "\n" + pdbmod + "\n\n" pdbsearch += pdbmod pdbsearch += "ENDMDL\n\n" fds = open(os.path.join(outputDirectory, str(counter) + ".pdb"), "w") fds.write(pdbsearch) fds.close() if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".pdb"), "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_GYRE" + "\n") f.write('HKLIN "rgr.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE refinement vs Rotation function " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACR ROT ON TRA ON SIGR " + str(sigr) + " SIGT " + str(sigt) + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES " + str(1) + "\n") f.write("TNCS USE OFF\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("ENSEMBLE ensemble" + str(1) + " PDBFILE " + str(counter) + ".pdb RMS " + str(RMSD) + "\n") f.write("SORT ON" + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") if "n_prev_cluster" in rota: f.write("SOLU TRIAL ENSEMBLE ensemble" + str(1) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RFZ " + str( rota["zscore"]) + " #CLUSTER: " + str(rota["original_rotcluster"].split("_")[-1]) + "\n") else: f.write("SOLU TRIAL ENSEMBLE ensemble" + str(1) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RFZ " + str( rota["zscore"]) + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(1) + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, 0, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRGRJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RGR of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRGRJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rgr.mtz", False) job.addInputFile("anis.tncs", False) job.addInputFile("anis.norm", False) job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) # job.addOutputFile("re_.pdb",True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") # if sol == None: # print "ho raggiunto ultimo" # (nc,nq) = cm.submitJob(job,isthelast=True) # else: (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def startRBRP1(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False): global listaEva # TODO: This function should use the files with the norm and tncs factors, but those of the P1 data, that should be generated at the beginning # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" RNP_GYRE = "\n MACMR CHAINS ON" if RNP_GYRE else "" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "refP1.mtz")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtzP1_path, "refP1.mtz") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] # print "Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") # corresponding = (pdbf.split("/"))[-1] # listona = corresponding.split("_") # pdbid = listona[0] # model = listona[1] # idSolution = listona[2] # idSolution,ext = idSolution.split(".") lop = open(outputDirectory + "/" + str(counter) + ".pdb", "r") alls = lop.readlines() lop.close() countt = 0 scrivi = True lai = None previous = -1 lineaDascri = None for linea in alls: if linea.startswith("ATOM") or linea.startswith("HETATM"): if scrivi: if lai != None: lai.close() if hasattr(cm, "channel"): cm.copy_local_file(lai.name, "./" + str(ndir - 1) + "/" + os.path.basename(lai.name)) lai = open(outputDirectory + "/" + str(countt) + "_" + str(counter) + ".pdb", "w") if lineaDascri != None: lai.write(lineaDascri) lineaDascri = None countt += 1 scrivi = False residuo = int((linea[22:26]).strip()) if previous > 0 and (residuo == (previous + 1)) or (residuo == previous): lai.write(linea) previous = residuo elif previous > 0: scrivi = True lineaDascri = linea previous = residuo else: lai.write(linea) previous = residuo if lai != None: lai.close() if hasattr(cm, "channel"): cm.copy_local_file(lai.name, "./" + str(ndir - 1) + "/" + os.path.basename(lai.name)) numFrag = countt f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_RNP" + "\n") f.write('HKLIN "refP1.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE refinement in P1 " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES " + str(1) + "\n") for i in range(countt): f.write( "ENSEMBLE ensemble" + str(i) + " PDBFILE " + str(i) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC P1\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC P1\n") for i in range(countt): if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensemble" + str(i) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + "\n") else: f.write("SOLU 6DIM ENSE ensemble" + str(i) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, numFrag, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRNPP1Job(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RNP in P1 of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRNPP1Job, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("refP1.mtz", False) for i in range(numFrag): job.addInputFile(str(i) + "_" + ".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) # job.addOutputFile("re_.pdb",True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") # if sol == None: # print "ho raggiunto ultimo" # (nc,nq) = cm.submitJob(job,isthelast=True) # else: (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def readMR_ELLGout(path_out_file=None, out_text=None, pdb_filepath=None, nfrag=1): if nfrag != 1: print "Currrently only a single fragment is supported" sys.exit(0) if path_out_file != None and out_text == None: out_file = open(path_out_file, 'r') out_lines = out_file.readlines() del out_file elif out_text != None: out_lines = out_text.splitlines() pdb_file = open(pdb_filepath, 'r') list_ca = Bioinformatics.getListCA(name=pdb_filepath[:-4], pdbf=pdb_file, mode='PDB') nres = len(list_ca[0][0]) results = {} regex_table1 = re.compile("eLLG Values Computed for All Data") regex_table2 = re.compile("Residues/Resolution for eLLG target") regex_table3 = re.compile("Resolution for eLLG target") older_ver = True for i in range(len(out_lines)): if bool(regex_table1.findall(out_lines[i])) and (not out_lines[i + 2].split()[0] == 'target-reso:'): ensemble_name = (out_lines[i + 3]).split()[0] frac_scat = (out_lines[i + 3]).split()[1] current_ellg = float(((out_lines[i + 3]).split())[4]) if bool(regex_table1.findall(out_lines[i])) and (out_lines[i + 2].split()[0] == 'target-reso:'): ensemble_name = (out_lines[i + 4]).split()[0] frac_scat = (out_lines[i + 4]).split()[1] current_ellg = float(((out_lines[i + 4]).split())[4]) if bool(regex_table2.findall(out_lines[i])): nres_for_target = (out_lines[i + 5]).split()[2] res_for_target = (out_lines[i + 5]).split()[3] older_ver = False if bool(regex_table3.findall(out_lines[i])) and older_ver: nres_for_target = None res_for_target = (out_lines[i + 3]).split()[2] results = {'number_of_residues': nres, 'fraction_scattering': frac_scat, 'ellg_current_ensemble': current_ellg, 'nres_for_target_ellg': nres_for_target, 'resolution_for_target': res_for_target, 'fullpath': pdb_filepath} return results def readMR_ELLGsum(path_sum_file, nfrag=1): dict_result = {} file_sum = open(path_sum_file, 'r') lines = file_sum.readlines() del file_sum for i in range(0, len(lines), 5): ensemble = getNewPathFromMerging(path_sum_file,((lines[i + 1]).split())[1]) # print 'ensemble',ensemble dict_result[ensemble] = {} dict_result[ensemble]['number_of_residues'] = int(((lines[i + 2]).split())[1]) # print "dict_result[ensemble]['number_of_residues']",dict_result[ensemble]['number_of_residues'] dict_result[ensemble]['fraction_scattering'] = float(((lines[i + 2]).split())[3]) # print "dict_result[ensemble]['fraction_scattering']",dict_result[ensemble]['fraction_scattering'] if ((lines[i + 2]).split())[8] != 'None': # the nres was available dict_result[ensemble]['nres_for_target_ellg'] = int(((lines[i + 2]).split())[8]) else: # no nres was computed dict_result[ensemble]['nres_for_target_ellg'] = None # print "dict_result[ensemble]['nres_for_target_ellg']",dict_result[ensemble]['nres_for_target_ellg'] dict_result[ensemble]['ellg_current_ensemble'] = float((lines[i + 3].split())[2]) # print "dict_result[ensemble]['ellg_current_ensemble']",dict_result[ensemble]['ellg_current_ensemble'] dict_result[ensemble]['resolution_for_target'] = float((lines[i + 3].split())[7]) # print "dict_result[ensemble]['resolution_for_target']",dict_result[ensemble]['resolution_for_target'] return dict_result def evaluateMR_ELLG(DicParameters, cm, DicGridConn, nameJob, outputDicr, nqueue, ensembles, isArcimboldo=False, nfrag=1): ''' This function reads the output from the startMR_ELLG function and evaluates it, producing a sum file. Keyword input: - DicParameters - cm: grid object - DicGridConn - nameJob - outputDicr - nqueue - ensembles - isArcimboldo - nfrag Return: - dict_result Writes sum file Closes the connection ''' # Prepare the directories dirente = outputDicr if dirente[-1] == '/': dirente = dirente[:-1] if hasattr(cm, 'channel'): # Only in remote grid case current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) # Prepare variables to read fromIn = 0 toIn = nqueue - 1 ndir = 0 dict_result = {} dirente2 = "" current_dir2 = "" # Read while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print 'Evaluating ' + str(fromIn) + ' mr_ellg file' if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, './' + str(ndir) + '/') dirente2 = outputDic if dirente2[-1] == '/': dirente2 = dirente2[:-1] if hasattr(cm, 'channel'): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir = ndir + 1 while 1: outname = str(fromIn) + ".out" local_path = os.path.join(outputDic, outname) if hasattr(cm, 'channel'): file_ended = cm.get_remote_file(remotefile=outname, localfile=local_path, conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(file_ended, bool) and not file_ended: print "File " + outname + " not ready sleeping 3 seconds" time.sleep(3) continue break elif os.path.exists(local_path): checkYOURoutput(myfile=local_path, conditioEND=PHASER_OUT_END_CONDITION_LOCAL, testEND=PHASER_OUT_END_TEST) break else: time.sleep(3) continue # Read the information from the out out_file = open(local_path, 'r') out_content = out_file.read() print out_content del out_file pdb_filepath = outputDic + str(fromIn) + ".pdb" results = readMR_ELLGout(out_text=out_content, pdb_filepath=pdb_filepath) name_ensemble = ensembles['ensemble' + str(fromIn + 1)] dict_result[name_ensemble] = results lastFile = False if fromIn == toIn: lastFile = True candelete = True if candelete: try: numb = fromIn os.remove(outputDic + str(numb) + ".pdb") os.remove(outputDic + str(numb) + ".sh") os.remove(outputDic + str(numb) + ".out") except Exception: # Then we are not in local pass fromIn = fromIn + 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) print "Now all models are performed." if hasattr(cm, "channel"): # print 'current_dir',current_dir # print "dirente",dirente cm.change_remote_dir(current_dir) # print cm.remove_remote_dir(os.path.basename(dirente)) # print "cm.get_remote_pwd()",cm.get_remote_pwd() SystemUtility.close_connection(DicGridConn, DicParameters, cm) # Write the SUM file filepath_sum = os.path.join(dirente, 'ellg_computation.sum') file_sum = open(filepath_sum, 'w') filepath_table = os.path.join(dirente, 'table_ellg.sum') file_table = open(filepath_table, 'w') file_table.write("Size\teLLG\tName\n") for ensemble in dict_result.keys(): file_sum.write("===========\n") file_sum.write("ENSEMBLE: " + ensemble + "\n") file_sum.write( "NRESIDUES: " + str(dict_result[ensemble]['number_of_residues']) + "\t FRACTION_SCATTERING: " + str( dict_result[ensemble]['fraction_scattering']) + "\t NRESIDUES FOR TARGET ELLG: " + str( dict_result[ensemble]['nres_for_target_ellg']) + "\n") file_sum.write("CURRENT ELLG: " + str( dict_result[ensemble]['ellg_current_ensemble']) + "\t RESOLUTION FOR TARGET ELLG: " + str( dict_result[ensemble]['resolution_for_target']) + "\n") file_sum.write("===========\n") # TEMPORARY? TABLE TO ANALYZE ELLG file_table.write(str(dict_result[ensemble]['number_of_residues']) + '\t' + str( dict_result[ensemble]['ellg_current_ensemble']) + '\t' + ensemble + '\n') del file_table del file_sum return dict_result def evaluateOCC(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, ensembles): dirente = outputDicr if dirente[-1] == '/': dirente = dirente[:-1] if hasattr(cm, 'channel'): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) status = '--' fromIn = 0 toIn = nqueue - 1 poolrota = [] foms = {'llg': [numpy.inf, 0.0], 'zscore': [numpy.inf, 0.0]} candelete = False ndir = 0 dirente2 = '' current_dir2 = '' rnp_sol = {} while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print 'Evaluating ' + str(fromIn) + ' translation file' if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, './' + str(ndir) + '/') dirente2 = outputDic if dirente2[-1] == '/': dirente2 = dirente2[:-1] if hasattr(cm, 'channel'): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, 'channel'): wse = cm.get_remote_file(str(fromIn) + '.out', os.path.join(outputDic, str(fromIn) + '.out'), conditioEND=PHASER_NMA_END_CONDITION, testEND=PHASER_NMA_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.out not ready sleeping 3 seconds...' time.sleep(3) continue f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.readlines() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...' time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + '.out')): checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_NMA_END_CONDITION_LOCAL, PHASER_NMA_END_TEST) while True: btest = False f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) btest = os.path.exists(os.path.join(outputDic, ler)) if not btest: break if btest: break break else: time.sleep(3) continue name = str(fromIn) fe = open(outputDic + str(fromIn) + ".pdb", "r") fer = fe.readlines() fe.close() clus = None for luc in fer: if luc.startswith("REMARK CLUSTER"): clus = luc.split()[2] break if clus is None: clus = "-1" for ler in rnp_sol.keys(): cud = int((ler.split("."))[1]) # 8.1.pdb here it takes 1 tud = int((ler.split("."))[0]) # 8.1.pdb here it takes 8 if tud == fromIn: # fut_name = outputDic+os.path.basename(ensembles["ensemble"+str(tud)][:-4]+"_"+str(cud)+".pdb") fut_name = outputDic + os.path.basename(ensembles["ensemble" + str(tud)][:-4] + ".pdb") fg = open(os.path.join(outputDic, ler), "r") fgall = fg.read() fg.close() os.remove(os.path.join(outputDic, ler)) fg = open(fut_name, "w") fg.write("REMARK CLUSTER " + str(clus) + "\n") fg.write(fgall) fg.close() candelete = False try: nb = fromIn os.remove(outputDic + str(nb) + '.pdb') os.remove(outputDic + str(nb) + '.sol') os.remove(outputDic + str(nb) + '.out') os.remove(outputDic + str(nb) + '.sh') try: os.remove(outputDic + str(nb) + ".1.mtz") except: pass if os.path.exists(outputDic + str(nb) + '.mtz'): os.remove(outputDic + str(nb) + '.mtz') if os.path.exists(outputDic + str(nb) + '.sum'): os.remove(outputDic + str(nb) + '.sum') if os.path.exists(outputDic + str(nb) + '.mat'): os.remove(outputDic + str(nb) + '.mat') if os.path.exists(outputDic + str(nb)): os.remove(outputDic + str(nb)) except: print 'Cannot find the file to delete.' # print sys.exc_info() # traceback.print_exc(file=sys.stdout) fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime('%Y-%m-%d %H:%M') print 'Not all models are yet performed...' + ti if not yetEvaluated: print 'Long sleep, queue not ready' time.sleep(60) if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print 'Now all models are performed.' SystemUtility.close_connection(DicGridConn, DicParameters, cm) def evaluateNMA(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, rotclusid, nqueue, ensembles): dirente = outputDicr if dirente[-1] == '/': dirente = dirente[:-1] if hasattr(cm, 'channel'): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) status = '--' fromIn = 0 toIn = nqueue - 1 poolrota = [] foms = {'llg': [numpy.inf, 0.0], 'zscore': [numpy.inf, 0.0]} candelete = False ndir = 0 dirente2 = '' current_dir2 = '' rnp_sol = {} while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print 'Evaluating ' + str(fromIn) + ' translation file' if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, './' + str(ndir) + '/') dirente2 = outputDic if dirente2[-1] == '/': dirente2 = dirente2[:-1] if hasattr(cm, 'channel'): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, 'channel'): wse = cm.get_remote_file(str(fromIn) + '.out', os.path.join(outputDic, str(fromIn) + '.out'), conditioEND=PHASER_NMA_END_CONDITION, testEND=PHASER_NMA_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.out not ready sleeping 3 seconds...' time.sleep(3) continue f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...' time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + '.out')): checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_NMA_END_CONDITION_LOCAL, PHASER_NMA_END_TEST) while True: btest = False f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) btest = os.path.exists(os.path.join(outputDic, ler)) if not btest: break if btest: break break else: time.sleep(3) continue name = str(fromIn) for ler in rnp_sol.keys(): cud = int((ler.split("."))[1]) # 8.1.pdb here it takes 1 tud = int((ler.split("."))[0]) # 8.1.pdb here it takes 8 if tud == fromIn: fut_name = outputDic + os.path.basename( ensembles["ensemble" + str(tud)][:-4] + "_" + str(cud) + ".pdb") fg = open(os.path.join(outputDic, ler), "r") fgall = fg.read() fg.close() os.remove(os.path.join(outputDic, ler)) fg = open(fut_name, "w") fg.write("REMARK CLUSTER " + str(rotclusid) + "\n") fg.write(fgall) fg.close() candelete = False try: nb = fromIn os.remove(outputDic + str(nb) + '.pdb') os.remove(outputDic + str(nb) + '.sol') os.remove(outputDic + str(nb) + '.out') os.remove(outputDic + str(nb) + '.sh') if os.path.exists(outputDic + str(nb) + '.mtz'): os.remove(outputDic + str(nb) + '.mtz') if os.path.exists(outputDic + str(nb) + '.sum'): os.remove(outputDic + str(nb) + '.sum') if os.path.exists(outputDic + str(nb) + '.mat'): os.remove(outputDic + str(nb) + '.mat') if os.path.exists(outputDic + str(nb)): os.remove(outputDic + str(nb)) except: print 'Cannot find the file to delete.' # print sys.exc_info() # traceback.print_exc(file=sys.stdout) fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime('%Y-%m-%d %H:%M') print 'Not all models are yet performed...' + ti if not yetEvaluated: print 'Long sleep, queue not ready' time.sleep(60) if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print 'Now all models are performed.' SystemUtility.close_connection(DicGridConn, DicParameters, cm) def evaluateBRF(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, cell_dim, quate, convNames, models_directory, ensembles, LIMIT_CLUSTER=None, model_file=None, isOMIT=False): global listaEva if models_directory == None: models_directory = "" Clud = [] dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) if os.path.exists(outputDicr + "clustersTmp.sum"): Clusters, cloplo = readClustersFromSUM(outputDic + "clustersTmp.sum") recover = True fromIn = cluster status = "--" toIn = 1 # rotas = (CLUDATA["heapSolutions"]).asList() i = 0 ndir = 0 dirente2 = "" current_dir2 = "" CluWork = [] while len(listaEva.keys()) > 0: i = (sorted(listaEva.keys()))[0] nc, nq, numFrag, ro = listaEva[i] # nameJobi = nameJob+"_"+str(i) yetEvaluated = True if yetEvaluated: print "Evaluating " + str(i) + " model" if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue # wse = cm.get_remote_file(str(i)+".1.pdb",os.path.join(outputDic,str(i)+".1.pdb"),lenght_ext=6,conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION,testEND=PHASER_RLIST_SOL_PDB_END_TEST) # if isinstance(wse,bool) and not wse: # print "File "+str(i)+".1.pdb not ready sleeping 3 seconds..." # time.sleep(3) # continue wse = cm.get_remote_file(str(i) + ".rlist", os.path.join(outputDic, str(i) + ".rlist"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".rlist not ready sleeping 3 seconds..." time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: # atest = os.path.exists(os.path.join(outputDic,str(i)+".1.pdb")) atest = True btest = os.path.exists(os.path.join(outputDic, str(i) + ".rlist")) if atest and btest: break break else: time.sleep(3) continue riprova = True PDBCREATED = "" PDBORIGINAL = "" new_euler = [] old_euler = [] llg = 0.0 zscore = 0.0 while riprova: try: try: fer = open(outputDic + str(i) + ".rlist", "r") ferli = fer.readlines() fer.close() llg = 0 zscore = 0 data_chain = {} for ferlinea in ferli: if ferlinea.startswith("SOLU TRIAL"): listy = ferlinea.split() llg = 0.0 zscore = 0.0 try: zscore = float(listy[9]) except: zscore = 0.0 new_euler = [float(listy[5]), float(listy[6]), float(listy[7])] try: p = subprocess.Popen('grep -A1 loggraph ' + outputDic + str(i) + ".out", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() outp = outp.strip() outlist = outp.splitlines() llg = float(outlist[1].split()[1]) except: llg = 0.0 break # NOTE: TEMPORARY for test gyre structure = Bioinformatics.getStructure("full", os.path.join(outputDic, str(i) + ".pdb")) new_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0, 0, chain_atoms, [ matrixFromEulerAngles(new_euler[0], new_euler[1], new_euler[2])], cell_dim, return_atoms=True) new_list_atoms += chain_atoms new_list_atoms = sorted(new_list_atoms, __cmp_atom) fer = open(outputDic + str(i) + ".sh", "r") ferli = fer.readlines() fer.close() for ferlinea in ferli: if ferlinea.startswith("#SPACEGROUP"): spaceGroup = ferlinea.split("#SPACEGROUP")[1] spaceGroup = spaceGroup.strip() if ferlinea.startswith("ROTATE"): lisr = ferlinea.split() old_euler = [float(lisr[4]), float(lisr[5]), float(lisr[6])] structure = Bioinformatics.getStructure("full", os.path.abspath( outputDic + os.path.basename(str(i) + ".pdb"))) old_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0, 0, chain_atoms, [ matrixFromEulerAngles(old_euler[0], old_euler[1], old_euler[2])], cell_dim, return_atoms=True) old_list_atoms += chain_atoms old_list_atoms = sorted(old_list_atoms, __cmp_atom) PDBORIGINAL = Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] f = open(os.path.abspath(outputDic + os.path.basename(str(i) + ".1_original.pdb")), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data) + "\n") f.write(PDBORIGINAL) f.close() PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] f = open(os.path.abspath(outputDic + os.path.basename(str(i) + ".1_created.pdb")), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data) + "\n") f.write(PDBCREATED) f.close() # NOTE: TEST END cancel this block after test if os.path.exists(models_directory): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0])) if model_file != None and os.path.exists(model_file): ori_path = model_file if not os.path.exists(ori_path): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0]).split("-")[ 0] + ".pdb") structure = Bioinformatics.getStructure("full", ori_path) new_list_resi = [] structureCOMP = Bioinformatics.getStructure("full2", cStringIO.StringIO(PDBCREATED)) for model in structureCOMP.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): new_list_resi += [resi] old_list_resi = [] structureREF = Bioinformatics.getStructure("full3", cStringIO.StringIO(PDBORIGINAL)) for model in structureREF.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): old_list_resi += [resi] # print "+++++++",len(old_list_resi),len(new_list_resi) # CALCOLO DEL RMSD FRA IL MODELLO VECCHIO E IL MODELLO NUOVO (rmsd, nref, ncom, listCoord, structureFINE, pdball) = Bioinformatics.getSuperimp( [old_list_resi], [new_list_resi], "PRECOMPUTED", algorithm="biopython", allAtomsModel=PDBCREATED, backbone=True) # ,listmodel=listcomp) lineas = PDBCREATED.splitlines() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break rotateStructureByListCoord(i, 1, structureFINE, listCoord, outputDic, title) else: rmsd = -100 nref = 0 ncom = 0 except: print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsd = -100 nref = 0 ncom = 0 ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) if not isOMIT: f = open(outputDic + "../models.sum", "a") f.write("===========\n") f.write("MODEL: " + str(i) + "\tCORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + "\t RMSD: " + str( rmsd) + "\t" + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF: " + str( nref) + "\t" + "NCOM: " + str(ncom) + "\n") f.write("===========\n") f.close() else: if len(Clud) <= LIMIT_CLUSTER: for q in range(len(Clud), LIMIT_CLUSTER + 1): Clud.append({"heapSolutions": ADT.Heap()}) ro["llg"] = llg ro["zscore"] = zscore ro["euler"] = new_euler ro["frac"] = [0.0, 0.0, 0.0] Clud[LIMIT_CLUSTER]["heapSolutions"].push((-1 * ro["llg"], -1 * ro["zscore"]), ro) # PULIZIA FILES INUTILI if os.path.exists(models_directory): if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass f = open(outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0]), "w") f.write(PDBCREATED) f.close() ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) else: ensembles[convNames[str(i) + ".1.pdb"][1]] = convNames[str(i) + ".1.pdb"][0] # print "=======",str(i)+".1.pdb","......",convNames[str(i)+".1.pdb"],"oooooooooo",convNames[str(i)+".1.pdb"][1],"////",ensembles[convNames[str(i)+".1.pdb"][1]],"------",convNames[str(i)+".1.pdb"][0] # NOTE: TEMPORARY FOR TESTING GYRE os.remove(outputDic + str(i) + ".pdb") os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sum") except: #print "Trying to remove *sum files" #print sys.exc_info() pass # NOTE: ENDING test (decomment when finished) try: os.remove(outputDic + str(i) + ".1.mtz") except: #print "Trying to remove *1.mtz files" #print sys.exc_info() pass # for t in range(numFrag): # os.remove(outputDic+str(t)+"_"+str(i)+".pdb") del listaEva[i] i += 1 if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) riprova = False now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) except: print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) # os.remove(outputDic+"rgr.mtz") if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file("rgr.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print "Now all models are performed." listaEva = {} SystemUtility.close_connection(DicGridConn, DicParameters, cm) if not isOMIT: return ensembles, CluWork else: return ensembles, Clud def evaluateRGR(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, cell_dim, quate, convNames, models_directory, ensembles, LIMIT_CLUSTER=None, model_file=None, isOMIT=False, ent=None): global listaEva if models_directory == None: models_directory = "" Clud = [] dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) if os.path.exists(outputDicr + "clustersTmp.sum"): Clusters, cloplo = readClustersFromSUM(outputDic + "clustersTmp.sum") recover = True fromIn = cluster status = "--" toIn = 1 # rotas = (CLUDATA["heapSolutions"]).asList() i = 0 ndir = 0 dirente2 = "" current_dir2 = "" CluWork = [] while len(listaEva.keys()) > 0: i = (sorted(listaEva.keys()))[0] nc, nq, numFrag, ro = listaEva[i] # nameJobi = nameJob+"_"+str(i) yetEvaluated = True if yetEvaluated: print "Evaluating " + str(i) + " model" if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".1.pdb", os.path.join(outputDic, str(i) + ".1.pdb"), lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".1.pdb not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".sol", os.path.join(outputDic, str(i) + ".sol"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".sol not ready sleeping 3 seconds..." time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: atest = os.path.exists(os.path.join(outputDic, str(i) + ".1.pdb")) btest = os.path.exists(os.path.join(outputDic, str(i) + ".sol")) if atest and btest: break break else: time.sleep(3) continue riprova = True PDBCREATED = "" PDBORIGINAL = "" while riprova: try: try: fer = open(outputDic + str(i) + ".sol", "r") ferli = fer.readlines() fer.close() llg = 0.0 zscore = 0.0 data_chain = {} first = False for ferlinea in ferli: if ferlinea.startswith("SOLU SET"): if first: break else: listy = ferlinea.split() llg = float(listy[2].split("=")[-1]) first = True if ferlinea.startswith("SOLU GYRE"): topl = ferlinea.split() chainid = topl[4][-2] data_chain[chainid] = [float(topl[6]), float(topl[7]), float(topl[8]), float(topl[10]), float(topl[11]), float(topl[12])] # NOTE: TEMPORARY for test gyre """ structure = Bioinformatics.getStructure("full",os.path.join(outputDic,str(i)+".pdb")) new_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] #resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0,0,chain_atoms,[matrixFromEulerAngles(data_chain[chain_name][0],data_chain[chain_name][1],data_chain[chain_name][2])],cell_dim,return_atoms=True) chain_atoms = translateListByFrac(0,0,chain_atoms,data_chain[chain_name][3:],cell_dim,return_atoms=True) new_list_atoms += chain_atoms fer = open(outputDic+str(i)+".sh","r") ferli = fer.readlines() fer.close() euler = [] for ferlinea in ferli: if ferlinea.startswith("SOLU TRIAL"): topl = ferlinea.split() euler = [float(topl[5]),float(topl[6]),float(topl[7])] if ferlinea.startswith("SOLU SPAC"): spaceGroup = ferlinea.split("SOLU SPAC")[1] spaceGroup = spaceGroup.strip() structure = Bioinformatics.getStructure("full",os.path.abspath(outputDic+os.path.basename(str(i)+".pdb"))) old_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] #resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0,0,chain_atoms,[matrixFromEulerAngles(euler[0],euler[1],euler[2])],cell_dim,return_atoms=True) chain_atoms = translateListByFrac(0,0,chain_atoms,[0.0,0.0,0.0],cell_dim,return_atoms=True) old_list_atoms += chain_atoms PDBORIGINAL = Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] f = open(os.path.abspath(outputDic+os.path.basename(str(i)+".1_original.pdb")),"w") CRD='{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data=dict(r='CRYST1',a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data)+"\n") f.write(PDBORIGINAL) f.close() PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] f = open(os.path.abspath(outputDic+os.path.basename(str(i)+".1_created.pdb")),"w") CRD='{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data=dict(r='CRYST1',a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data)+"\n") f.write(PDBCREATED) f.close() """ # NOTE: TEST END cancel this block after test if os.path.exists(models_directory): """ structure = Bioinformatics.getStructure("full",os.path.join(outputDic,str(i)+".pdb")) new_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] #resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0,0,chain_atoms,[matrixFromEulerAngles(data_chain[chain_name][0],data_chain[chain_name][1],data_chain[chain_name][2])],cell_dim,return_atoms=True) chain_atoms = translateListByFrac(0,0,chain_atoms,data_chain[chain_name][3:],cell_dim,return_atoms=True) new_list_atoms += chain_atoms """ structure = Bioinformatics.getStructure("full", os.path.join(outputDic, str(i) + ".1.pdb")) new_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() new_list_atoms += chain_atoms # print "------",len(new_list_atoms),len(new_list_atoms) new_list_atoms = sorted(new_list_atoms, __cmp_atom) ori_path = SystemUtility.findInSubdirectory( os.path.basename(convNames[str(i) + ".1.pdb"][0]), subdirectory=models_directory) # if models_directory in convNames[str(i)+".1.pdb"][0]: # ori_path = convNames[str(i)+".1.pdb"][0] # else: # ori_path = os.path.join(models_directory,os.path.basename(convNames[str(i)+".1.pdb"][0])) if model_file != None and os.path.exists(model_file): ori_path = model_file # new_list_atoms = sorted(new_list_atoms,__cmp_atom) if not os.path.exists(ori_path): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0]).split("-")[ 0] + ".pdb") structure = Bioinformatics.getStructure("full", ori_path) old_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() old_list_atoms += chain_atoms # print "------",len(old_list_atoms),len(new_list_atoms) old_list_atoms = sorted(old_list_atoms, __cmp_atom) PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] PDBORIGINAL = Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] new_list_resi = [] structureCOMP = Bioinformatics.getStructure("full2", cStringIO.StringIO(PDBCREATED)) for model in structureCOMP.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): new_list_resi += [resi] old_list_resi = [] structureREF = Bioinformatics.getStructure("full3", cStringIO.StringIO(PDBORIGINAL)) for model in structureREF.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): old_list_resi += [resi] # CALCOLO DEL RMSD FRA IL MODELLO VECCHIO E IL MODELLO NUOVO (rmsd, nref, ncom, listCoord, structureFINE, pdball) = Bioinformatics.getSuperimp( [old_list_resi], [new_list_resi], "PRECOMPUTED", algorithm="biopython", allAtomsModel=PDBCREATED, backbone=True) # ,listmodel=listcomp) lineas = PDBCREATED.splitlines() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break rotateStructureByListCoord(i, 1, structureFINE, listCoord, outputDic, title) else: rmsd = -100 nref = 0 ncom = 0 except: print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsd = -100 nref = 0 ncom = 0 ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) nresc = -1 rms0 = -1 diffrms = -1 p = subprocess.Popen('grep -c " CA " ' + os.path.join(outputDic, str(i) + ".1.pdb"), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() outp = outp.strip() nresc = int(outp) nmin = nresc - 10 ndiff = -1 if os.path.exists(ent): # TODO: FIND the BEST Rt to superpose the template to the final structure # 1: start BORGES_MATRIX using the template as model reference # (rmsent,nrefent,ncoment,allent,cment,pdent) = Bioinformatics.getSuperimp(os.path.join(outputDic,str(i)+".1.pdb"),ent,"PDB",algorithm="minrms",backbone=True,minmaxrms=(core_init,core_init)) # (rms0,nref0,ncom0,all0,cm0,pd0) = Bioinformatics.getSuperimp(ori_path,ent,"PDB",algorithm="minrms",backbone=True,minmaxrms=(core_fin,core_fin)) # ndiff = ncom0-ncoment # rmsd_init_core_fin = rms0 # rmsd_fin_core_init = rmsent (rmsent, nrefent, ncoment, allent, cment, dicent) = Bioinformatics.getSuperimp( os.path.join(outputDic, str(i) + ".1.pdb"), ent, "PDB", algorithm="superpose", backbone=True) (rms0, nref0, ncom0, all0, cm0, dic0) = Bioinformatics.getSuperimp(ori_path, ent, "PDB", algorithm="superpose", backbone=True) rmsd_init = rms0 core_init = ncom0 dicc_init = dic0 rmsd_fin = rmsent core_fin = ncoment dicc_fin = dicent (rmsd_core_common_init, rmsd_core_common_fin, core_common) = Bioinformatics.getRMSDfromCommonCore(dicc_init, dicc_fin) diffrmsd = rmsd_core_common_init - rmsd_core_common_fin # print "Model:",os.path.join(outputDic,str(i)+".1.pdb"),"RMSD",rmsent,"NREF",nrefent,"NCOM",ncoment f = open(outputDic + "../pm_gyre.sum", "a") f.write("MODEL: " + convNames[str(i) + ".1.pdb"][0] + " SIZE: " + str( nresc) + " RMSD_GYRE: " + str("%.2f" % rmsd) + " RMSD_INIT: " + str( "%.2f" % rmsd_init) + " CORE_INIT: " + str(core_init) + " RMSD_FIN: " + str( "%.2f" % rmsd_fin) + " CORE_FIN: " + str(core_fin) + " RMSD_INIT_COMMON: " + str( "%.2f" % rmsd_core_common_init) + " RMSD_FIN_COMMON: " + str( "%.2f" % rmsd_core_common_fin) + " RMSD_DIFF: " + str( "%.2f" % diffrmsd) + " CORE_COMMON: " + str(len(core_common.keys())) + " LLG: " + str( llg) + "\n") f.close() """ dbdir = os.path.join(outputDicr,"pmdb") stored = os.path.join(outputDicr,"stored_sup") if os.path.exists(dbdir): shutil.rmtree(dbdir) if not os.path.exists(stored): os.makedirs(stored) os.makedirs(dbdir) nameofent = os.path.basename(ent)[:5] shutil.copyfile(ent, os.path.join(dbdir,nameofent+".pdb")) print "Starting BORGES to find",os.path.join(outputDic,str(i)+".1.pdb"),"into",ent,"..." Parameters = {} Parameters["model"] = ori_path #os.path.join(outputDic,str(i)+".1.pdb") Parameters["dir"] = dbdir Parameters["wdir"] = outputDicr Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 0 Parameters["nilges"] = 10 Parameters["enhance_fold"] = True Parameters["remove_coil"] = True pars,opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars,opt,doCluster=False,superpose=False,sym=sym,process_join=True) if os.path.exists(os.path.join(outputDicr,"./library/")): # 2: Find the best superposition between the template and all the extracted models f = open(os.path.join(outputDicr,"./library/"+"list_rmsd.txt"),"r") allines - f.readlines() rmsd_gyre_vs_ent = float(allines[0].split()[0]) best_model = allines[0].split()[1] shutil.copyfile(os.path.join(outputDicr,"./library/"+best_model),os.path.join(stored,str(i)+".1.sup.pdb")) shutil.rmtree(dbdir) os.remove(os.path.join(outputDicr,"input_search.pdb")) if os.path.exists(os.path.join(outputDicr,"./library/")): shutil.rmtree(os.path.join(outputDicr,"./library/")) """ if not isOMIT: f = open(outputDic + "../models.sum", "a") f.write("===========\n") # f.write("MODEL: "+str(i)+" CORRESP.: "+os.path.abspath(outputDic+os.path.basename(convNames[str(i)+".1.pdb"][0]))+" RMSD_GYRE: "+str("%.2f" % rmsd)+" RMSD_INIT: "+str("%.2f" % rms0)+" RMSD_ENT: "+str("%.2f" % rmsd_gyre_vs_ent)+" DIFF: "+str("%.2f" % diffrms)+" GYRE_ALIGNED_RES: "+str(ncoment)+" RESDIFF: "+str(ndiff)+" OVER_RES: "+str(nresc)+" LLG: "+str(llg)+"\n") f.write("MODEL: " + str(i) + " CORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + " RMSD_GYRE: " + str( "%.2f" % rmsd) + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF_ATM: " + str( nref) + "\t" + "NCOM_ATM: " + str(ncom) + "\n") f.write("===========\n") f.close() else: if len(Clud) <= LIMIT_CLUSTER: for q in range(len(Clud), LIMIT_CLUSTER + 1): Clud.append({"heapSolutions": ADT.Heap()}) ro["llg"] = llg ro["zscore"] = zscore ro["euler"] = data_chain["A"][:3] ro["frac"] = data_chain["A"][3:] Clud[LIMIT_CLUSTER]["heapSolutions"].push((-1 * ro["llg"], -1 * ro["zscore"]), ro) # PULIZIA FILES INUTILI if os.path.exists(models_directory): """ if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass f = open(outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0]), "w") f.write(PDBCREATED) f.close() """ if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass shutil.move(os.path.join(outputDic, str(i) + ".1.pdb"),outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) else: ensembles[convNames[str(i) + ".1.pdb"][1]] = convNames[str(i) + ".1.pdb"][0] # print "=======",str(i)+".1.pdb","......",convNames[str(i)+".1.pdb"],"oooooooooo",convNames[str(i)+".1.pdb"][1],"////",ensembles[convNames[str(i)+".1.pdb"][1]],"------",convNames[str(i)+".1.pdb"][0] # NOTE: TEMPORARY FOR TESTING GYRE try: os.remove(outputDic + str(i) + ".pdb") except: pass os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sol") except: pass try: os.remove(outputDic + str(i) + ".sum") except: pass try: os.remove(outputDic + str(i) + ".rlist") except: pass # NOTE: ENDING test (decomment when finished) try: os.remove(outputDic + str(i) + ".1.pdb") except: pass try: os.remove(outputDic + str(i) + ".1.mtz") except: pass # for t in range(numFrag): # os.remove(outputDic+str(t)+"_"+str(i)+".pdb") del listaEva[i] i += 1 if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) riprova = False now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) except: print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) # os.remove(outputDic+"rgr.mtz") if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file("rgr.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print "Now all models are performed." listaEva = {} SystemUtility.close_connection(DicGridConn, DicParameters, cm) if not isOMIT: return ensembles, CluWork else: return ensembles, Clud def evaluateRefP1(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, quate, convNames, ensembles, LIMIT_CLUSTER=None): global listaEva dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) status = "--" toIn = 1 # rotas = (CLUDATA["heapSolutions"]).asList() i = 0 ndir = 0 dirente2 = "" current_dir2 = "" CluWork = [] while len(listaEva.keys()) > 0: i = (sorted(listaEva.keys()))[0] nc, nq, numFrag, ro = listaEva[i] # nameJobi = nameJob+"_"+str(i) yetEvaluated = True if yetEvaluated: print "Evaluating " + str(i) + " model" if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".1.pdb", os.path.join(outputDic, str(i) + ".1.pdb"), lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".1.pdb not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".sol", os.path.join(outputDic, str(i) + ".sol"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".sol not ready sleeping 3 seconds..." time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: atest = os.path.exists(os.path.join(outputDic, str(i) + ".1.pdb")) btest = os.path.exists(os.path.join(outputDic, str(i) + ".sol")) if atest and btest: break break else: time.sleep(3) continue riprova = True while riprova: try: try: tupla = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(convNames[str(i) + ".1.pdb"][0], False) listcomp = tupla[1] # CALCOLO DEL RMSD FRA IL MODELLO VECCHIO E IL MODELLO NUOVO (rmsd, nref, ncom, listCoord, structure, pdball) = Bioinformatics.getSuperimp( convNames[str(i) + ".1.pdb"][0], outputDic + str(i) + ".1.pdb", "PDB", algorithm="biopython", listmodel=listcomp) flon = open(outputDic + str(i) + ".1.pdb", "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break rotateStructureByListCoord(i, 1, structure, listCoord, outputDic, title) except: rmsd = -100 nref = 0 ncom = 0 # if rmsd == -100: # print "ATTENTION: Models not comparable!!!!" # tra,fixe,dis = readTranslationsFTF(outputDic,str(i),quate,"RNP_P1") fer = open(outputDic + str(i) + ".sol", "r") ferli = fer.readlines() fer.close() llg = 0 zscore = 0 for ferlinea in ferli: if ferlinea.startswith("SOLU SET"): listy = ferlinea.split() llg = float(listy[2][4:]) zscore = 0.0 try: zscore = float(listy[3][5:]) except: zscore = 0.0 break ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) f = open(outputDic + "../models.sum", "a") f.write("===========\n") f.write("MODEL: " + str(i) + "\tCORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + "\t RMSD: " + str( rmsd) + "\t" + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF: " + str( nref) + "\t" + "NCOM: " + str(ncom) + "\n") f.write("===========\n") f.close() # PULIZIA FILES INUTILI if maintainOrigCoord and rmsd != -100: os.remove(outputDic + str(i) + ".1.pdb") shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass shutil.move(outputDic + str(i) + ".1.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) os.remove(outputDic + str(i) + ".pdb") os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sum") except: pass os.remove(outputDic + str(i) + ".sol") try: os.remove(outputDic + str(i) + ".1.mtz") except: pass for t in range(numFrag): os.remove(outputDic + str(t) + "_" + str(i) + ".pdb") del listaEva[i] i += 1 if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) riprova = False now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) except: # print "Error...Trying to read again output files..." # print sys.exc_info() # traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) os.remove(outputDic + "refP1.mtz") if hasattr(cm, "channel"): print cm.remove_remote_file(nameJob + ".cmd") print cm.remove_remote_file("refP1.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print "Now all models are performed." listaEva = {} SystemUtility.close_connection(DicGridConn, DicParameters, cm) return CluWork, ensembles def startRNP(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, normfactors, tncsfactors, RMSD, lowR, highR, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, sampl=-1, USE_TNCS=True, USE_RGR=False, BFAC=False, VRMS=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False): if VRMS: VRMS = "ON" else: VRMS = "OFF" if BFAC: BFAC = "ON" else: BFAC = "OFF" if RNP_GYRE: RNP_GYRE = "\n MACMR CHAINS ON" else: RNP_GYRE = "" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 conv2 = {} for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and cou >= tops: break rota = sol[1] if RNP_GYRE: rota['euler']=[0.0,0.0,0.0] rota['frac'] = [0.0, 0.0, 0.0] prio = (rota["llg"], rota["zscore"]) if tops != None and cou >= tops: break list_pdbs = {} ct = 0 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct # if usePDB != None: # pdbf = usePDB if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rnp.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rnp.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] if not USE_RGR: # Then we can use the original models print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") elif USE_RGR: # Entered condition of gyre, we need to copy the files print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) good_remote_path = os.path.join(current_dir2, os.path.basename(dirente2)) cm.copy_local_file(localfile=pdbf, remotefile=os.path.join(good_remote_path, str(val[1]) + "_" + str( counter) + ".pdb"), remote_path_asitis=True, send_now=True) if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_RNP" + "\n") f.write('HKLIN "rnp.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test refinement and phasing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") if not USE_TNCS: f.write("TNCS USE OFF\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("TOPFILES " + str(nrts + 30) + "\n") f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if frifr["name"] != rota["name"]: f.write("ENSEMBLE " + frifr["name"] + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prio[0]) + " ZSCORE: " + str( prio[1]) + "\n") else: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: NONE LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + "\n") cou += 1 f.write('ROOT "' + str(counter) + '"\n') if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SAMPLING TRA " + str(sampl) + "\n") f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRNPJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RNP of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRNPJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rnp.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def startRNPOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, usePDO=False, sampl=-1, USE_TNCS=True, USE_RGR=False, BFAC=False, VRMS=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False): if VRMS: VRMS = "ON" else: VRMS = "OFF" if BFAC: BFAC = "ON" else: BFAC = "OFF" if RNP_GYRE: RNP_GYRE = "\n MACMR CHAINS ON" else: RNP_GYRE = "" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 for cds in range(len(ClusAll)): clu = ClusAll[cds] nrts = len(clu["heapSolutions"].asList()) if nrts == 0: if current_dir2 != "" and cds == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue sol = clu["heapSolutions"].asList()[0] rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou = 0 # while sol != None: # if tops != None and cou >= tops: # break list_pdbs = {} ct = 0 if usePDO: for key in ensembles: try: franumero = int(key.split("FR")[-1].split("_")[0]) except: continue if franumero == frag_fixed - 2: if ensembles[key] not in list_pdbs.keys(): list_pdbs[ensembles[key]] = (key, ct, False) ct += 1 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct # if usePDB != None: # pdbf = usePDB """" nameRota = "" if rota["name"].split("xx") > 1: #if rota["name"] contains "xx" it means is an ARCIMBOLDO nameRota = "ensarci0" else: nameRota = rota["name"] """ if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rnp.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rnp.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_RNP" + "\n") f.write('HKLIN "rnp.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test refinement and phasing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n") # if Aniso: # f.write("MACANO PROTOCOL OFF"+"\n") # f.write("MACTNCS PROTOCOL OFF"+"\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES " + str(nrts + 30) + "\n") if not USE_TNCS: f.write("TNCS USE OFF\n") art = list_pdbs[ensembles[rota["name"]]][1] if not usePDO: f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if list_pdbs[ensembles[frifr["name"]]][1] != art: f.write("ENSEMBLE ensarci" + str( list_pdbs[ensembles[frifr["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n") else: for valok in list_pdbs: valo = list_pdbs[valok] f.write("ENSEMBLE ensarci" + str(valo[1]) + " PDBFILE " + str(valo[1]) + "_" + str( counter) + ".pdb RMS " + str(RMSD) + "\n") s = 0 for sol in clu["heapSolutions"]: if tops != None and s >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) lip = [] ros = [] # lip.append(rota["original_rotcluster"].split("_")[-1]) if "fixed_frags" not in rota: rota["fixed_frags"] = [] for r in rota["fixed_frags"]: lip.append(r["original_rotcluster"].split("_")[-1]) ros.append(r) rota["fixed_frags"] = [] ros.append(rota) lip = sorted(lip) # print "!!!!!!!!!!",lip fixd = [] for ep in range(len(lip)): ced = lip[ep] initial = None # print "cambio de guardia",ced while True: rep = ros.pop(0) if initial != None and initial == rep: ros.append(rep) break if initial == None: initial = rep # print rep["original_rotcluster"].split("_")[-1], ced if rep["original_rotcluster"].split("_")[-1] == ced: if len(fixd) == 0: rep["original_rotcluster"] = ced rep["n_prev_cluster"] = int(ced) else: rep["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + ced rep["n_prev_cluster"] = __getIDClusterFromDescription(rep["original_rotcluster"]) # if ep == len(lip)-1: # rep["fixed_frags"] = fixd # else: rep["fixed_frags"] = [] fixd.append(copy.deepcopy(rep)) # print "He entrado aqui" break else: # print "No es la que quiero paso a la proxima" ros.append(rep) rota = ros.pop() if len(fixd) > 0: rota["fixed_frags"] = fixd rota["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] rota["n_prev_cluster"] = __getIDClusterFromDescription(rota["original_rotcluster"]) else: rota["fixed_frags"] = [] # print rota["original_rotcluster"] if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write( "SOLU 6DIM ENSE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write( "SOLU 6DIM ENSE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " " + rota["name"] + "\n") # cou += 1 s += 1 # nuovoPath = pdbf[:-4]+rota["name"].split("ensembleID")[1]+".pdb" # conv2[str(counter)+"."+str(s)+".pdb"] = (nuovoPath,rota["name"]) conv2[str(counter) + "." + str(s) + ".pdb"] = (pdbf, rota["name"]) f.write('ROOT "' + str(counter) + '"\n') if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SAMPLING TRA " + str(sampl) + "\n") f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRNPJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RNP of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRNPJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rnp.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def __organizeClustersByFixedFragAndIDComb(ClusAll): ClutAll = [] for clu in ClusAll: dicsupp = {} for item in clu["heapSolutions"]: prio, rota = item fixed = [] if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: fixed.append(rotafi["euler"]) jso = json.dumps(fixed) if jso in dicsupp.keys(): dicsupp[jso].append(rota) else: dicsupp[jso] = [rota] for key in dicsupp.keys(): dirf = {"heapSolutions": ADT.Heap()} for rota in dicsupp[key]: dirf["heapSolutions"].push((-1 * rota["llg"], -1 * rota["zscore"]), rota) ClutAll.append(dirf) return ClutAll def startFTF(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, normfactors, tncsfactors, RMSD, lowR, highR, final_tra, save_tra, frag_fixed, spaceGroup, cutoff_pack, sampl=-1, tops=None, USE_TNCS=True, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, PACK_TRA=False): if not (os.path.exists(outputDire)): os.makedirs(outputDire) # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and cou >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) pdbf = ensembles[rota["name"]] # if usePDB != None: # pdbf = usePDB if pdbf.endswith(".pdb"): #print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "tran.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "tran.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_FTF" + "\n") f.write('HKLIN "tran.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test fast translation for models" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if PACK_TRA: f.write("TRANS PACK USE ON" + "\n") f.write("TRANSLATE PACKING CUTOFF " + str(cutoff_pack) + "\n") else: f.write("TRANS PACK USE OFF" + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT OFF" + "\n") if not USE_TNCS: f.write("TNCS USE OFF\n") else: f.write("TNCS USE ON\n") f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str(counter) + ".pdb RMS " + str(RMSD) + "\n") f.write("SEARCH METHOD FAST" + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " # " + rotafi["name"] + " CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write( "SOLU TRIAL ENSEMBLE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RFZ " + str( rota["zscore"]) + " # " + rota["name"] + " CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + "\n") else: f.write( "SOLU TRIAL ENSEMBLE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RFZ " + str( rota["zscore"]) + " # " + rotafi["name"] + "\n") cou += 1 if sampl != -1: f.write("SAMPLING TRA " + str(sampl) + "\n") f.write("TRANSLATE VOLUME FULL" + "\n") f.write("PEAKS TRA SELECT PERCENT" + "\n") f.write("PEAKS TRA CUTOFF " + str(save_tra) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if counter <= 0: print "WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE TRANSLATION SEARCH. MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFUALT IS POSITIVE VALUES)...ENDING NOW..." # sys.exit(0) return counter def startFTFJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "FTF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startFTFJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("tran.mtz", False) job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def startFTFOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_tra, save_tra, frag_fixed, spaceGroup, cutoff_pack, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True, usePDO=False, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, PACK_TRA=False): # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 ClusAll = __organizeClustersByFixedFragAndIDComb(ClusAll) used_ensembles = {} for cds in range(len(ClusAll)): clu = ClusAll[cds] s = 0 nrts = len(clu["heapSolutions"].asList()) if nrts == 0: if current_dir2 != "" and cds == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue list_pdbs = {} ct = 0 if usePDO: for key in ensembles: try: franumero = int(key.split("FR")[-1].split("_")[0]) except: continue if franumero == frag_fixed - 2: if ensembles[key] not in list_pdbs.keys(): list_pdbs[ensembles[key]] = (key, ct, False) ct += 1 sol = clu["heapSolutions"].asList()[0] rota = sol[1] prio = (rota["llg"], rota["zscore"]) if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct # if usePDB != None: # pdbf = usePDB # print "Preparing job for model: "+str(pdbf)+" in cluster "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "tran.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "tran.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 for pdbf in list_pdbs.keys(): try: val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): # print "pdb",cm.get_remote_pwd(),os.path.join(cm.remote_library_path,os.path.basename(pdbf)) for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") # f = open(outputDirectory+"/"+str(counter)+".sh", "w") headlines = "" enselines = "" taillines = "" headlines += "#!/bin/tcsh" + "\n" headlines += "MODE MR_FTF" + "\n" headlines += 'HKLIN "tran.mtz"' + "\n" headlines += 'HKLOUT OFF' + "\n" if not Intensities: headlines += 'LABIN F=' + F + ' SIGF=' + SIGF + '\n' else: headlines += 'LABIN I=' + F + ' SIGI=' + SIGF + '\n' headlines += "TITLE Test fast translation for models" + "\n" headlines += "JOBS 1" + "\n" headlines += "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n" headlines += "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n" if PACK_TRA: headlines += "TRANS PACK USE ON" + "\n" headlines += "TRANSLATE PACKING CUTOFF " + str(cutoff_pack) + "\n" else: headlines += "TRANS PACK USE OFF" + "\n" headlines += "MACTNCS PROTOCOL OFF" + "\n" headlines += "MACANO PROTOCOL OFF" + "\n" headlines += "TNCS EPSFAC READ anis.tncs" + "\n" headlines += "NORM EPSFAC READ anis.norm" + "\n" headlines += "RESOLUTION " + str(lowR) + " " + str(highR) + "\n" headlines += "XYZOUT OFF" + "\n" if not USE_TNCS: headlines += "TNCS USE OFF\n" else: headlines += "TNCS USE ON\n" headlines += "SEARCH METHOD FAST" + "\n" headlines += "PEAKS ROT CLUSTER ON" + "\n" if BULK_BSOL >= 0 and BULK_FSOL >= 0: headlines += "SOLPARAMETERS BULK USE ON" + "\n" headlines += "SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n" else: headlines += "SOLPARAMETERS BULK USE OFF" + "\n" if frag_fixed > 1: taillines += "SOLU SET" + "\n" taillines += "SOLU SPAC " + str(spaceGroup) + "\n" # print "=================FIXED==================",counter for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) # print "EULER:",rotafi["euler"],"FRAC",rotafi["frac"] if not str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str( counter) + ".pdb" in used_ensembles.keys(): enselines += "ENSEMBLE " + rotafi["name"] + " PDBFILE " + str( list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n" used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"] = [ rotafi["name"]] else: used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"].append( rotafi["name"]) taillines += "SOLU 6DIM ENSE " + \ used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"][ 0] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " # " + rotafi[ "name"] + " CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prifi[0]) + " ZSCORE: " + str( prifi[1]) + "\n" # print "========================================",counter else: taillines += "SOLU SET" + "\n" taillines += "SOLU SPAC " + str(spaceGroup) + "\n" s = 0 # print "==============================ROTATIONS=============================",counter for sol in clu["heapSolutions"]: if tops != None and s >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) # print "NAME:",rota["name"],"EULER:",rota["euler"],"FRAC",rota["frac"] if "n_prev_cluster" in rota: if not str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str( counter) + ".pdb" in used_ensembles.keys(): enselines += "ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n" used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"] = [ rota["name"]] else: used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"].append( rota["name"]) taillines += "SOLU TRIAL ENSEMBLE " + \ used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"][ 0] + " EULER \t" + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota[ "name"] + " CLUSTER: " + str(rota["original_rotcluster"].split("_")[-1]) + "\n" else: if not str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str( counter) + ".pdb" in used_ensembles.keys(): enselines += "ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n" used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"] = [ rota["name"]] else: used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"].append( rota["name"]) taillines += "SOLU TRIAL ENSEMBLE " + \ used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"][ 0] + " EULER \t" + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota["name"] + "\n" s += 1 # print "========================================================================",counter if sampl != -1: taillines += "SAMPLING TRA " + str(sampl) + "\n" taillines += "TRANSLATE VOLUME FULL" + "\n" taillines += "PEAKS TRA SELECT PERCENT" + "\n" taillines += "PEAKS TRA CUTOFF " + str(save_tra) + "\n" taillines += 'ROOT "' + str(counter) + '"\n' taillines += "END\n" taillines += "EOF-phaser" f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write(headlines) f.write(enselines) f.write(taillines) f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),str(counter)+".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if counter <= 0: print "WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE TRANSLATION SEARCH. MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFUALT IS POSITIVE VALUES)...ENDING NOW..." # sys.exit(0) return counter def startFTFJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "FTF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) print "Picking all translations with a peak >= " + str(save_tra) print "Saving all translations with a peak >= " + str(save_tra) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startFTFJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("tran.mtz", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def put_same_unit_cell_and_symm(list_frags, cell_dim, ref_point): struss = [] lista_trials = [("+", 0.0), ("*", -1.0), ("+", -1.0), ("+", 1.0)] parameters = {} p = [] wx = None wy = None wz = None ux = None uy = None uz = None d1 = None d2 = None cx = None cy = None cz = None if len(ref_point) > 0: cx = ref_point[0] cy = ref_point[1] cz = ref_point[2] print "Center of mass of ent:", cx, cy, cz for frag in list_frags: print "Processing new fragment..." frag_orig = [] frag_move = [] no_convert = False # no_trial_zero = False all_first_point = [] p = [] wx = None wy = None wz = None ux = None uy = None uz = None d1 = None d2 = None for atom in frag: if isinstance(atom, list): ux = atom[0] uy = atom[1] uz = atom[2] else: atm = atom.get_coord() ux = atm[0] uy = atm[1] uz = atm[2] nx, ny, nz, parameters = convertFromOrthToFrac(ux, uy, uz, cell_dim, parameters) # print print "Original fractional", nx, ny, nz frag_orig.append(numpy.array([ux, uy, uz])) if no_convert: continue if wx is not None and wy is not None and wz is not None: d1 = numpy.sqrt(((wx - ux) * (wx - ux)) + ((wy - uy) * (wy - uy)) + ((wz - uz) * (wz - uz))) # print "distance1",d1,"--",wx,wy,wz,"-",ux,uy,uz # wx = ux # wy = uy # wz = uz if nx < 0: nx = -1 * nx if ny < 0: ny = -1 * ny if nz < 0: nz = -1 * nz nx = nx - numpy.modf(nx)[1] ny = ny - numpy.modf(ny)[1] nz = nz - numpy.modf(nz)[1] if len(p) > 0: hp = ADT.Heap() for g in p: px = g[0] py = g[1] pz = g[2] # print "Using",px,py,pz try: for ada in lista_trials: for adb in lista_trials: for adc in lista_trials: # no_convert = True if ada[0] == "+": sx = nx + ada[1] elif ada[0] == "*": sx = nx * ada[1] if adb[0] == "+": sy = ny + adb[1] elif adb[0] == "*": sy = ny * adb[1] if adc[0] == "+": sz = nz + adc[1] elif adc[0] == "*": sz = nz * adc[1] # print "Application of trial",sx,sy,sz sx, sy, sz, parameters = convertFromFracToOrth(sx, sy, sz, cell_dim, parameters) d2 = numpy.sqrt( ((px - sx) * (px - sx)) + ((py - sy) * (py - sy)) + ((pz - sz) * (pz - sz))) # print "distance2",d2,"--",px,py,pz,"-",sx,sy,sz # print "LOOK AT IT:",d1-d2,ada,adb,adc df = abs(d1 - d2) ang = ( 57.2957795 * angle_between([wx - ux, wy - uy, wz - uz], [px - sx, py - sy, pz - sz], [0.0, 0.0, 1.0], signed=False)) # if abs(d1-d2) <= 0.05 and (57.2957795*angle_between([wx-ux,wy-uy,wz-uz],[px-sx,py-sy,pz-sz],[0.0,0.0,1.0],signed=False)) <= 0.05: if df <= 9.0 and ang <= 9.0: # no_convert = False if len(p) > 1 and cx is not None and cy is not None and cz is not None: d3 = numpy.sqrt(((cx - sx) * (cx - sx)) + ((cy - sy) * (cy - sy)) + ( (cz - sz) * (cz - sz))) hp.push((df, ang, d3), (sx, sy, sz, px, py, pz)) # it is a MinHeap # print "distance found",d3,sy,sy,sz else: hp.push((df, ang, 1), (sx, sy, sz, px, py, pz)) # raise Exception # print "It is compatible:",d1-d2,ada,adb,adc except: pass if hp.len() > 0: no_convert = False item = hp.pop() nx = item[1][0] ny = item[1][1] nz = item[1][2] px = item[1][3] py = item[1][4] pz = item[1][5] print "minim values", item[0], nx, ny, nz # df,ang,dis = item[0] # if df > 1.5 or ang > 1.0: # print "Values are too far from correct, cannot be accepted" # no_convert = True else: no_convert = True if not no_convert: frag_move[-1] = numpy.array([px, py, pz]) # break p = [] else: for ada in lista_trials: for adb in lista_trials: for adc in lista_trials: if ada[0] == "+": qx = nx + ada[1] elif ada[0] == "*": qx = nx * ada[1] if adb[0] == "+": qy = ny + adb[1] elif adb[0] == "*": qy = ny * adb[1] if adc[0] == "+": qz = nz + adc[1] elif adc[0] == "*": qz = nz * adc[1] qx, qy, qz, parameters = convertFromFracToOrth(qx, qy, qz, cell_dim, parameters) p.append([qx, qy, qz]) if no_convert: print "ATTENTION CAN'T FIND A FRAC COORDS FOR ATOM. USING ORIGINAL POSITIONS..." continue if len(p) == 0: p.append([nx, ny, nz]) wx = ux wy = uy wz = uz frag_move.append(numpy.array([nx, ny, nz])) q = 0 for atom in frag: if no_convert: atom.set_coord(frag_orig[q]) else: atom.set_coord(frag_move[q]) q += 1 struss.append(atom) return struss def translateListByFrac(num, num2, struct, frac, cell_dim, return_atoms=False): structure = [] parameters = {} for atom in struct: if isinstance(atom, list): x = atom[0] y = atom[1] z = atom[2] else: atm = atom.get_coord() x = atm[0] y = atm[1] z = atm[2] x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) nx = x + frac[0] ny = y + frac[1] nz = z + frac[2] nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) if not return_atoms: structure.append(numpy.array([nx, ny, nz])) else: atom.set_coord(numpy.array([nx, ny, nz])) if not return_atoms: return structure else: return struct def rotateListByMatrix(num, num2, struct, matrices, cell_dim, return_atoms=False): structure = [] parameters = {} for atom in struct: if isinstance(atom, list): x = atom[0] y = atom[1] z = atom[2] else: atm = atom.get_coord() x = atm[0] y = atm[1] z = atm[2] for tw in range(len(matrices)): matrice = matrices[tw] if tw > 0: mode = "rotateByCrystCoord" else: mode = "rotateByOrthCoord" if mode == "rotateByCrystCoord": # print "Before conversion",x,y,z x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) # print "After conversion",x,y,z nx = (matrice[0][0] * x) + (matrice[0][1] * y) + (matrice[0][2] * z) ny = (matrice[1][0] * x) + (matrice[1][1] * y) + (matrice[1][2] * z) nz = (matrice[2][0] * x) + (matrice[2][1] * y) + (matrice[2][2] * z) if mode == "rotateByCrystCoord": # print "Before conversion after rotation",nx,ny,nz nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) # print "After conversion after rotation",nx,ny,nz x = nx y = ny z = nz if not return_atoms: structure.append(numpy.array([x, y, z])) else: atom.set_coord(numpy.array([x, y, z])) if not return_atoms: return structure else: return struct def rotateStructureByMatrix(num, num2, struct, matrices, outputPath, cell_dim, writePDB=True, filename=None): structure = copy.deepcopy(struct) if not os.path.exists(outputPath): os.makedirs(outputPath) if writePDB: if filename == None: pdb = open(outputPath + str(num) + "_" + str(num2) + "_rot.pdb", "w") else: pdb = open(outputPath + str(filename) + "_rot.pdb", "w") parameters = {} for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): x = (atom.get_coord()[0]) y = (atom.get_coord()[1]) z = (atom.get_coord()[2]) for tw in range(len(matrices)): matrice = matrices[tw] if tw > 0: mode = "rotateByCrystCoord" else: mode = "rotateByOrthCoord" if mode == "rotateByCrystCoord": # print "Before conversion",x,y,z x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) # print "After conversion",x,y,z nx = (matrice[0][0] * x) + (matrice[0][1] * y) + (matrice[0][2] * z) ny = (matrice[1][0] * x) + (matrice[1][1] * y) + (matrice[1][2] * z) nz = (matrice[2][0] * x) + (matrice[2][1] * y) + (matrice[2][2] * z) if mode == "rotateByCrystCoord": # print "Before conversion after rotation",nx,ny,nz nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) # print "After conversion after rotation",nx,ny,nz x = nx y = ny z = nz atom.set_coord(numpy.array([x, y, z])) if writePDB: ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) if writePDB: pdb.close() else: return structure def rotateStructureByListCoord(num, num2, structure, listCoord, outputPath, title): pdb = open(outputPath + str(num) + "_" + str(num2) + "_rot.pdb", "w") indice = 0 pdb.write("REMARK TITLE " + title + "\n") for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): nx = (listCoord[indice]).get_coord()[0] ny = (listCoord[indice]).get_coord()[1] nz = (listCoord[indice]).get_coord()[2] ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) indice += 1 pdb.close() def angleRadBetweenVectors(vec1, vec2): X1 = vec1[0] Y1 = vec1[1] Z1 = vec1[2] X2 = vec2[0] Y2 = vec2[1] Z2 = vec2[2] scaP2 = (X1 * X2) + (Y1 * Y2) + (Z1 * Z2) parallequiv = (numpy.sqrt((X1 * X1) + (Y1 * Y1) + (Z1 * Z1))) * (numpy.sqrt((X2 * X2) + (Y2 * Y2) + (Z2 * Z2))) cosTetaReal = scaP2 / parallequiv # print "scaP2",scaP2 # print "parallequiv",parallequiv # print "cosTetaReal",cosTetaReal # Take care of roundoff errors # cosTetaReal = numpy.min(cosTetaReal,1) # cosTetaReal = numpy.max(-1,cosTetaReal) TetaReal = numpy.arccos(cosTetaReal) return TetaReal def matrixFromEulerAngles2(th1, th2, th3): # PHASER CONVENTION: Z,Y,Z """ Conversion 323 from James Diebel 2006 """ th1 = (th1 * 2 * numpy.pi) / 360 th2 = (th2 * 2 * numpy.pi) / 360 th3 = (th3 * 2 * numpy.pi) / 360 matrice = ADT.get_matrix(3, 3) matrice[0][0] = (numpy.cos(th1) * numpy.cos(th2) * numpy.cos(th3)) - (numpy.sin(th1) * numpy.sin(th3)) matrice[0][1] = (numpy.cos(th1) * numpy.cos(th2) * numpy.sin(th3)) + (numpy.sin(th1) * numpy.cos(th3)) matrice[0][2] = -1 * numpy.cos(th1) * numpy.sin(th2) matrice[1][0] = (-1 * numpy.sin(th1) * numpy.cos(th2) * numpy.cos(th3)) - (numpy.cos(th1) * numpy.sin(th3)) matrice[1][1] = (-1 * numpy.sin(th1) * numpy.cos(th2) * numpy.sin(th3)) + (numpy.cos(th1) * numpy.cos(th3)) matrice[1][2] = numpy.sin(th1) * numpy.sin(th2) matrice[2][0] = numpy.sin(th2) * numpy.cos(th3) matrice[2][1] = numpy.sin(th2) * numpy.sin(th3) matrice[2][2] = numpy.cos(th2) return matrice def matrixFromEulerAngles(th1, th2, th3): # PHASER CONVENTION: Z,Y,Z """ http://en.wikipedia.org/wiki/Euler_angles Relationship to other representations Rotation matrix: ZYZ """ th1 = (th1 * 2 * numpy.pi) / 360 th2 = (th2 * 2 * numpy.pi) / 360 th3 = (th3 * 2 * numpy.pi) / 360 matrice = ADT.get_matrix(3, 3) matrice[0][0] = (numpy.cos(th1) * numpy.cos(th2) * numpy.cos(th3)) - (numpy.sin(th1) * numpy.sin(th3)) matrice[0][1] = ((-1) * numpy.cos(th1) * numpy.cos(th2) * numpy.sin(th3)) - (numpy.sin(th1) * numpy.cos(th3)) matrice[0][2] = numpy.cos(th1) * numpy.sin(th2) matrice[1][0] = (numpy.sin(th1) * numpy.cos(th2) * numpy.cos(th3)) + (numpy.cos(th1) * numpy.sin(th3)) matrice[1][1] = ((-1) * numpy.sin(th1) * numpy.cos(th2) * numpy.sin(th3)) + (numpy.cos(th1) * numpy.cos(th3)) matrice[1][2] = numpy.sin(th1) * numpy.sin(th2) matrice[2][0] = (-1) * numpy.sin(th2) * numpy.cos(th3) matrice[2][1] = numpy.sin(th2) * numpy.sin(th3) matrice[2][2] = numpy.cos(th2) return matrice def clusterAtOnceSols(DicParameters, listaAllsol, isArcimboldo, renamePDBs, rnp_sol, baseDir, name, quate, laue, listNCS, excludeZSCORE, mode, ClusteringMode, ensembles, cell_dim, thresholdCompare, evaLLONG, convNames, tops=None, LIMIT_CLUSTER=None, giveids=False): riprova = True while riprova: try: traslazioni, fixed, dizioClu, convertnames = readTranslationsFTF(baseDir, name, quate, mode, tops=tops) riprova = False except: # print "Error...Trying again reading output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) num = 0 nclu = None listaFileDel = [] listaKeyDel = [] listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} dictioNameClus = {} clus = -1 brat = {} nfixfrags = 0 fullcluname = "" for rotaz in traslazioni: num += 1 # print "Validating "+str(num)+"\\"+str(len(traslazioni))+" traslations..." rotaz["numInSol"] = num if len(convertnames.keys()) > 0: ensembles[rotaz["name"]] = ensembles[convertnames[rotaz["name"]]] if "fixed_frags" in rotaz and (len(convertnames.keys()) > 0 or not isArcimboldo): for fi in rotaz["fixed_frags"]: ensembles[fi["name"]] = ensembles[rotaz["name"]] nfixfrags = len(rotaz["fixed_frags"]) + 1 if giveids: nameFi = ensembles[rotaz["name"]] if rotaz["name"] not in listaKeyDel: listaKeyDel.append(rotaz["name"]) nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "-" + str(rotaz["numInSol"]) + ".pdb") # os.link(nameFi,os.path.join(os.path.dirname(nameFi),nuovoPath)) # print "nome:",rotaz["name"]+"-"+str(rotaz["numInSol"]),"file:",nameFi ensembles[rotaz["name"] + "-" + str(rotaz["numInSol"])] = nameFi # nuovoPath rotaz["name"] = rotaz["name"] + "-" + str(rotaz["numInSol"]) # if nameFi not in listaFileDel: # listaFileDel.append(nameFi) # print "--!!!!--",rotaz["name"] if (len(rotaz["fixed_frags"]) == 0 or LIMIT_CLUSTER != None) and len(dizioClu.values()) > 0: nclu = dizioClu.values()[0] rotaz["original_rotcluster"] = str(nclu) rotaz["n_prev_cluster"] = nclu elif len(rotaz["fixed_frags"]) > 0 and len(dizioClu.values()) > 0: # print "--------dizioClu--------" # print dizioClu # print rotaz["name"] # print "------------------------" nclu = dizioClu.values()[0] rotaz["original_rotcluster"] = rotaz["fixed_frags"][-1]["original_rotcluster"] + "_" + str(nclu) rotaz["n_prev_cluster"] = __getIDClusterFromDescription(rotaz["original_rotcluster"]) clus = rotaz["n_prev_cluster"] if rotaz["zscore"] < excludeZSCORE: continue rotaz["elong"] = 0 # rotaz["fixed_frags"] = fixed listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) listaRotNumInRlist.append((rotaz["name"], rotaz["numInSol"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInSol"])] = rotaz dictioNameClus[rotaz["name"]] = rotaz["original_rotcluster"] fullcluname = rotaz["original_rotcluster"] if mode == "RNP": for ler in rnp_sol.keys(): cud = int((ler.split("."))[1]) # 8.1.pdb here it takes 1 tud = int((ler.split("."))[0]) # 8.1.pdb here it takes 8 if tops != None and cud > tops: try: os.remove(baseDir + ler) except Exception: pass continue for bla in listaRotNumInRlist: # print "====",bla[1],"======",tud,cud,"....",int(name) if bla[1] == cud and tud == int(name): rnp_sol[ler] = bla[0] brat[ler] = bla[0] break if mode == "RNP" and renamePDBs: if isArcimboldo: for ler in brat.keys(): cud = int((ler.split("."))[0]) # print ler, brat[ler], convNames[ler][0], cud, int(name) if cud == int(name): f = open(baseDir + ler, "r") allf = f.read() f.close() # print dictioNameClus nede = baseDir + os.path.basename(convNames[ler][0])[:-4] + brat[ler].split("ensembleID")[1] # print ".....--..------.....",nede,laue,os.path.exists(os.path.join(baseDir,"../../3_FTF_LIBRARY/")),os.path.join(baseDir,"../3_FTF_LIBRARY/") if str(laue) == "1" and not os.path.exists(os.path.join(baseDir, "../../3_FTF_LIBRARY/")): nede += "-1.pdb" else: nede += ".pdb" f = open(nede, "w") druppo = str(dictioNameClus[brat[ler]]) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo + "\n")) f.write(allf) f.close() os.remove(baseDir + ler) # shutil.move(baseDir+ler,baseDir+os.path.basename(convNames[ler][0])[:-4]+brat[ler].split("ensembleID")[1]+".pdb") # Associare a .n.pdb l'ensembleID del primo nel .n.pdb ensembles[brat[ler]] = nede else: f = open(baseDir + name + ".1.pdb", "r") allf = f.read() f.close() # print "+++++++++++",convNames[name+".1.pdb"] nomed = baseDir + os.path.basename(convNames[name + ".1.pdb"][0]) if len(convNames[name + ".1.pdb"][1].split("-")) > 1: nomed = nomed[:-4] + "-" + convNames[name + ".1.pdb"][1].split("-")[1] + ".pdb" f = open(nomed, "w") druppo = str(fullcluname) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo) + "\n") f.write(allf) f.close() os.remove(baseDir + name + ".1.pdb") # shutil.move(baseDir+name+".1.pdb",baseDir+os.path.basename(convNames[name+".1.pdb"][0])) ensembles[convNames[name + ".1.pdb"][1]] = nomed listaAllsol[0] += listaQuaternions listaAllsol[1] += listaRotNumInRlist listaAllsol[2].update(listaDictioNameNumInRlist) if giveids: for ele in listaKeyDel: del ensembles[ele] return ensembles, listaAllsol, nfixfrags def clusterAllRotInList(DicParameters, listrot, isArcimboldo, quate, laue, listNCS, excludeLLG, mode, ClusteringMode, cell_dim, thresholdCompare, evaLLONG, tops=None, LIMIT_CLUSTER=None, giveids=False, applyNameFilter=False): global LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID = 0 ClusAll = [] RotClu = [] ensembles = {} listaAllrot = [[], [], {}, [[], RotClu]] num = 0 listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} # print "////////",len(listrot) for ro in listrot: rotaz, pdbname = ro num += 1 # print "Checking the "+str(num)+"\\"+str(len(listrot))+" rotation..." if mode in ["TRA", "PACK", "RNP", "PICASSO"]: rotaz["numInSol"] = num else: rotaz["numInRlist"] = num if rotaz["llg"] < excludeLLG: continue fixed = [] if "fixed_frags" in rotaz.keys(): fixed = rotaz["fixed_frags"] name = 0 if giveids: nameFi = pdbname nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "xx" + str(name) + "FR" + str( len(fixed)) + "_" + str(rotaz["numInRlist"]) + ".pdb") ensembles["ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str( rotaz["numInRlist"])] = nameFi # nuovoPath rotaz["name"] = "ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str(rotaz["numInRlist"]) listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) listaRotNumInRlist.append((rotaz["name"], rotaz["numInRlist"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInRlist"])] = rotaz if not isArcimboldo or (isArcimboldo and len(fixed) == 0): tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, prefilter=1.5) indes = len(ClusAll) for key in tomerge.keys(): dicton = {"heapSolutions": ADT.Heap()} value = tomerge[key] + [key] pdb_done = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue pdb_done.append(rotaz["name"]) if LIMIT_CLUSTER == None: rotaz["n_prev_cluster"] = indes else: rotaz["n_prev_cluster"] = LIMIT_CLUSTER rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"]) dicton["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) indes += 1 if LIMIT_CLUSTER != None: if len(ClusAll) <= LIMIT_CLUSTER: while len(ClusAll) < LIMIT_CLUSTER + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[LIMIT_CLUSTER] = dicton else: ClusAll.append(dicton) LAST_AVAILABLE_ROTID = indes tomerge = None merged = None nuovo_clus = None subclu = None listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} RotClu = [] for inderec in range(len(ClusAll)): prio, rota = ClusAll[inderec]["heapSolutions"].pop() ClusAll[inderec]["heapSolutions"].push(prio, rota) hp = ADT.Heap() hp.push(prio, copy.deepcopy(rota)) RotClu.append({"heapSolutions": hp}) elif not isArcimboldo or (len( fixed) > 0 and LIMIT_CLUSTER == None): # It means that we are not at the first fragment search for Arcimboldo or we are working with libraries in Borges tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, prefilter=1.5) # print "tomerge" # print tomerge final_clus = [] for key in tomerge.keys(): value = tomerge[key] + [key] pdb_done = [] lisp = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue if LIMIT_CLUSTER != None: rotaz["original_rotcluster"] = LIMIT_CLUSTER rotaz["n_prev_cluster"] = LIMIT_CLUSTER # print "WAAA Insert Rotation:",rotaz["name"],"llg:",rotaz["llg"],"zscore:",rotaz["zscore"],"in cluster:",rotaz["n_prev_cluster"] pdb_done.append(rotaz["name"]) lisp.append(rotaz) final_clus.append(lisp) listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} for cl in final_clus: if len(cl) == 0: continue rotaz = cl[0] inserted = False fixedNumbers = [] for rotafi in fixed: fixedNumbers.append(int(rotafi["original_rotcluster"].split("_")[-1])) result, elong = compareRotation(rotaz, rotafi, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) # print "rotaz cluster",rotaz["euler"] # print "rotafi cluster",rotafi["euler"] # print "---",result,elong if result: rotaz["elong"] = elong inserted = True for ro in cl: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rotafi["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) break if not inserted and LIMIT_CLUSTER == None: for inderec in range(len(RotClu)): prio, rota = RotClu[inderec]["heapSolutions"].pop() ncl = rota["n_prev_cluster"] RotClu[inderec]["heapSolutions"].push(prio, rota) if ncl in fixedNumbers: continue # print "---------++++++++++++-------------" # print "//rotaz cluster//",rotaz["euler"],rotaz["name"] # print "//rota cluster//",rota["euler"],rota["name"] result, elong = compareRotation(rotaz, rota, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) if result: rotaz["elong"] = elong inserted = True for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = int(ro["original_rotcluster"]) break # db.close() if not inserted and LIMIT_CLUSTER == None: rotaz["elong"] = 0 for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = int(ro["original_rotcluster"]) print "Inserting rotation in a new cluster", LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID += 1 hp = ADT.Heap() hp.push((-1 * cl[0]["llg"], -1 * cl[0]["zscore"]), copy.deepcopy(cl[0])) dicn = {"heapSolutions": hp} RotClu.append(dicn) for ro in cl: \ # print "ro[name]",ro["name"] # if ro["n_prev_cluster"] > 0: # listaQuaternions.append([ro["quaternion"][0],ro["quaternion"][1],ro["quaternion"][2],ro["quaternion"][3]]) listaQuaternions.append([ro["euler"][0] * 1, ro["euler"][1] * 2, ro["euler"][2] * 3, ADT.cantor_pairing( [ro["euler"][0] * 3, ro["euler"][1] * 1, ro["euler"][2] * 2])]) listaRotNumInRlist.append((ro["name"], ro["numInRlist"])) listaDictioNameNumInRlist[(ro["name"], ro["numInRlist"])] = ro \ # print "CONFIGURO ORIGINAL ROTCLUSTER A: ",ro["original_rotcluster"],ro["n_prev_cluster"] # print "..........................",listaDictioNameNumInRlist ClusAll = saveRotations(DicParameters, [listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, ClusAll], LIMIT_CLUSTER=LIMIT_CLUSTER, applyNameFilter=applyNameFilter) listaAllrot[0] += listaQuaternions listaAllrot[1] += listaRotNumInRlist listaAllrot[2].update(listaDictioNameNumInRlist) listaAllrot[3] = [ClusAll, RotClu] # print "-----------------",len(listaAllrot[0]),len(listaAllrot[1]),len(listaAllrot[2]) if giveids: return listaAllrot[3][0], listaAllrot[3][1], ensembles else: return listaAllrot[3][0], listaAllrot[3][1] def clusterAtOnce(DicParameters, listaAllrot, isArcimboldo, baseDir, name, quate, laue, listNCS, excludeLLG, mode, ClusteringMode, ensembles, cell_dim, thresholdCompare, evaLLONG, tops=None, LIMIT_CLUSTER=None, giveids=False, applyNameFilter=False, lastFile=False): global LAST_AVAILABLE_ROTID riprova = True ClusAll = listaAllrot[3][0] RotClu = listaAllrot[3][1] while riprova: try: if mode == "FRF": rotazioni, fixed = readRotationsFRF(baseDir, name, quate, tops=tops) elif mode in ["TRA", "PACK", "RNP", "PICASSO"]: rotazioni, fixed, dizioClu, convertnames = readTranslationsFTF(baseDir, name, quate, mode, tops=tops) riprova = False except: print "Error...Trying again reading output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) # print "lette "+str(len(rotazioni))+" rotazioni..." # print "i clusters sono ", len(Clusters) num = 0 listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} # print "////////",len(rotazioni) for rotaz in rotazioni: num += 1 # print "Collocando la "+str(num)+"\\"+str(len(rotazioni))+" rotazione..." if mode in ["TRA", "PACK", "RNP", "PICASSO"]: rotaz["numInSol"] = num else: rotaz["numInRlist"] = num if rotaz["llg"] < excludeLLG: continue if giveids: nameFi = ensembles[rotaz["name"]] # del ensembles[rotaz["name"]] nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "xx" + str(name) + "FR" + str( len(fixed)) + "_" + str(rotaz["numInRlist"]) + ".pdb") # os.link(nameFi,os.path.join(os.path.dirname(nameFi),nuovoPath)) ensembles["ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str( rotaz["numInRlist"])] = nameFi # nuovoPath rotaz["name"] = "ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str(rotaz["numInRlist"]) rotaz["fixed_frags"] = fixed # listaQuaternions.append([rotaz["quaternion"][0],rotaz["quaternion"][1],rotaz["quaternion"][2],rotaz["quaternion"][3]]) listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) listaRotNumInRlist.append((rotaz["name"], rotaz["numInRlist"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInRlist"])] = rotaz # print "((((((((((((((((((((((((((" # for cuu in sorted(listaQuaternions): # print cuu,cuu[0]+cuu[1]+cuu[2] # print "))))))))))))))))))))))))))" # print "Arcimboldo?",isArcimboldo,"name",name,"type(name)",type(name) if (not isArcimboldo and name == "0") or (isArcimboldo and len(fixed) == 0): tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG) indes = len(ClusAll) for key in tomerge.keys(): dicton = {"heapSolutions": ADT.Heap()} value = tomerge[key] + [key] pdb_done = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue pdb_done.append(rotaz["name"]) if LIMIT_CLUSTER == None: rotaz["n_prev_cluster"] = indes else: rotaz["n_prev_cluster"] = LIMIT_CLUSTER rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"]) dicton["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) indes += 1 if LIMIT_CLUSTER != None: if len(ClusAll) <= LIMIT_CLUSTER: while len(ClusAll) < LIMIT_CLUSTER + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[LIMIT_CLUSTER] = dicton else: ClusAll.append(dicton) LAST_AVAILABLE_ROTID = indes tomerge = None merged = None nuovo_clus = None subclu = None listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} RotClu = [] for inderec in range(len(ClusAll)): if len(ClusAll[inderec]["heapSolutions"].asList()) == 0: continue prio, rota = ClusAll[inderec]["heapSolutions"].pop() ClusAll[inderec]["heapSolutions"].push(prio, rota) hp = ADT.Heap() hp.push(prio, copy.deepcopy(rota)) RotClu.append({"heapSolutions": hp}) elif (not isArcimboldo and name != "0") or (len( fixed) > 0 and LIMIT_CLUSTER == None): # It means that we are not at the first fragment search for Arcimboldo or we are working with libraries in Borges tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG) # print "tomerge" # print tomerge final_clus = [] for key in tomerge.keys(): value = tomerge[key] + [key] pdb_done = [] lisp = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue if LIMIT_CLUSTER != None: rotaz["original_rotcluster"] = LIMIT_CLUSTER rotaz["n_prev_cluster"] = LIMIT_CLUSTER # print "WAAA Insert Rotation:",rotaz["name"],"llg:",rotaz["llg"],"zscore:",rotaz["zscore"],"in cluster:",rotaz["n_prev_cluster"] pdb_done.append(rotaz["name"]) lisp.append(rotaz) final_clus.append(lisp) listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} for cl in final_clus: if len(cl) == 0: continue rotaz = cl[0] inserted = False fixedNumbers = [] for rotafi in fixed: fixedNumbers.append(int(rotafi["original_rotcluster"].split("_")[-1])) result, elong = compareRotation(rotaz, rotafi, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) # print "rotaz cluster",rotaz["euler"] # print "rotafi cluster",rotafi["euler"] # print "---",result,elong if result: rotaz["elong"] = elong inserted = True for ro in cl: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rotafi["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) break if not inserted and LIMIT_CLUSTER == None: for inderec in range(len(RotClu)): prio, rota = RotClu[inderec]["heapSolutions"].pop() ncl = rota["n_prev_cluster"] RotClu[inderec]["heapSolutions"].push(prio, rota) if ncl in fixedNumbers: continue # print "---------++++++++++++-------------" # print "//rotaz cluster//",rotaz["euler"],rotaz["name"] # print "//rota cluster//",rota["euler"],rota["name"] result, elong = compareRotation(rotaz, rota, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) if result: rotaz["elong"] = elong inserted = True for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = int(ro["original_rotcluster"]) break # db.close() if not inserted and LIMIT_CLUSTER == None: rotaz["elong"] = 0 for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = int(ro["original_rotcluster"]) print "Inserting rotation in a new cluster", LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID += 1 hp = ADT.Heap() hp.push((-1 * cl[0]["llg"], -1 * cl[0]["zscore"]), copy.deepcopy(cl[0])) dicn = {"heapSolutions": hp} RotClu.append(dicn) for ro in cl: # print "ro[name]",ro["name"] # if ro["n_prev_cluster"] > 0: # listaQuaternions.append([ro["quaternion"][0],ro["quaternion"][1],ro["quaternion"][2],ro["quaternion"][3]]) listaQuaternions.append([ro["euler"][0] * 1, ro["euler"][1] * 2, ro["euler"][2] * 3, ADT.cantor_pairing( [ro["euler"][0] * 3, ro["euler"][1] * 1, ro["euler"][2] * 2])]) listaRotNumInRlist.append((ro["name"], ro["numInRlist"])) listaDictioNameNumInRlist[(ro["name"], ro["numInRlist"])] = ro # print "CONFIGURO ORIGINAL ROTCLUSTER A: ",ro["original_rotcluster"],ro["n_prev_cluster"] # print "..........................",listaDictioNameNumInRlist ClusAll = saveRotations(DicParameters, [listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, ClusAll], LIMIT_CLUSTER=LIMIT_CLUSTER, applyNameFilter=applyNameFilter) listaAllrot[0] += listaQuaternions listaAllrot[1] += listaRotNumInRlist listaAllrot[2].update(listaDictioNameNumInRlist) listaAllrot[3] = [ClusAll, RotClu] # print "-----------------",len(listaAllrot[0]),len(listaAllrot[1]),len(listaAllrot[2]) if giveids: return ensembles, listaAllrot else: return listaAllrot def __getIDClusterFromDescription(nameClustDesc): global MAP_OF_ROT_COMB clut = tuple(map(lambda x: int(x), nameClustDesc.split("_"))) if len(clut) == 1: return clut[0] if clut in MAP_OF_ROT_COMB.keys(): return MAP_OF_ROT_COMB[clut] else: # NOTE: if we create the permutations than we will not distinguish 0,1 from 1,0 it means that # that we can choose just one combinations to follow up (0,1) or (1,0) but not both # if we make this distinction here we will have two different type of combinations one for (0,1) and another for (1,0) qp = MAP_OF_ROT_COMB.values() t = 1 if len(qp) > 0: t = max(qp) + 1 MAP_OF_ROT_COMB[clut] = t return t def __mergeEquivalentRotCombination(): global MAP_OF_ROT_COMB equals = {} value_done = [] for key in MAP_OF_ROT_COMB.keys(): value = MAP_OF_ROT_COMB[key] if value not in value_done: value_done.append(value) equals[value] = [] e = itertools.permutations(key) for q in e: if q in MAP_OF_ROT_COMB.keys() and MAP_OF_ROT_COMB[q] != value: equals[value].append(MAP_OF_ROT_COMB[q]) value_done.append(MAP_OF_ROT_COMB[q]) return equals def saveRotations(DicParameters, listaAllrot, LIMIT_CLUSTER=None, applyNameFilter=False): ClusAll = listaAllrot[3] num = 0 rotazioni = listaAllrot[2].values() pdb_done = [] indes = len(ClusAll) for rotaz in sorted(rotazioni, __cmp_rota, reverse=True): # print "/////" # print rotaz # print "////" if applyNameFilter and (rotaz["name"], rotaz["n_prev_cluster"]) in pdb_done: continue pdb_done.append((rotaz["name"], rotaz["n_prev_cluster"])) if "n_prev_cluster" not in rotaz.keys(): if LIMIT_CLUSTER == None: rotaz["n_prev_cluster"] = indes else: rotaz["n_prev_cluster"] = LIMIT_CLUSTER rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"]) indes += 1 if len(ClusAll) <= rotaz["n_prev_cluster"]: while len(ClusAll) < rotaz["n_prev_cluster"] + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[rotaz["n_prev_cluster"]]["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) return ClusAll def angle_between(A, B, N, signed=True): # ANGLE BETWEEN TWO 3D VECTORS: # 1- dot(norm(A),norm(B)) (ANGLES UNSIGNED, PROBLEMS FOR SMALL ANGLES WITH ROUNDINGS) # 2- arcos(dot(A,B)/(|A|*|B|)) (ANGLE UNSIGNED, PROBLEMS FOR SMALL ANGLES WITH ROUNDINGS) # 3- arctan2(|cross(A,B)|,dot(A,B)) (ANGLE UNSIGNED BUT NOT PROBLEMS OF ROUNDINGS # define a vector NORM ex.: N = [0,0,1] # sign = dot(NORM,cross(A,B)) # if sign < 0 then ANGLE measured in 3 should be negative CrossX = A[1] * B[2] - A[2] * B[1] CrossY = A[2] * B[0] - A[0] * B[2] CrossZ = A[0] * B[1] - A[1] * B[0] fCross = numpy.sqrt(CrossX * CrossX + CrossY * CrossY + CrossZ * CrossZ) scaP2 = (A[0] * B[0]) + (A[1] * B[1]) + (A[2] * B[2]) Teta_2 = numpy.arctan2(fCross, scaP2) if signed: sign = (N[0] * CrossX) + (N[1] * CrossY) + (N[2] * CrossZ) if sign < 0: Teta_2 = -Teta_2 return Teta_2 else: return Teta_2 def simpleDistributionOrientationComparison4(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim, print_angles=False): TETA_1 = None try: structureA = [[-0.048, 3.749, 0.000], [92.111, 48.141, 16.362]] structureB = copy.deepcopy(structureA) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim) Bq = rot2["rotationMatrices"] # print "---", Aq # print "---", Bq structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim) Aatm1 = structureA[0] Aatm2 = structureA[-1] Batm1 = structureB[0] Batm2 = structureB[-1] if print_angles: print "Structure 1" print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_1 = TETA_1 * 57.2957795 except: TETA_1 = 1000 print "ATTENTION: Model not found... Using just the model of rot2" # print sys.exc_info() # traceback.print_exc(file=sys.stdout) if print_angles: print "TETA_1", TETA_1, "threshold", threshold # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() if "angle" in rot2.keys(): if rot2["angle"] >= TETA_1: rot2["angle"] = TETA_1 else: rot2["angle"] = TETA_1 if TETA_1 <= 1000 and TETA_1 <= threshold: return True else: return False def simpleDistributionOrientationComparison3(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim, print_angles=False): TETA_1 = None TETA_2 = None try: structureA = Bioinformatics.getAtomsList("A", convNames[rot1["name"]]) structureA = [[float(structureA[0][6]), float(structureA[0][7]), float(structureA[-1][8])], [float(structureA[-1][6]), float(structureA[-1][7]), float(structureA[-1][8])]] structureB = copy.deepcopy(structureA) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim) Bq = rot2["rotationMatrices"] # print "---", Aq # print "---", Bq structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim) Aatm1 = structureA[0] Aatm2 = structureA[-1] Batm1 = structureB[0] Batm2 = structureB[-1] if print_angles: print "Structure 1" print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_1 = TETA_1 * 57.2957795 except: TETA_1 = 1000 print "ATTENTION: Model", rot1["name"], "not found... Using just the model of rot2" # print sys.exc_info() # traceback.print_exc(file=sys.stdout) try: structureA = Bioinformatics.getAtomsList("A", convNames[rot2["name"]]) structureA = [[float(structureA[0][6]), float(structureA[0][7]), float(structureA[-1][8])], [float(structureA[-1][6]), float(structureA[-1][7]), float(structureA[-1][8])]] structureB = copy.deepcopy(structureA) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim) Bq = rot2["rotationMatrices"] structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim) Aatm1 = structureA[0] Aatm2 = structureA[-1] Batm1 = structureB[0] Batm2 = structureB[-1] if print_angles: print "Structure 2" print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_2 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_2 = TETA_2 * 57.2957795 except: TETA_2 = 1000 print "ATTENTION: Model", rot2["name"], "not found... Using just the model of rot1" # print sys.exc_info() # traceback.print_exc(file=sys.stdout) if print_angles: print "TETA_1", TETA_1, "TETA_2", TETA_2, "threshold", threshold # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() if min(TETA_1, TETA_2) <= 1000 and min(TETA_1, TETA_2) <= threshold: return True else: return False def simpleDistributionOrientationComparison2(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim, print_angles=False): TETA_1 = None TETA_2 = None try: structureA = Bioinformatics.getStructure("A", convNames[rot1["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot1["name"]]) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=False) Bq = rot2["rotationMatrices"] # print "---", Aq # print "---", Bq structureB = rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=False) Aatm1 = structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Aatm2 = structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() Batm1 = structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Batm2 = structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_1 = TETA_1 * 57.2957795 except: TETA_1 = 1000 try: structureA = Bioinformatics.getStructure("A", convNames[rot2["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot2["name"]]) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=False) Bq = rot2["rotationMatrices"] structureB = rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=False) Aatm1 = structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Aatm2 = structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() Batm1 = structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Batm2 = structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() if print_angles: print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_2 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_2 = TETA_2 * 57.2957795 except: TETA_2 = 1000 if print_angles: print "TETA_1", TETA_1, "TETA_2", TETA_2, "threshold", threshold # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() if min(TETA_1, TETA_2) <= 1000 and min(TETA_1, TETA_2) <= threshold: return True else: return False def simpleDistributionOrientationComparison(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim): # if not (os.path.exists("./temp/")): # os.makedirs("./temp/") structureA = Bioinformatics.getStructure("A", convNames[rot1["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot2["name"]]) Aq = rot1["rotationMatrices"] rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=True) # tupleResult = Bioinformatics.getFragmentListFromPDB("./temp/0_0_rot.pdb",False,False) # tupleResult = Bioinformatics.getFragmentListFromStructure(stru00,False,False,"stru00") tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/0_0_rot.pdb", False) Astructure = tupleResult[0] AlistFrags = tupleResult[1] # Bq = rot2["quaternion"] Bq = rot2["rotationMatrices"] rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=True) # tupleResult = Bioinformatics.getFragmentListFromPDB("./temp/1_1_rot.pdb",False,False) # tupleResult = Bioinformatics.getFragmentListFromStructure(stru11,False,False,"stru11") tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/1_1_rot.pdb", False) Bstructure = tupleResult[0] BlistFrags = tupleResult[1] """ #============TEMPORANEO=============== maxl = 0 ind = 0 for i in range(len(AlistFrags)): if maxl < (AlistFrags[i])["fragLength"]: ind = i maxl = (AlistFrags[i])["fragLength"] AlistFrags = [AlistFrags[ind]] BlistFrags = [BlistFrags[ind]] #===========TEMPORANEO================ """ # print "lenA",len(AlistFrags),"lenB",len(BlistFrags) nWindows, comp_windows, anyWay = Bioinformatics.compareDistributionAccordingOrientation(AlistFrags, BlistFrags, threshold, shift, where) """ print "---------------" print str(rot1["quaternion"]) print str(rot2["quaternion"]) print str(anyWay) print nWindows,len(comp_windows) print "---------------" """ # os.remove("./temp/0_0_rot.pdb") # os.remove("./temp/1_1_rot.pdb") result = False for t in range(len(nWindows)): prop = numpy.ceil((nWindows[t] * 40) / 100) if len(comp_windows[t]) >= (nWindows[t] - prop): result = True """ print "---------------" print str(Aq) print str(Bq) print str(comp_windows) print nWindows[t],len(comp_windows[t]) print "---------------" """ else: """ print "---------------" print str(Aq) print str(Bq) print str(comp_windows) print nWindows[t],len(comp_windows[t]),shift,where print "---------------" """ return False, len(comp_windows[t]) return result, None def compareRotation(rot1, rot2, treshold, mode, quate, laue, listNCS, convNames, cell_dim, evaLLONG, print_angles=False): if mode == "distributionCV": investigate = False for shift in range(8): if shift > 0 and not evaLLONG: break if not evaLLONG: shift = 0 else: shift = shiftew firstResult = simpleDistributionOrientationComparison(rot1, rot2, treshold, quate, convNames, shift, "A", cell_dim) if firstResult[0]: return True, shift else: # cerca equivalenza per simmetria contr = 0 secondRe = None for rti, rtq in (quate.matricesRot[laue]).iteritems(): if contr == 0: contr += 1 continue contr += 1 new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti]) rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(rtq) secondResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate, convNames, shift, "A", cell_dim) if secondResult[0]: rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(rtq) # return True,shift secondRe = (True, shift) break elif secondResult[1] > 0: investigate = True if secondRe != None: return secondRe else: for ncs in listNCS: print "COMPARING NCS MATRIX", ncs rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(ncs) new_quat = quate.convertRotMatrixToQuaternion2(ncs) thirdResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate, convNames, shift, "A", cell_dim) if thirdResult[0]: # print "Result:",secondResult rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(ncs) # print rot1["name"], "COMPATIBLE WITH", new_quat, ncs return True, shift if not firstResult[1] and not investigate: break if evaLLONG: investigate = False for shift in range(1, 8): firstResult = simpleDistributionOrientationComparison(rot1, rot2, treshold, quate, convNames, shift, "B", cell_dim) if firstResult[0]: return True, -1 * shift else: # cerca equivalenza per simmetria contr = 0 for rti, rtq in (quate.matricesRot[laue]).iteritems(): if contr == 0: contr += 1 continue contr += 1 new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti]) rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(rtq) secondResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate, convNames, shift, "B", cell_dim) if secondResult[0]: rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(rtq) return True, -1 * shift elif secondResult[1] > 0: investigate = True if not firstResult[1] and not investigate: break return False, 0 elif mode == "rot_matrices": firstResult = simpleDistributionOrientationComparison4(rot1, rot2, treshold, quate, convNames, 0, "A", cell_dim, print_angles=print_angles) if firstResult: return True, 0 else: # cerca equivalenza per simmetria contr = 0 secondResult = False for rti, rtq in (quate.matricesRot[laue]).iteritems(): if contr == 0: contr += 1 continue contr += 1 new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti]) rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(rtq) secondResult = simpleDistributionOrientationComparison4(rot3, rot2, treshold, quate, convNames, 0, "A", cell_dim, print_angles=print_angles) if secondResult: rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(rtq) # return True,shift secondResult = True break if secondResult: return True, 0 else: for ncs in listNCS: print "COMPARING NCS MATRIX", ncs rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(ncs) new_quat = quate.convertRotMatrixToQuaternion2(ncs) thirdResult = simpleDistributionOrientationComparison4(rot3, rot2, treshold, quate, convNames, 0, "A", cell_dim, print_angles=print_angles) if thirdResult: # print "Result:",secondResult rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(ncs) # print rot1["name"], "COMPATIBLE WITH", new_quat, ncs return True, 0 return False, 0 elif mode == "quaternion": q1 = rot1["quaternion"] q2 = rot2["quaternion"] # if "simmetry_rotated" in rot1: # q1 = rot1["simmetry_rotated"] # if "simmetry_rotated" in rot2: # q2 = rot2["simmetry_rotated"] firstResult = simpleQuaternionComparison(q1, q2, treshold, quate) # print "Direct compare:",firstResult if firstResult: return True, 0 else: # cerca equivalenza per simmetria secondResult = False for rti, rtq in (quate.quaterRotaz[laue]).iteritems(): new_quat = quate.RotateQuaternion(rot1["quaternion"], rtq) # print "Trying equivalent",rti,rtq # print "Rotated quaternion",new_quat # print "Compared with",q2 # matri_q = quate.convertQuaternionToMatrix(new_quat) # print "Matrice: ",matri_q secondResult = simpleQuaternionComparison(new_quat, q2, treshold, quate) # print "Result:",secondResult if secondResult: rot1["simmetry_rotated"] = new_quat # return True,0 break if secondResult: return True, 0 else: for ncs in listNCS: print "COMPARING NCS MATRIX", ncs rtq = quate.convertRotMatrixToQuaternion2(ncs) new_quat = quate.RotateQuaternion(rot1["quaternion"], rtq) # print "Trying equivalent",rti,rtq # print "Rotated quaternion",new_quat # print "Compared with",q2 # matri_q = quate.convertQuaternionToMatrix(new_quat) # print "Matrice: ",matri_q thirdResult = simpleQuaternionComparison(new_quat, q2, treshold, quate) # print "Result:",secondResult if thirdResult: rot1["simmetry_rotated"] = new_quat # print rot1["name"], "COMPATIBLE WITH", new_quat return True, 0 return False, 0 def simpleQuaternionComparison2(rot1q, rot2q, treshold, quate): """ Thesis Robert Adam Nicholls """ angle = quate.QuaternionDotProduct(rot1q, rot2q) # 2. n(th) Chebyshev Polynomial angle = 2 - 2 * (angle ** 2) print "distance cosin", angle # 3. Compute the arcosin of the angle of the wauternion above angle = numpy.arccos(angle) # 5. Multiply the angle for 57.2957795f to have the angle in degrees angle_degree = angle * 57.2957795 if angle_degree > 180: angle_degree = 360.0 - angle_degree print "Angle degreee: ", angle_degree if angle_degree <= treshold: return True else: return False def simpleQuaternionComparison(rot1q, rot2q, treshold, quate): """ James Diebel 2006 """ ############## # NEW METHOD # ############## # 1. Compute the quaternion inverse rot1q inv = quate.QuaternionInverse(rot1q) # print inv # 2. Compute the product of rot2q and thr inverse of rot1q res = quate.QuaternionProduct(rot2q, inv) # res = quate.QuaternionProduct(inv,rot2q) # print "QProduct",res # print "QProduct2",quate.QuaternionProduct(inv,rot2q) # 3. Compute the arccos of the angle of the quaternion above angle = numpy.arccos(res[3]) # 4. Multiply the angle for 2.0f to have the angle in radians angle_rad = angle * 2.0 # 5. Multiply the angle for 57.2957795f to have the angle in degrees angle_degree = angle_rad * 57.2957795 if angle_degree > 180: angle_degree = 360.0 - angle_degree # print "Angle degreee: ",angle_degree if angle_degree <= treshold: return True else: return False # tetaThreshRad = (treshold*2*numpy.pi)/360 # treshAngleRot = (treshAngleRot*2*numpy.pi)/360 # tetaVect = angleRadBetweenVectors(rot1q[:-1], rot2q[:-1]) # uno = (tetaVect <= tetaThreshRad) # due = (numpy.abs(rot1q[-1]-rot2q[-1]) <= treshAngleRot) # print "//////////////" # print "Comparing:" # print str(rot1q) # print str(rot2q) """ dot = (rot1q[3]*rot2q[3]) + (rot1q[0]*rot2q[0]) + (rot1q[1]*rot2q[1]) + (rot1q[2]*rot2q[2]) #print "dot is:",abs(dot),(1.0-abs(dot)),treshold #print "////////////" dot = abs(dot) if (1.0-dot) <= treshold: #if ((rot2q[3]<0 and rot1q[3]<0) or (rot2q[3]>=0 and rot1q[3]>=0)): # return True #else: # return False return True else: return False """ # print "======================================" # print rot1q # print rot2q # print uno,due # print tetaVect, tetaThreshRad, uno # print rot1q[-1], rot2q[-1], treshAngleRot, due # print "======================================" # return (uno and due) def readTranslationsFTF(baseDir, name, quate, mode, tops=None): sol = baseDir + name + ".sol" out = baseDir + name + ".out" sh = baseDir + name + ".sh" fixed = [] diziona = {} FOM = {} savellg = 0.0 savezscore = 0.0 diziocorresp = {} dinuovoname = "" cutoff = 0 if mode != "PICASSO": fi = open(out, "r") traFOM = fi.readlines() fi.close() if len(traFOM) == 0: raise Exception(".out not ready!") if not os.path.exists(sol): return ([], [], {}, {}) fi = open(sh, "r") script = fi.readlines() fi.close() fixed = [] first = False second = False nfixed = 0 nfrags = 0 for linea in script: lista = linea.split() if lista[0] == "PACK" and lista[1] == "CUTOFF": cutoff = int(lista[2]) if lista[0] == "SOLU" and lista[1] == "SET": if not first: first = True else: second = True nfrags = 0 # if mode != "RNP": # break if lista[0] == "SOLU" and lista[1] == "6DIM": if len(lista) == 22: dizio = {} dizio["original_rotcluster"] = str(lista[17]) dizio["zscore"] = float(lista[21]) dizio["llg"] = float(lista[19]) dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir if str(lista[15]).startswith("ensembleID"): dizio["name"] = str(lista[15]) elif str(lista[3]).startswith("ensarci"): dizio["name"] = str(lista[20]) else: dizio["name"] = str(lista[3]) dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])] dizio["bfactor"] = float(lista[13]) dizio["elong"] = 0 nfrags += 1 fixed.append(dizio) elif mode in ["RNP", "PACK"]: dizio = {} dizio["original_rotcluster"] = str(lista[15]) dizio["zscore"] = float(lista[19]) dizio["llg"] = float(lista[17]) dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir if str(lista[15]).startswith("ensembleID"): dizio["name"] = str(lista[15]) elif str(lista[3]).startswith("ensarci"): dizio["name"] = str(lista[20]) else: dizio["name"] = str(lista[3]) dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])] dizio["bfactor"] = float(lista[13]) dizio["elong"] = 0 nfrags += 1 fixed.append(dizio) nfixed = nfrags - 1 if mode in ["PACK"] and not second: savellg = fixed[-1]["llg"] savezscore = fixed[-1]["zscore"] dinuovoname = fixed[-1]["name"] fixed = fixed[:-1] elif mode in ["RNP", "PACK"]: fixed = [fixed[i:i + nfixed + 1] for i in range(0, len(fixed), nfixed + 1)] if mode == "RNP": startc = 0 for linea in traFOM: if startc >= 1: led = linea.strip().split() if len(led) == 0: break if led[0] != "---": zs = 0.0 if led[5] != "n/a": zs = float(led[5]) if led[0].startswith("Top"): diziocorresp[int(led[0].split("Top")[1])] = ( int(led[0].split("Top")[1]), float(led[6]), zs) # PHASER 2.6.0 else: diziocorresp[int(led[0])] = (int(led[0]), float(led[6]), zs) # diziocorresp[int(led[1])] = (int(led[0]),float(led[4])) #{num_in_sol:(num_in_sh,llg_refined)} # if linea.strip().startswith("#+ #* (Initial LLG & Rval)"): # if linea.strip().startswith("#+ #* (Start LLG Rval TFZ"): #PHASER 2.5.5 if linea.strip().startswith("#out =#out"): # PHASER 2.7.9 startc += 1 elif mode == "PACK": startc = 0 diziocorresp = {} for linea in traFOM: if startc >= 1: led = linea.split() if len(led) == 0: break if led[2].startswith(">"): led[2] = 10000 diziocorresp[int(led[0])] = float(led[2]) # PHASER 2.7.x {num_in_sh:percent_clashes} # if linea.strip().startswith("# # #Clash Packs"): #PHASER 2.5.5 if linea.strip().startswith("#in #out"): # PHASER 2.7.9 startc += 1 diziona = {} list_trial = [] if mode == "TRA": for linea in script: lista = linea.split() if lista[0] == "SOLU" and lista[1] == "TRIAL": if len(lista) == 14: diziona[(float(lista[5]), float(lista[6]), float(lista[7]))] = int(lista[13]) list_trial.append(lista[11]) for linea in script: lista = linea.split() if lista[0] == "SOLU" and lista[1] == "6DIM": if len(lista) == 22: diziona[(float(lista[5]), float(lista[6]), float(lista[7]))] = int(lista[17]) list_trial.append(lista[15]) elif mode == "PACK" or mode == "RNP": leggiTra1 = False leggiTra2 = False temp = [] for linea in script: lista = linea.split() if lista[0] == "ROOT": if leggiTra2: leggiTra2 = False diziona[tuple((temp[-1])["euler"])] = (temp[-1])["n_prev_cluster"] temp = [] break if lista[0] == "SOLU" and lista[1] == "SET": if leggiTra2: leggiTra2 = False diziona[tuple((temp[-1])["euler"])] = (temp[-1])["n_prev_cluster"] temp = [] leggiTra1 = True continue if leggiTra1: leggiTra2 = True leggiTra1 = False continue if leggiTra2: leggiTra1 = False dizio = {} dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])] dizio["n_prev_cluster"] = int(lista[15]) temp.append(dizio) FOM = {} start = False startskip = -1 section = False for linea in traFOM: if startskip > 0: startskip -= 1 continue if startskip == 0: start = True startskip = -1 if linea.startswith("Number LLG Z-Score"): section = True continue if mode != "RNP" and section and linea.startswith("$$ loggraph $$"): # NEW PHASER start = True continue if mode == "RNP" and linea.strip() == "Refinement Table (Sorted)": startskip = 3 continue if mode != "RNP" and start and linea.startswith("$$"): start = False break if mode == "RNP" and start and len(linea.split()) == 0: start = False break if start: lista = linea.strip().split() if mode == "TRA": FOM[int(lista[0])] = [float(lista[1]), float(lista[2])] elif mode == "RNP": zs = 0.0 if lista[0] != "---": if lista[5] != "n/a": zs = float(lista[5]) if lista[0].startswith("Top"): FOM[int(lista[0].split("Top")[1])] = [float(lista[6]), zs] # PHASER 2.6.0 else: FOM[int(lista[0])] = [float(lista[6]), zs] # PHASER 2.6.0 # FOM[int(lista[0])] = [float(lista[4]),0.0] fi = open(sol, "r") traslazioni = fi.readlines() fi.close() tra = [] numero = 1 leggiTra1 = False leggiTra2 = False temp = [] PREVAL = {} numera2 = 0 tcns = False convertnames = {} nameRota = "" vrms = None for linea in traslazioni: if tops != None and numero > tops: break lista = linea.split() # Following 2 lines are for PHASER 2.5.5 if lista[0] == "SOLU" and lista[1] == "ENSEMBLE": if "#VRMS" in lista: ind = lista.index("#VRMS") + 1 if len(lista) > ind: if vrms is None or vrms > float(lista[ind]): vrms = float(lista[ind]) continue # New Phaser have this new line to skip if lista[0] == "SOLU" and lista[1] == "HISTORY": valua = lista[2] if "PAK" in lista[2]: valua = lista[3] if mode == "TRA": if "RF/TF" in valua: index_rlist = int(valua.split("RF/TF")[1].split("/")[0][1:]) - 1 nameRota = list_trial[index_rlist] else: index_rlist = int(valua.split("RF")[1].split(")")[0][1:]) - 1 nameRota = list_trial[index_rlist] continue # Phaser 2.6.1 if lista[0] == "SOLU" and lista[1] == "SET": if lista[-1] == "+TNCS": tcns = True if mode == "PICASSO": numera2 += 1 PREVAL[str(numera2)] = [float(((lista[5]).split("="))[-1]), float(((lista[3]).split("="))[-1])] # elif mode == "RNP": # numera2 += 1 # PREVAL[str(numera2)] = float(lista[-1][5:]) if leggiTra2: leggiTra2 = False # print "ACTUAL VRMS1 is",vrms if vrms is not None: (temp[-1])["vrms"] = vrms if mode == "PACK": (temp[-1])["zscore"] = savezscore (temp[-1])["llg"] = savellg (temp[-1])["name"] = dinuovoname elif mode == "PICASSO": (temp[-1])["zscore"] = PREVAL[str(numero)][1] (temp[-1])["llg"] = PREVAL[str(numero)][0] elif mode == "RNP": # if lista[-1].startswith("TFZ=="): # (temp[-1])["zscore"] = PREVAL[str(numero)] # else: # (temp[-1])["zscore"] = 0.0 (temp[-1])["zscore"] = diziocorresp[numero][2] (temp[-1])["llg"] = diziocorresp[numero][1] (temp[-1])["name"] = fixed[int(diziocorresp[numero][0]) - 1][-1]["name"] else: (temp[-1])["zscore"] = FOM[numero][1] (temp[-1])["llg"] = FOM[numero][0] if tcns: for fg in range(len(temp) - 1): if fg == len(temp) - 2: temp[fg]["zscore"] = temp[-1]["zscore"] temp[fg]["llg"] = temp[-1]["llg"] else: temp[fg]["zscore"] = fixed[fg]["zscore"] temp[fg]["llg"] = fixed[fg]["llg"] if temp[fg]["name"].startswith("ensembleID"): w = temp[fg]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(fg) nw = w[0] + "FR" + w[1] + "_" + w[2] temp[fg]["name"] = nw if fg == len(temp) - 2: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) else: temp[fg]["original_rotcluster"] = fixed[fg]["original_rotcluster"] else: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) temp[-1]["fixed_frags"] = copy.deepcopy(temp[:-1]) if tcns and len(temp[-1]["fixed_frags"]) > 0 and temp[-1]["name"].startswith("ensembleID"): w = temp[-1]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(len(temp[-1]["fixed_frags"])) nw = w[0] + "FR" + w[1] + "_" + w[2] convertnames[nw] = temp[-1]["name"] temp[-1]["name"] = nw # print "///////////////////////////////////////" # print temp[-1] # print "///////////////////////////////////////" tra.append((temp[-1])) temp = [] numero += 1 leggiTra1 = True continue if leggiTra1: leggiTra2 = True leggiTra1 = False continue if leggiTra2: leggiTra1 = False dizio = {} add = 0 if lista[5] == "EULER": add += 1 dizio["euler"] = [float(lista[5 + add]), float(lista[6 + add]), float(lista[7 + add])] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir dizio["name"] = nameRota # str(lista[3]) dizio["frac"] = [float(lista[9 + add]), float(lista[10 + add]), float(lista[11 + add])] dizio["bfactor"] = float(lista[13 + add]) dizio["elong"] = 0 temp.append(dizio) if len(temp) > 0: # print "ACTUAL VRMS2 is",vrms if vrms is not None: (temp[-1])["vrms"] = vrms if mode == "PACK": (temp[-1])["zscore"] = savezscore (temp[-1])["llg"] = savellg (temp[-1])["name"] = dinuovoname elif mode == "PICASSO": (temp[-1])["zscore"] = PREVAL[str(numero)][1] (temp[-1])["llg"] = PREVAL[str(numero)][0] elif mode == "RNP": (temp[-1])["llg"] = diziocorresp[numero][1] (temp[-1])["name"] = fixed[int(diziocorresp[numero][0]) - 1][-1]["name"] (temp[-1])["zscore"] = diziocorresp[numero][2] else: (temp[-1])["zscore"] = FOM[numero][1] (temp[-1])["llg"] = FOM[numero][0] if tcns: for fg in range(len(temp) - 1): if fg == len(temp) - 2: temp[fg]["zscore"] = temp[-1]["zscore"] temp[fg]["llg"] = temp[-1]["llg"] else: temp[fg]["zscore"] = fixed[fg]["zscore"] temp[fg]["llg"] = fixed[fg]["llg"] if temp[fg]["name"].startswith("ensembleID"): w = temp[fg]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(fg) nw = w[0] + "FR" + w[1] + "_" + w[2] temp[fg]["name"] = nw if fg == len(temp) - 2: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) else: temp[fg]["original_rotcluster"] = fixed[fg]["original_rotcluster"] else: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) temp[-1]["fixed_frags"] = copy.deepcopy(temp[:-1]) if tcns and len(temp[-1]["fixed_frags"]) > 0 and temp[-1]["name"].startswith("ensembleID"): w = temp[-1]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(len(temp[-1]["fixed_frags"])) nw = w[0] + "FR" + w[1] + "_" + w[2] convertnames[nw] = temp[-1]["name"] temp[-1]["name"] = nw tra.append((temp[-1])) temp = [] numero += 1 # print "number of translation read", len(tra) # print "number of fixed",len(fixed) if mode == "PACK" and len(diziocorresp.keys()) > 0: pat = [] for sdf in range(len(fixed)): if diziocorresp[sdf + 1] <= cutoff: rt = fixed[sdf][-1] rt["fixed_frags"] = fixed[sdf][:-1] # print "sdf",sdf,type(rt),len(rt["fixed_frags"]) pat.append(copy.deepcopy(rt)) tra = pat for trrr in range(len(tra)): trasla = tra[trrr] if mode == "RNP": # print "trrr+1",trrr+1 # print "corrispon",diziocorresp[trrr+1][0] # print "len fixed",len(fixed) # print fixed[diziocorresp[trrr+1][0]-1] # print fixed[diziocorresp[trrr+1][0]-1][:-1] # print trasla["name"] trasla["fixed_frags"] = copy.deepcopy( fixed[diziocorresp[trrr + 1][0] - 1][:-1]) # ultimo di ogni sottolista di fixed e la soluzione stessa # trasla["original_rotcluster"] = fixed[diziocorresp[trrr+1][0]-1][0]["n_prev_cluster"] elif not ((mode == "PACK" and len(diziocorresp.keys()) > 0) or tcns): trasla["fixed_frags"] = copy.deepcopy(fixed) baseCombi = "" for fi in trasla["fixed_frags"]: if len(baseCombi) == 0: baseCombi += fi["original_rotcluster"] else: baseCombi += "_" + fi["original_rotcluster"] fi["original_rotcluster"] = baseCombi fi["n_prev_cluster"] = __getIDClusterFromDescription(baseCombi) return tra, fixed, diziona, convertnames def readRotationsFRF(baseDir, name, quate, tops=None): rlist = os.path.join(baseDir, name + ".rlist") out = os.path.join(baseDir, name + ".out") sh = os.path.join(baseDir, name + ".sh") fi = open(sh, "r") script = fi.readlines() fi.close() """ dati_fixed = [] for linea in script: lista = linea.split() if lista[0] == "SOLU" and lista[1] == "6DIM": dati_fixed.append((int(lista[15]),float(lista[17]),float(lista[19]))) """ fixed = [] first = False baseCombi = "" for linea in script: lista = linea.split() if lista[0] == "SOLU" and lista[1] == "SET": if not first: first = True else: break if lista[0] == "SOLU" and lista[1] == "6DIM": if len(lista) == 20: dizio = {} if len(baseCombi) == 0: baseCombi += str(lista[15]) else: baseCombi += "_" + str(lista[15]) dizio["original_rotcluster"] = baseCombi dizio["n_prev_cluster"] = __getIDClusterFromDescription(baseCombi) dizio["zscore"] = float(lista[19]) dizio["llg"] = float(lista[17]) dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir dizio["name"] = str(lista[3]) dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])] dizio["bfactor"] = float(lista[13]) dizio["elong"] = 0 fixed.append(dizio) fi = open(rlist, "r") rotazioni = fi.readlines() fi.close() fi = open(out, "r") rotFOM = fi.readlines() fi.close() FOM = {} start = False section = False for linea in rotFOM: if linea.startswith("Number LLG Z-Score"): section = True continue if section and linea.startswith("$$ loggraph $$"): # NEW PHASER start = True continue if start and linea.startswith("$$"): start = False break if start: lista = linea.split() if len(lista) == 3: FOM[int(lista[0])] = [float(lista[1]), float(lista[2])] else: start = False rota = [] numero = 1 for linea in rotazioni: if tops != None and numero > tops: break lista = linea.split() if lista[0] == "SOLU" and lista[1] == "TRIAL": dizio = {} dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["zscore"] = FOM[numero][1] dizio["llg"] = FOM[numero][0] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir dizio["name"] = str(lista[3]) dizio["frac"] = [0.0, 0.0, 0.0] dizio["bfactor"] = 0.0 dizio["elong"] = 0 rota.append(dizio) numero += 1 return rota, fixed def convertFromFracToOrth(t1, t2, t3, cell_dim, parameters): if len(parameters.keys()) == 0: parameters["A"] = A = float(cell_dim[0]) parameters["B"] = B = float(cell_dim[1]) parameters["C"] = C = float(cell_dim[2]) parameters["alphaDeg"] = alphaDeg = float(cell_dim[3]) parameters["betaDeg"] = betaDeg = float(cell_dim[4]) parameters["gammaDeg"] = gammaDeg = float(cell_dim[5]) parameters["alpha"] = alpha = (alphaDeg * 2 * numpy.pi) / 360 parameters["beta"] = beta = (betaDeg * 2 * numpy.pi) / 360 parameters["gamma"] = gamma = (gammaDeg * 2 * numpy.pi) / 360 parameters["c_a"] = c_a = numpy.cos(alpha) parameters["c_b"] = c_b = numpy.cos(beta) parameters["c_g"] = c_g = numpy.cos(gamma) parameters["s_g"] = s_g = numpy.sin(gamma) parameters["q"] = q = numpy.sqrt(1 + 2 * c_a * c_b * c_g - c_a ** 2 - c_b ** 2 - c_g ** 2) parameters["uu"] = uu = s_g / (q * C) parameters["vv"] = vv = (c_b * c_g - c_a) / (q * B * s_g) parameters["uuy"] = uuy = 1 / (B * s_g) parameters["vvz"] = vvz = -1 * (c_g / (A * s_g)) parameters["uuz"] = uuz = (c_a * c_g - c_b) / (q * A * s_g) parameters["vvy"] = vvy = 1 / A tz = t3 / parameters["uu"] ty = (t2 - tz * parameters["vv"]) / parameters["uuy"] tx = (t1 - ty * parameters["vvz"] - tz * parameters["uuz"]) / parameters["vvy"] return tx, ty, tz, parameters def convertFromOrthToFrac(x, y, z, cell_dim, parameters): if len(parameters.keys()) == 0: parameters["A"] = A = float(cell_dim[0]) parameters["B"] = B = float(cell_dim[1]) parameters["C"] = C = float(cell_dim[2]) parameters["alphaDeg"] = alphaDeg = float(cell_dim[3]) parameters["betaDeg"] = betaDeg = float(cell_dim[4]) parameters["gammaDeg"] = gammaDeg = float(cell_dim[5]) parameters["alpha"] = alpha = (alphaDeg * 2 * numpy.pi) / 360 parameters["beta"] = beta = (betaDeg * 2 * numpy.pi) / 360 parameters["gamma"] = gamma = (gammaDeg * 2 * numpy.pi) / 360 parameters["c_a"] = c_a = numpy.cos(alpha) parameters["c_b"] = c_b = numpy.cos(beta) parameters["c_g"] = c_g = numpy.cos(gamma) parameters["s_g"] = s_g = numpy.sin(gamma) parameters["q"] = q = numpy.sqrt(1 + 2 * c_a * c_b * c_g - c_a ** 2 - c_b ** 2 - c_g ** 2) parameters["uu"] = uu = s_g / (q * C) parameters["vv"] = vv = (c_b * c_g - c_a) / (q * B * s_g) parameters["uuy"] = uuy = 1 / (B * s_g) parameters["vvz"] = vvz = -1 * (c_g / (A * s_g)) parameters["uuz"] = uuz = (c_a * c_g - c_b) / (q * A * s_g) parameters["vvy"] = vvy = 1 / A nx = (x * parameters["vvy"]) + (y * parameters["vvz"]) + (z * parameters["uuz"]) ny = (y * parameters["uuy"]) + (z * parameters["vv"]) nz = z * parameters["uu"] return nx, ny, nz, parameters def rotateStructureByQuaternion(num, num2, structure, quate, q, outputPath, mode="rotateByOrthCoord", cell_dim=[]): # TODO: This methods does not work properly. Both rotateByOrthCoord and rotateByCrystCoord does not produce the correct rotation qConj = quate.QuaternionConjugate(q) pdb = open(outputPath + str(num) + "_" + str(num2) + "_rot.pdb", "w") parameters = {} for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): x = (atom.get_coord()[0]) y = (atom.get_coord()[1]) z = (atom.get_coord()[2]) if mode == "rotateByCrystCoord": # print "Before conversion",x,y,z x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) # print "After conversion",x,y,z tmpQ = [0.0, 0.0, 0.0, 0.0] tmpQ[3] = 0 tmpQ[0] = x tmpQ[1] = y tmpQ[2] = z tmpQ2 = quate.QuaternionProduct(q, tmpQ) tmpQ = quate.PointQuaternionProd(tmpQ2, qConj) nx = tmpQ[0] ny = tmpQ[1] nz = tmpQ[2] if mode == "rotateByCrystCoord": # print "Before conversion after rotation",nx,ny,nz nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) # print "After conversion after rotation",nx,ny,nz ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) pdb.close() def writeClustersPDBS(Clusters, dirout, mode, quate, convNames, printElonged, performTranslation, cell_dim, modeTra="frac"): if not os.path.exists(dirout): os.makedirs(dirout) for ci in range(len(Clusters)): clu = Clusters[ci] lion = (clu["heapSolutions"]).asList() dirClu = dirout + str(ci) + "/" if not os.path.exists(dirClu): os.makedirs(dirClu) structureRef = None for i in range(len(lion)): rota = (lion[i])[1] if not printElonged and rota["elong"] > 0: continue parser = PDBParser() structure = parser.get_structure(rota["name"], convNames[rota["name"]]) if mode == "matrix": rotateStructureByMatrix(ci, i, structure, rota["rotationMatrices"], dirClu, cell_dim) elif mode == "quaternion": rotateStructureByQuaternion(ci, i, structure, quate, rota["quaternion"], dirClu) elif mode == "simmetry_rotated": """ quo = None if "simmetry_rotated" in rota: quo = rota["simmetry_rotated"] else: quo = rota["quaternion"] rotateStructureByQuaternion(ci,i,structure,quate,quo,dirClu) """ if "simmetry_rotated" in rota: # rotateStructureByQuaternion(ci,i,structure,quate,rota["quaternion"],dirClu) # parser=PDBParser() # structure=parser.get_structure(rota["name"],dirClu+str(ci)+"_"+str(i)+"_rot.pdb") rotateStructureByQuaternion(ci, i, structure, quate, rota["simmetry_rotated"], dirClu) else: rotateStructureByQuaternion(ci, i, structure, quate, rota["quaternion"], dirClu) # print performTranslation,modeTra if modeTra == "frac" and performTranslation: parser = PDBParser() structure = parser.get_structure(rota["name"], dirClu + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyFrac(ci, i, structure, rota["frac"], dirClu, cell_dim) os.remove(dirClu + str(ci) + "_" + str(i) + "_rot.pdb") elif performTranslation and modeTra == "Cmass": # print "Devo scrivere traslai",i if i == 0: parser = PDBParser() structureRef = parser.get_structure(rota["name"], dirClu + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyCentroidMass(ci, i, structureRef, structureRef, dirClu) os.remove(dirClu + str(ci) + "_" + str(i) + "_rot.pdb") else: parser = PDBParser() structure = parser.get_structure(rota["name"], dirClu + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyCentroidMass(ci, i, structure, structureRef, dirClu) os.remove(dirClu + str(ci) + "_" + str(i) + "_rot.pdb") def translateStructurebyFrac(num, num2, struct, frac, outputPath, cell_dim, writePDB=True): structure = copy.deepcopy(struct) if writePDB: pdb = open(outputPath + str(num) + "_" + str(num2) + "_rottra.pdb", "w") parameters = {} for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): x = (atom.get_coord()[0]) y = (atom.get_coord()[1]) z = (atom.get_coord()[2]) x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) nx = x + frac[0] ny = y + frac[1] nz = z + frac[2] nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) atom.set_coord(numpy.array([nx, ny, nz])) if writePDB: ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) if writePDB: pdb.close() else: return structure def filterEqualRotations(Clusters, convNames, quate, laue, listNCS, cell_dim, where): Clus = [] index = 0 for clu in Clusters: lis = copy.deepcopy(clu["heapSolutions"].asList()) lit = [] for item in lis: prio, rota = item lit.append(rota) Pru = [] Pru = clusterizeRotations(lit, quate, laue, listNCS, Pru, 0.0, "distributionCV", convNames, cell_dim, 0.5, False) din = {} hp = ADT.Heap() for tru in Pru: prio, item = tru["heapSolutions"].pop() hp.push(prio, item) writeSumClusters(tru, where, "clustersIDE" + str(index), convNames) din["heapSolutions"] = hp Clus.append(din) index += 1 return Clus def translateStructurebyVector(num, num2, structure, vect, outputPath, centroidCA=None): pdb = open(outputPath + str(num) + "_" + str(num2) + "_rottra.pdb", "w") for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): x = (atom.get_coord()[0]) y = (atom.get_coord()[1]) z = (atom.get_coord()[2]) nx = 0.0 ny = 0.0 nz = 0.0 if centroidCA != None: # SUBTRACT TO EACH STRUCTURE THE CORRESPONDING CENTROID nx = (x - centroidCA[0]) + vect[0] ny = (y - centroidCA[1]) + vect[1] nz = (z - centroidCA[2]) + vect[2] else: nx = x + vect[0] ny = y + vect[1] nz = z + vect[2] ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) pdb.close() def translateStructurebyCentroidMass(num, num2, structureMove, structureRef, outputPath): # COMPUTE CENTROID OF MASS OF THE FRAGMENT TO MOVE AND REFERENCE listCAs1 = [] for model in structureMove: for chain in model: for residue in chain: # coord = residue["CA"].get_coord() for atom in residue: if atom.get_id() in ["CA", "C", "O", "N"]: coord = atom.get_coord() listCAs1.append([coord[0], coord[1], coord[2]]) # listCAs1.append(residue["CA"]) listCAs2 = [] for model in structureRef: for chain in model: for residue in chain: for atom in residue: if atom.get_id() in ["CA", "C", "O", "N"]: coord = atom.get_coord() listCAs2.append([coord[0], coord[1], coord[2]]) # listCAs2.append(residue["CA"]) if len(listCAs2) > len(listCAs1): how = -1 * (len(listCAs2) - len(listCAs1)) listCAs2 = listCAs2[:how] elif len(listCAs1) > len(listCAs2): how = -1 * (len(listCAs1) - len(listCAs2)) listCAs1 = listCAs1[:how] """ super_imposer = Superimposer() super_imposer.set_atoms(listCAs2, listCAs1) rot, tran = super_imposer.rotran rot=rot.astype('f') tran=tran.astype('f') """ moveCas = numpy.array(listCAs1) centroidMove = numpy.mean(moveCas, axis=0) refCas = numpy.array(listCAs2) centroidRef = numpy.mean(refCas, axis=0) # COMPUTE THE VECTOR BETWEEN THE TWO CENTROIDS A,B,C = XCREF-XCMOVE,YCREF-XCMOVE,ZCREF-YCMOVE a = centroidRef[0] - centroidMove[0] b = centroidRef[1] - centroidMove[1] c = centroidRef[2] - centroidMove[2] # a = centroidMove[0]-centroidRef[0] # b = centroidMove[1]-centroidRef[1] # c = centroidMove[2]-centroidRef[2] vect = [a, b, c] # print "Translation Vector",vect #tran # SEND TO TRANSLATE STRUCTUREBYVECTOR # print centroidRef, centroidMove, vect # translateStructurebyVector(num,num2,structureMove,tran,outputPath) translateStructurebyVector(num, num2, structureMove, vect, outputPath) def __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=None, getGlobalStats=False): global MAP_OF_ROT_COMB CluWork = [] if frag_fixed > 1: merged = __mergeEquivalentRotCombination() for key in merged: dictio = {"heapSolutions": ADT.Heap()} alli = [key] + merged[key] for clu in ClusAll: for item in clu["heapSolutions"].asList(): prio, rota = item if rota["n_prev_cluster"] not in alli: break dictio["heapSolutions"].push(prio, rota) CluWork.append(dictio) else: CluWork = ClusAll numc = 0 listRotaClus = [] stats = {} zmax = 0 llgmax = 0 distinctall = [] for clu in CluWork: if len(clu["heapSolutions"].asList()) == 0: continue numc = clu["heapSolutions"].asList()[0][1]["n_prev_cluster"] # print "N.Cluster",numc if LIMIT_CLUSTER != None and LIMIT_CLUSTER != numc: continue for rel in MAP_OF_ROT_COMB.keys(): val = MAP_OF_ROT_COMB[rel] # print "!!!!-----!!!!---!!!!!",val,numc,val==numc,type(val),type(numc),rel if val == numc: numc = tuple(sorted(map(lambda x: int(x), json.loads(str(rel).replace("(", "[").replace(")", "]"))))) # print "////",rel,numc break distinct_pdbs = [] nrts = len(clu["heapSolutions"].asList()) llg_all = [] zscore_all = [] for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): prio, rota = item llg_all.append(rota["llg"]) zscore_all.append(rota["zscore"]) if rota["llg"] > llgmax: llgmax = rota["llg"] if rota["zscore"] > zmax: zmax = rota["zscore"] pdbname = ensembles[rota["name"]] corresponding = (pdbname.split("/"))[-1] listona = corresponding.split("_") pdbid = listona[0] model = listona[1] idSolution = listona[-1] idSolution, ext = idSolution.split(".") if pdbid not in distinct_pdbs: distinct_pdbs.append(pdbid) if pdbid not in distinctall: distinctall.append(pdbid) llg_avg = float(numpy.mean(numpy.array(llg_all))) zscore_avg = float(numpy.mean(numpy.array(zscore_all))) llg_std = float(numpy.std(numpy.array(llg_all))) llg_min = float(numpy.min(numpy.array(llg_all))) llg_max = float(numpy.max(numpy.array(llg_all))) zscore_max = float(numpy.max(numpy.array(zscore_all))) distpdb = len(distinct_pdbs) listRotaClus.append((distpdb, llg_max, numc)) stats[numc] = [nrts, distpdb, llg_max, llg_avg, zscore_max, zscore_avg, llg_std, llg_min] if getGlobalStats: return stats, listRotaClus, len(distinctall), llgmax, zmax else: return stats, listRotaClus def __getStatFromSumAndModel(sumPath, modelo, fullmodel, cluster, mode, n_ense=None): Clu, dicname = readClustersFromSUM(sumPath) # print "##",sumPath # print "Numero clusters",len(Clu) # for rf in Clu: # print "Numero rot.",len(rf["heapSolutions"].asList()) rop = None topLLG = None topZSCORE = None posRank = None pos = 1 frag = -1 rp = None if mode == "ARCIMBOLDO": newPath = os.path.join(os.path.split(os.path.dirname(sumPath))[0], "5_RNP_LIBRARY/clusters.sum") if newPath != sumPath: tupl = __getStatFromSumAndModel(newPath, fullmodel, fullmodel, cluster, mode) rp = tupl[0] if cluster == None: Clu = sorted(Clu, __cmp_cluster, reverse=True) for clust in Clu: if cluster != None: pos = 1 for item in clust["heapSolutions"]: prio, rota = item actualmodel = "" model = copy.deepcopy(modelo) if cluster != None and rota["n_prev_cluster"] != int(cluster): break actualmodel = os.path.basename(dicname[rota["name"]]) if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES", "RNP"]: if not ("FR" in actualmodel and "xx" in actualmodel): ad = actualmodel.split("_") actualmodel = ad[0] + "_" + ad[1] + "_" + ad[2].split("-")[0] elif mode in ["ARCIMBOLDO-SHREDDER"]: # ad = actualmodel.split("_") # actualmodel = ad[0]+"_0_0.pdb" # ad = model.split("_") # model = ad[0]+"_0_0.pdb" # NOTE: The name of the model is already correct, and the first _0_ might not be 0 if the run comes from spheres pass elif mode == "ARCIMBOLDO": # print os.path.basename(dicname[rota["name"]])[:-4] # print "Analyzing",rota["name"] frag = int(model.split("FR")[1].split("_")[0]) frar = int(rota["name"].split("FR")[1].split("_")[0]) actualmodel = "" # print "Found",frag,"in model and",frar,"rota_name" if frag == frar: actualmodel = rota["name"].split("ensembleID")[1] + ".pdb" # .split("_")[0] model = "xx" + model.split("xx")[1] # print "////....////",actualmodel,model # if actualmodel == model: # print actualmodel, model,len(clust["heapSolutions"].asList()),actualmodel==model,len(actualmodel),len(model) elif rp != None and "fixed_frags" in rp: for fri in rp["fixed_frags"]: frag = int(fri["name"].split("FR")[1].split("_")[0]) # print "--------------frag",frag,"------------------frar",frar,"friname",fri["name"] if frag == frar: actualmodel = rota["name"].split("ensembleID")[1] + ".pdb" if len(actualmodel.split("-")) > 1: model = fri["name"].split("ensembleID")[1] + ".pdb" elif len(fri["name"].split("ensembleID")[1].split("-")) > 1: model = fri["name"].split("ensembleID")[1].split("-")[0] + ".pdb" else: model = fri["name"].split("ensembleID")[1] + ".pdb" # print "----....----...",actualmodel, model,len(clust["heapSolutions"].asList()),actualmodel==model,len(actualmodel),len(model) break # model = model.split(".")[0] if model.endswith(".pdb") and not actualmodel.endswith(".pdb"): actualmodel += ".pdb" elif not model.endswith(".pdb") and actualmodel.endswith(".pdb"): actualmodel = actualmodel[:-4] #print "=====",n_ense,actualmodel,model,rota["name"].split("-")[1] if (n_ense == None and actualmodel == model) or ( actualmodel == model and int(rota["name"].split("-")[1]) == int(n_ense)): rop = rota posRank = pos if rota["llg"] > topLLG: topLLG = rota["llg"] if rota["zscore"] > topZSCORE: topZSCORE = rota["zscore"] pos += 1 return rop, topLLG, topZSCORE, posRank def __getStatFromSumCCAndModel(pathBCC, pathACC, model, mode): befref = [] aftref = [] if pathBCC != None: befref, con2 = readCCValFromSUM(pathBCC) if pathACC != None: aftref, con1 = readCCValFromSUM(pathACC) listcc_before = [] listcc_after = [] for mod in befref: listcc_before.append((mod["initcc"], mod["ner"], mod["corresp"])) listcc_before = sorted(listcc_before, reverse=True) posB = 1 ropB = None topINITCC_B = None if pathBCC != None: topINITCC_B = listcc_before[0][0] posRankB = None for item in listcc_before: actualmodel = "" if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES"]: ad = os.path.basename(item[2]).split("_") actualmodel = ad[0] + "_" + ad[1] + "_" + ad[-1] else: actualmodel = os.path.basename(item[2]) if actualmodel == model: ropB = item posRankB = posB break posB += 1 for mod in aftref: listcc_after.append((mod["initcc"], mod["ner"], mod["corresp"])) listcc_after = sorted(listcc_after, reverse=True) posA = 1 ropA = None topINITCC_A = None if pathACC: topINITCC_A = listcc_after[0][0] posRankA = None for item in listcc_after: actualmodel = "" if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES"]: ad = os.path.basename(item[2]).split("_") actualmodel = ad[0] + "_" + ad[1] + "_" + ad[-1] else: actualmodel = os.path.basename(item[2]) # print mode,actualmodel,model if actualmodel == model: # if os.path.basename(item[2]) == model: ropA = item posRankA = posA break posA += 1 return ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA def generatePDFGraph(current_dir, nameOutput, title, data): data = sorted(data) helil = map(lambda x: x[0], data) llgs = map(lambda x: x[1], data) rmsds = map(lambda x: x[2], data) ress = map(lambda x: x[3], data) nomepdf = os.path.join(current_dir, nameOutput + ".pdf") def make_colormap(seq): """Return a LinearSegmentedColormap seq: a sequence of floats and RGB-tuples. The floats should be increasing and in the interval (0,1). """ seq = [(None,) * 3, 0.0] + list(seq) + [1.0, (None,) * 3] cdict = {'red': [], 'green': [], 'blue': []} for i, item in enumerate(seq): if isinstance(item, float): r1, g1, b1 = seq[i - 1] r2, g2, b2 = seq[i + 1] cdict['red'].append([item, r1, r2]) cdict['green'].append([item, g1, g2]) cdict['blue'].append([item, b1, b2]) return mcolors.LinearSegmentedColormap('CustomMap', cdict) c = mcolors.ColorConverter().to_rgb rvb = make_colormap( [c('red'), c('violet'), 0.33, c('violet'), c('blue'), 0.66, c('blue')]) plt.scatter(array_dg[:, 0], array_dg[:, 1], c=rmsd, cmap=rvb) plt.colorbar() plt.show() with PdfPages(nomepdf) as pdf: rvb = make_colormap([c('red'), c('violet'), 0.33, c('violet'), c('blue'), 0.66, c('blue')]) plt.figure(figsize=(3, 3)) plt.title('RMSD') plt.scatter(helil, llgs, c=rmsds, cmap=rvb) plt.colorbar() pdf.savefig() # saves the current figure into a pdf page plt.close() rvb = make_colormap([c('green'), c('blue'), 0.33, c('violet'), c('black'), 0.66, c('black')]) plt.rc('text', usetex=True) plt.figure(figsize=(3, 3)) plt.title('Resolution') plt.scatter(helil, llgs, c=ress, cmap=rvb) plt.colorbar() pdf.savefig() # saves the current figure into a pdf page plt.close() def __writeLineTable(data, first_rmsd, last_rmsd, first_res, last_res): full = "" base = "[\n" base += "['Helix', 'LLG', {'type': 'string', 'role': 'style'}, {'type':'string', 'role':'tooltip'}],\n" full += base for it in range(len(data)): item = data[it] # print "========",item[3],first_res, last_res,SystemUtility.htmlRgb(item[3], first_res, last_res,"blue") stroke_color = "" + str(SystemUtility.htmlRgb(item[3], first_res, last_res, "blue")) # hexadecimal stroke_opacity = "0.6" # float stroke_width = "1" # integer # print "********",item[2],first_rmsd, last_rmsd,SystemUtility.htmlRgb(item[2], first_rmsd, last_rmsd,"red") fill_color = "" + str(SystemUtility.htmlRgb(item[2], first_rmsd, last_rmsd, "red")) # hexadecimal fill_opacity = "1.0" full += "[" + str('%.2f' % item[0]) + ", " + str('%.2f' % (item[1] / item[ 0])) + ", 'point {stroke-color: " + stroke_color + "; stroke-opacity: " + stroke_opacity + "; stroke-width: " + stroke_width + "; fill-color: " + fill_color + "; fill-opacity: " + fill_opacity + "}','LLG: " + str( '%.2f' % item[1]) + ", RMSD: " + str('%.2f' % item[2]) + ", Res: " + str('%.2f' % item[3]) + "']" if it == len(data) - 1: full += "\n" else: full += ",\n" full += "]\n" return full def __extractTablesGraph(data): dic_rmsd_results = {} dic_res_results = {} full = "" data = sorted(data) first_rmsd = min(map(lambda x: x[2], data)) last_rmsd = max(map(lambda x: x[2], data)) first_res = min(map(lambda x: x[3], data)) last_res = max(map(lambda x: x[3], data)) dic_rmsd = {} dic_res = {} for it in range(len(data)): item = data[it] key_rmsd = '%.2f' % item[2] key_res = '%.2f' % item[3] if key_rmsd not in dic_rmsd.keys(): dic_rmsd[key_rmsd] = [item] else: dic_rmsd[key_rmsd].append(item) if key_res not in dic_res.keys(): dic_res[key_res] = [item] else: dic_res[key_res].append(item) full = __writeLineTable(data, first_rmsd, last_rmsd, first_res, last_res) for key_rmsd in dic_rmsd.keys(): value_rmsd = dic_rmsd[key_rmsd] tabella = __writeLineTable(value_rmsd, first_rmsd, last_rmsd, first_res, last_res) dic_rmsd_results[key_rmsd] = tabella for key_res in dic_res.keys(): value_res = dic_res[key_res] tabella = __writeLineTable(value_res, first_rmsd, last_rmsd, first_res, last_res) dic_res_results[key_res] = tabella return dic_rmsd_results, dic_res_results, full def generateHTMLGraph(current_dir, nameOutput, title, data): nomehtml = os.path.join(current_dir, nameOutput + ".html") base_header = """ """ + str(nameOutput) + """ """ dic_rmsd_results, dic_res_results, full = __extractTablesGraph(data) list_rmsd = sorted(dic_rmsd_results.keys()) list_res = sorted(dic_res_results.keys()) first_hel = min(map(lambda x: x[0], data)) last_hel = max(map(lambda x: x[0], data)) first_dllg = min(map(lambda x: x[1] / x[0], data)) last_dllg = max(map(lambda x: x[1] / x[0], data)) base_header += __AddGraphHTML("full", full, title, "", "", first_hel, last_hel, first_dllg, last_dllg) values_used = [] for w in range(len(list_rmsd)): key_rmsd = list_rmsd[w] tabella = dic_rmsd_results[key_rmsd] typev = "RMSD" base_header += __AddGraphHTML(w, tabella, title, key_rmsd, typev, first_hel, last_hel, first_dllg, last_dllg) values_used.append(w) for w in range(len(list_res)): key_res = list_res[w] tabella = dic_res_results[key_res] typev = "Resolution" base_header += __AddGraphHTML(w + len(list_rmsd), tabella, title, key_res, typev, first_hel, last_hel, first_dllg, last_dllg) values_used.append(w + len(list_rmsd)) base_header += """ \n \n \n """ f = open(nomehtml, "w") f.write(base_header) f.close() def __AddGraphHTML(w, tabella, title, key, typev, first_hel, last_hel, first_dllg, last_dllg): testo = """
""" return testo def forceGenerateHTML(lock, current_directory, nameOutput): global POSTMORTEM # print "Trying to adquire lock to the HTML....." lock.acquire() # print "Lock adquired!!!!! HTML" nomexml = os.path.join(current_directory, nameOutput + ".xml") nomehtml = os.path.join(current_directory, nameOutput + ".html") if not os.path.exists(nomexml): # print "release the lock HTML" lock.release() return tree = ET.parse(nomexml) root = tree.getroot() base_header = """ """ + str(nameOutput) + """
\n""" if root.tag == "arcimboldo": base_header += """

ARCIMBOLDO

\n""" elif root.tag == "borges-arcimboldo": base_header += """

ARCIMBOLDO-BORGES

\n""" base_header += """ """ base_header += """
\n

Borges started at: """ + root.find('configuration/time_start').text + """.

=============================INPUT FILE """ + root.find('configuration/bor_name').text + """=============================

""" + root.find('configuration/bor_text').text + """

============================================Summary of your data=====================================================

SPACEGROUP: """ + root.find('data/spacegroup').text + """
CELL DIMENSIONS: """ + root.find('data/cell_dim/A').text + ", " + root.find('data/cell_dim/B').text + ", " + root.find( 'data/cell_dim/C').text + ", " + root.find('data/cell_dim/alpha').text + ", " + root.find( 'data/cell_dim/beta').text + ", " + root.find('data/cell_dim/gamma').text + """
RESOLUTION: """ + root.find('data/resolution').text + """
NUMBER OF UNIQUE REFLECTIONS: """ + root.find('data/unique_refl').text + """

""" if float(root.find('data/resolution').text) <= 2.5 and float(root.find('data/resolution').text) >= 2.0: base_header += """

WARNING: At """ + root.find('data/resolution').text + """ it will be difficult to solve the structure and foms might be misleading

""" elif float(root.find('data/resolution').text) > 2.5: base_header += """

WARNING: At """ + root.find('data/resolution').text + """ ARCIMBOLDO is not supported. Please contact us for specific help

""" if float(root.find('data/completeness').text) < 98: base_header += """

WARNING: Having """ + root.find('data/completeness').text + """ of Completeness of the data for the given resolution is not enough for the success of the method thus ARCIMBOLDO will be terminated!

""" if root.tag == "arcimboldo": base_header += """\n

2. STEP: Locating Sequentially the Fragments

\n""" fran = 1 aden_show = "" aden_hide = "" while 1: ensfra = root.find('ens1_frag' + str(fran)) if ensfra is None: break base_header += """ """ base_header += """\n""" if root.find("ens1_frag" + str(fran) + "/FRF") is not None: clustn = json.loads(root.find("ens1_frag" + str(fran) + "/FRF/allclus").text) for key in clustn: start_table_row = "" body_table_row = "" keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") body_table_row += """\n \n""" if root.find("ens1_frag" + str(fran) + "/FTF") is not None: clustftf = json.loads(root.find("ens1_frag" + str(fran) + "/FTF/allclus").text) if key in clustftf: body_table_row += """ \n""" if root.find("ens1_frag" + str(fran) + "/PACK") is not None: clustpack = json.loads(root.find("ens1_frag" + str(fran) + "/PACK/allclus").text) if key in clustpack: body_table_row += """ \n""" if root.find("ens1_frag" + str(fran) + "/RNP") is not None: clustrnp = json.loads(root.find("ens1_frag" + str(fran) + "/RNP/allclus").text) if key in clustrnp: body_table_row += """ \n""" if root.find("ens1_frag" + str(fran) + "/INITCC") is not None: clustinitcc = json.loads( root.find("ens1_frag" + str(fran) + "/INITCC/allclus").text) # print "Fragment",fran,"CLUSTINITCC",clustinitcc,"KEY",key if key in clustinitcc: body_table_row += """ \n""" if root.find("ens1_frag" + str(fran) + "/EXP") is not None: if root.find("ens1_frag" + str( fran) + "/EXP/Cluster").text == str(key): body_table_row += """\n""" body_table_row += """\n""" body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str( key): colorbg.append("0xA069D6") if float(root.find("ens1_frag" + str( fran) + "/EXP/finalcc").text) >= 30.0: start_table_row += """\n""" elif len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" else: for tyo in range(3): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str( key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(3): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(4): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(4): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(7): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str( key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "none";\n """ else: for tyo in range(7): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str( key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(12): body_table_row += """\n""" start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "none";\n """ else: for tyo in range(12): body_table_row += """\n""" start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(17): body_table_row += """\n""" start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "none";\n """ else: for tyo in range(17): body_table_row += """\n""" start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ base_header += start_table_row base_header += body_table_row base_header += "\n" fran += 1 base_header += "\n
Fragment """ + str(fran) + """
Cluster Rotation Function Translation Function Packing Rigid Body Refinement Initial CC Best Trace CC/aa
#Rots. Top LLG Mean LLG Top Zscore Mean Zscore #Trans. Top LLG Mean LLG Top Zscore Mean Zscore #Sol. Top LLG Mean LLG Top Zscore Mean Zscore #Sol. Top LLG Mean LLG After Refinement CC Cycle CC #Res. traced
""" + str(key).replace("[", "(").replace("]", ")") + """""" + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Rotations").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_zscore").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_zscore").text) + """""" + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Translations").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_zscore").text) + """ """ + str(root.find( "ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_zscore").text) + """""" + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Translations").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_llg").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Mean_llg").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_zscore").text) + """ """ + str(root.find("ens1_frag" + str( fran) + "/PACK/C" + keyd + "/Mean_zscore").text) + """""" + str(root.find( "ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Solutions").text) + """ """ + str(root.find( "ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Top_llg").text) + """ """ + str(root.find( "ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Mean_llg").text) + """""" + str(root.find("ens1_frag" + str( fran) + "/INITCC/C" + keyd + "/initcc").text) + """""" + str(root.find( "ens1_frag" + str( fran) + "/EXP/cycle").text) + """""" + str(root.find( "ens1_frag" + str( fran) + "/EXP/finalcc").text) + """""" + str(root.find( "ens1_frag" + str( fran) + "/EXP/restraced").text) + """
\n" base_header += """ \n \n """ if root.find('backtracing') is not None: base_header += "

The current best solution is: " + str( root.find('backtracing/model').text) + " with FINALCC: " + str( root.find('backtracing/finalcc').text) + " and n. residues traced " + str( root.find('backtracing/restraced').text) + "
file is: " + str( root.find('backtracing/file').text) + "

\n" # FRF base_header += """

\n""" if float(root.find('backtracing/EXP/finalcc').text) >= 30: if float(root.find('data/resolution').text) < 2.1: base_header += """

It seems you have a good solution!
Here you can find the best solution and map for further refinement.

\n""" else: base_header += """

Here you can find the best solution and map for further refinement.

\n""" SOLVED = True elif root.tag == "borges-arcimboldo": aden_show = "" aden_hide = "" if root.find('rot_clustering') is not None: full = json.loads(root.find('rot_clustering/full').text) reduced = json.loads(root.find('rot_clustering/reduced').text) aden_show = "" aden_hide = "" for ldr in full: nclu, way = ldr aden_show += """ document.getElementById('FRF-""" + str( nclu) + """').style.display=""" + way + """;\n """ for ldr in reduced: nclu, way = ldr aden_hide += """ document.getElementById('FRF-""" + str( nclu) + """').style.display=""" + way + """;\n """ aden_show += """ document.getElementById('full').style.display = "";\n """ aden_show += """ document.getElementById('reduced').style.display = "none";\n """ aden_hide += """ document.getElementById('full').style.display = "none";\n """ aden_hide += """ document.getElementById('reduced').style.display = "";\n """ base_header += """
""" brins = 0 green = json.loads(root.find('rot_clustering/green').text) while 1: berd = root.find('rot_clustering/FRF-' + str(brins)) if berd == None: break base_header += """ """ if brins in green: base_header += """\n""" else: base_header += """\n""" base_header += """ """ brins += 1 base_header += """
#Cluster #Rotations #Distinct Pdb Top LLG LLG Mean Top Zscore Zscore Mean
\n \n
""" base_header += """

2. STEP: Evaluating Clusters independently

""" fran = 1 aden_show = "" aden_hide = "" base_header += """\n""" if root.find("ens1_frag" + str(fran) + "/FRF") is not None: clustn = json.loads(root.find("ens1_frag" + str(fran) + "/FRF/allclus").text) for key in clustn: start_table_row = "" body_table_row = "" keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd) is None: continue body_table_row += """\n \n""" if root.find("ens1_frag" + str(fran) + "/FTF") is not None: clustftf = json.loads(root.find("ens1_frag" + str(fran) + "/FTF/allclus").text) if key in clustftf and root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd) is not None: body_table_row += """ \n""" if root.find("ens1_frag" + str(fran) + "/PACK") is not None: clustpack = json.loads(root.find("ens1_frag" + str(fran) + "/PACK/allclus").text) if key in clustpack and root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd) is not None: body_table_row += """ \n""" if root.find("ens1_frag" + str(fran) + "/RNP") is not None: clustrnp = json.loads(root.find("ens1_frag" + str(fran) + "/RNP/allclus").text) if key in clustrnp and root.find("ens1_frag" + str( fran) + "/RNP/C" + keyd) is not None: body_table_row += """ \n""" if root.find("ens1_frag" + str(fran) + "/INITCC") is not None: clustinitcc = json.loads( root.find("ens1_frag" + str(fran) + "/INITCC/allclus").text) if key in clustinitcc and root.find("ens1_frag" + str( fran) + "/INITCC/BEFORE/C" + keyd) is not None: body_table_row += """ \n""" if root.find("ens1_frag" + str( fran) + "/EXP") is not None and root.find('ens1_frag' + str( fran) + '/EXP/C' + str(keyd)) is not None: # print "EXP",root.find("ens1_frag"+str(fran)+"/EXP/C"+str(keyd)+"/Cluster").text,str(key),root.find("ens1_frag"+str(fran)+"/EXP/C"+str(keyd)+"/Cluster").text == str(key) if root.find("ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/Cluster").text == str(key): body_table_row += """\n""" body_table_row += """\n""" body_table_row += """\n""" body_table_row += """\n""" else: for tyo in range(4): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if float(root.find("ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/finalcc").text) >= 30.0: start_table_row += """\n""" elif len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(4): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(6): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(6): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str( key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(9): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str( key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "none";\n """ else: for tyo in range(9): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(14): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """\n""" else: start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "none";\n """ else: for tyo in range(14): body_table_row += """\n""" start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ else: for tyo in range(19): body_table_row += """\n""" start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "none";\n """ else: for tyo in range(19): body_table_row += """\n""" start_table_row += """\n""" aden_show += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ aden_hide += """ document.getElementById('ens1_frag""" + str( fran) + """_""" + keyd + """').style.display = "";\n """ base_header += start_table_row base_header += body_table_row base_header += "\n" base_header += "\n
#Cluster Rotation and Model Refinement Translation Function Packing Rigid Body Refinement Initial CC Best Trace CC/aa
#Rots. Top LLG Mean LLG Top Zscore Mean Zscore #Trans. Top LLG Mean LLG Top Zscore Mean Zscore #Sol. Top LLG Mean LLG Top Zscore Mean Zscore #Sol. Top LLG Mean LLG Before Refinement CC After Refinement CC MODE Cycle CC #Res. traced
""" + str(key).replace("[", "(").replace("]", ")") + """""" + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Rotations").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_zscore").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_zscore").text) + """""" + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Translations").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_llg").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_zscore").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_zscore").text) + """""" + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Translations").text) + """ """ + str(root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_llg").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Mean_llg").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_zscore").text) + """ """ + str(root.find( "ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Mean_zscore").text) + """""" + str( root.find("ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Solutions").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Top_llg").text) + """ """ + str( root.find("ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Mean_llg").text) + """""" + str(root.find("ens1_frag" + str( fran) + "/INITCC/BEFORE/C" + keyd + "/initcc").text) + """ """ + str(root.find("ens1_frag" + str( fran) + "/INITCC/AFTER/C" + keyd + "/initcc").text) + """""" + str(root.find( "ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/mode").text) + """""" + str(root.find( "ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/cycle").text) + """""" + str(root.find( "ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/finalcc").text) + """""" + str(root.find( "ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/restraced").text) + """
\n" base_header += """ \n \n """ if root.find('backtracing') is not None: base_header += "

The current best solution is: " + str( root.find('backtracing/model').text) + " with FINALCC: " + str( root.find('backtracing/finalcc').text) + " and n. residues traced " + str( root.find('backtracing/restraced').text) + "
file is: " + str( root.find('backtracing/file').text) + "

\n" # FRF base_header += """

\n""" if float(root.find('backtracing/EXP/finalcc').text) >= 30: base_header += """

It seems you have a good solution!
Here you can find the best solution and map for further refinement.
BORGES will end now.

\n""" SOLVED = True if root.find('LINKS') is not None: numblinks = int(root.find('LINKS/number').text) # print "===============NUBER OF LINKS=============",numblinks for qds in range(numblinks): # print root.find('LINKS/N'+str(qds)) if root.find('LINKS/N' + str(qds)) is not None: text_link = root.find('LINKS/N' + str(qds) + '/text').text url_link = root.find('LINKS/N' + str(qds) + '/url').text # print "url_link",url_link base_header += "

" + text_link + "

\n" # base_header += """

\n""" base_header += """

\n""" listatime = root.find('TIME') if listatime is not None: base_header += """

""" + "\n" listatime = json.loads(listatime.text) for tim in listatime: mode, step, ti = tim base_header += """Time MODE: """ + str(mode) + """ STEP: """ + str(step) + """ """ + str(ti) + """
\n""" base_header += """

\n""" base_header += """
""" f = open(nomehtml, "w") f.write(base_header) f.close() lock.release() print "release the lock HTML!!!" def generateHTML(current_directory, nameOutput): nomefile = os.path.join(current_directory, nameOutput + ".html") while 1: if os.path.exists(nomefile): os.remove(nomefile) print "Generation HTML....." forceGenerateHTML(lock, current_directory, nameOutput) time.sleep(5) def writeOutputFile(lock, DicParameters, ClusAll, outputDir, filename, mode, step, ensembles, frag_fixed, LIMIT_CLUSTER=None, path1=None, path2=None, useRefP1=False, useRGR=False, numberCyclesRef=1, usePacking=True, useTransla=True, makeEmpty=False, readSum=None, filterClusters=True, fromphis=False, fromdirexp="11.EXP"): # print "Trying getting lock, in Write XML",mode,step lock.acquire() # print "Log adquired!!!!!!!!!!!!!!!!!!!!!!!!!",mode,step SOLVED = False if readSum != None and os.path.exists(readSum): ensembles, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, readSum, "ROTSOL") nomexml = os.path.join(outputDir, filename + ".xml") if not os.path.exists(nomexml): lock.release() return tree = ET.parse(nomexml) root = tree.getroot() if mode == "ARCIMBOLDO-SHREDDER" and step == "CREATE_LINK": if root.find('LINKS/number') is None: ET.SubElement(root, 'LINKS') ET.SubElement(root.find('LINKS'), 'number').text = str(len(ClusAll)) for qds in range(len(ClusAll)): link = ClusAll[qds] ET.SubElement(root.find('LINKS'), "N" + str(qds)) ET.SubElement(root.find('LINKS/N' + str(qds)), 'text').text = link[0] ET.SubElement(root.find('LINKS/N' + str(qds)), 'url').text = link[1] else: acnumb = int(root.find('LINKS/number').text) root.find('LINKS/number').text = str(acnumb + len(ClusAll)) for qds in range(len(ClusAll)): link = ClusAll[qds] ET.SubElement(root.find('LINKS'), "N" + str(qds + acnumb)) ET.SubElement(root.find('LINKS/N' + str(qds + acnumb)), 'text').text = link[0] ET.SubElement(root.find('LINKS/N' + str(qds + acnumb)), 'url').text = link[1] if mode == "ARCIMBOLDO-SKIP" and step == "JUMP": # flin = os.path.join(outputDir,"./ens1_frag"+str(frag_fixed+1)+"/1_FRF_LIBRARY/clusters.sum") # ensembles = readClustersFromSUMToDB(DicParameters, flin, "ROTSOL") # writeOutputFile(lock,DicParameters,outputDir,filename,"ARCIMBOLDO","TABLE",ensembles,frag_fixed) # writeOutputFile(lock,DicParameters,outputDir,filename,"ARCIMBOLDO","JUMPFRAG_"+str(frag_fixed)+"_"+str(frag_fixed+1),ensembles,frag_fixed) ET.SubElement(root, 'ens1_frag' + str(frag_fixed)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'FRF') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF'), 'allclus').text = "[]" if mode == "ARCIMBOLDO" and step.startswith("JUMPFRAG_"): st, from_fr, to_fr = step.split("_") (root.find('ens1_frag' + from_fr)).tag = 'ens1_frag' + to_fr clustn = json.loads(root.find("ens1_frag" + str(to_fr) + "/FRF/allclus").text) allc = [] for key in clustn: start_table_row = "" body_table_row = "" keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") valonome = "" valodue = "" # print "----",keyd,step if int(from_fr) > 1: base = (keyd.split("-")[1]).split("_") dr = {} for br in base: if br in dr.keys(): dr[br] += 1 else: dr[br] = 1 minl = 100000 d = -1 for q in dr.keys(): if dr[q] <= minl: minl = dr[q] d = q base.append(d) base = sorted(base) valonome = ", ".join(base) valodue = "_".join(base) valonome = "(" + valonome + ")" valodue = "C-" + valodue + "-" else: valonome = "(" valodue = "C-" for ds in range(2): valonome += keyd + ", " valodue += keyd + "_" valonome = valonome[:-2] + ")" valodue = valodue[:-1] + "-" # print "aggiungo",valonome allc.append(valonome) adx = root.find("ens1_frag" + str(to_fr) + "/FRF/C" + keyd) # print adx adx.text = valonome # adx.find("Cluster").text = valonome root.find("ens1_frag" + str(to_fr) + "/FRF/C" + keyd).tag = valodue # print "all list",allc root.find("ens1_frag" + str(to_fr) + "/FRF/allclus").text = json.dumps(allc) ET.SubElement(root, 'ens1_frag' + str(from_fr)) ET.SubElement(root.find('ens1_frag' + str(from_fr)), 'FRF') ET.SubElement(root.find('ens1_frag' + str(from_fr) + '/FRF'), 'allclus').text = "[]" if mode == "ARCIMBOLDO" or mode == "ARCIMBOLDO-BORGES" or mode == "ARCIMBOLDO-SHREDDER": if mode == "ARCIMBOLDO" and step == "INITCC": aftref, con1 = readCCValFromSUM(path1) stats = {} for mod in aftref: if mod["cluster"] not in stats.keys(): stats[mod["cluster"]] = [(mod["initcc"], mod["ner"])] else: stats[mod["cluster"]].append((mod["initcc"], mod["ner"])) stats[mod["cluster"]] = sorted(stats[mod["cluster"]], reverse=True) ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'INITCC') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'allclus').text = json.dumps( map(lambda x: "(" + str(x).replace("_", ", ") + ")" if str(x).replace("_", ", ") != x else str(x), sorted(stats.keys()))) bestinitcc = 0 bestclusterinitcc = "" for key in sorted(stats.keys()): value = stats[key][0] if makeEmpty: value = [0.0, 0] k = str(key).replace("_", ", ") if k != key: key = "(" + k + ")" keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + "C" + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + "C" + str(keyd)), 'initcc').text = str('%.2f' % value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + "C" + str(keyd)), 'ner').text = str(value[1]) if value[0] > bestinitcc: bestinitcc = value[0] bestclusterinitcc = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'bestclusterinitcc').text = bestclusterinitcc elif mode.startswith("ARCIMBOLDO-BORGES") and step == "INITCC": befref, con2 = readCCValFromSUM(path1) aftref, con1 = readCCValFromSUM(path2) listcc_before = [] listcc_after = [] cluster = int(path1.split("/")[-2]) for mod in befref: listcc_before.append((mod["initcc"], mod["ner"])) listcc_before = sorted(listcc_before, reverse=True) for mod in aftref: listcc_after.append((mod["initcc"], mod["ner"])) listcc_after = sorted(listcc_after, reverse=True) stats = {} for mod in aftref: if mod["cluster"] not in stats.keys(): stats[mod["cluster"]] = [(mod["initcc"], mod["ner"])] else: stats[mod["cluster"]].append((mod["initcc"], mod["ner"])) stats[mod["cluster"]] = sorted(stats[mod["cluster"]], reverse=True) if root.find('ens1_frag' + str(frag_fixed) + '/INITCC') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'INITCC') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'BEFORE') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'AFTER') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/allclus') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: (root.find('ens1_frag' + str(frag_fixed) + '/INITCC/allclus')).text = json.dumps(list(set( json.loads((root.find('ens1_frag' + str(frag_fixed) + '/INITCC/allclus')).text) + map( lambda x: str(x), sorted(stats.keys()))))) bestinitcc = 0 bestclusterinitcc = "" for key in sorted(stats.keys()): value = stats[key][0] if makeEmpty: value = [0.0, 0] keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/C' + str(keyd)), 'initcc').text = str('%.2f' % value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/C' + str(keyd)), 'ner').text = str(value[1]) if value[0] > bestinitcc: bestinitcc = value[0] bestclusterinitcc = str(key) stats = {} for mod in befref: if mod["cluster"] not in stats.keys(): stats[mod["cluster"]] = [(mod["initcc"], mod["ner"])] else: stats[mod["cluster"]].append((mod["initcc"], mod["ner"])) stats[mod["cluster"]] = sorted(stats[mod["cluster"]], reverse=True) for key in sorted(stats.keys()): value = stats[key][0] if makeEmpty: value = [0.0, 0] keyd = str(key).replace(' ', '').replace('(', '-').replace(')', '-').replace(',', '_').replace('[', '-').replace( ']', '-') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE'), 'C' + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/C' + str(keyd)), 'initcc').text = str('%.2f' % value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/C' + str(keyd)), 'ner').text = str(value[1]) if value[0] > bestinitcc: bestinitcc = value[0] bestclusterinitcc = str(key) if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestclusterinitcc') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'bestclusterinitcc').text = bestclusterinitcc ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'bestinitcc').text = str( '%.2f' % bestinitcc) elif float(bestinitcc) > float((root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestinitcc')).text): (root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestclusterinitcc')).text = bestclusterinitcc (root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestinitcc')).text = str('%.2f' % bestinitcc) performBacktracing = False backPDB = None FINALCC = None RESIDTR = None cluster = None if mode == 'ARCIMBOLDO' and step.startswith('FAST'): modality = step.split('_') typemode = modality[0] cycle = modality[1] # fragment = modality[2] p = subprocess.Popen('grep -H CC ' + path1 + '*.pdb', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() fincc = float(linlis[6][:-1]) restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) backPDB = finalcc[0][2] frag_fixed = int(os.path.basename(backPDB).split("FR")[1].split("_")[0]) + 1 fragment = frag_fixed if root.find('ens1_frag' + str(frag_fixed) + '/EXP') is not None: ficc = root.find('ens1_frag' + str(frag_fixed) + '/EXP/finalcc').text if float(finalcc[0][0]) > float(ficc): backPDB = finalcc[0][2] FINALCC = finalcc[0][0] RESIDTR = finalcc[0][1] modello = os.path.basename(backPDB) modelloFRF = modello.split('-')[0] + '.pdb' pathFRF = os.path.join(outputDir + '/ens1_frag' + str(frag_fixed), '1_FRF_LIBRARY/clusters.sum') rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modelloFRF, modello, None, mode) cluster = rop['original_rotcluster'] druppo = str(cluster) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) cluster = druppo if float(FINALCC) >= float(root.find('backtracing/EXP/finalcc').text): performBacktracing = True k = str(cluster).replace('_', ', ') if k != cluster: cluster = '(' + k + ')' root.find('ens1_frag' + str(frag_fixed) + '/EXP/Cluster').text = str(cluster) root.find('ens1_frag' + str(frag_fixed) + '/EXP/finalcc').text = str(FINALCC) root.find('ens1_frag' + str(frag_fixed) + '/EXP/restraced').text = str(RESIDTR) root.find('ens1_frag' + str(frag_fixed) + '/EXP/backpdb').text = str(backPDB) root.find('ens1_frag' + str(frag_fixed) + '/EXP/cycle').text = str(cycle) else: backPDB = None FINALCC = None RESIDTR = None cluster = None performBacktracing = False else: backPDB = finalcc[0][2] FINALCC = finalcc[0][0] RESIDTR = finalcc[0][1] modello = os.path.basename(backPDB) # print "----",backPDB modelloFRF = modello.split('-')[0] + '.pdb' pathFRF = os.path.join(outputDir + '/ens1_frag' + str(frag_fixed), '1_FRF_LIBRARY/clusters.sum') rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modelloFRF, modello, None, mode) cluster = rop['original_rotcluster'] druppo = str(cluster) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) cluster = druppo k = str(cluster).replace('_', ', ') if k != cluster: cluster = '(' + k + ')' ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'EXP') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'Cluster').text = str(cluster) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'finalcc').text = str(finalcc[0][0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'restraced').text = str(finalcc[0][1]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'backpdb').text = str(finalcc[0][2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'cycle').text = str(cycle) if root.find('backtracing/EXP/finalcc') is None or float(FINALCC) >= float( root.find('backtracing/EXP/finalcc').text): performBacktracing = True if (mode.startswith('ARCIMBOLDO-BORGES') or mode.startswith('ARCIMBOLDO-SHREDDER')) and step.startswith('FAST'): modality = step.split('_') typemode = modality[0] cycle = modality[1] cluster = modality[2] p = subprocess.Popen('grep -H CC ' + path1 + '*.pdb', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() fincc = float(linlis[6][:-1]) restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) if root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)) is not None: ficc = root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/finalcc').text if float(finalcc[0][0]) > float(ficc): backPDB = finalcc[0][2] FINALCC = finalcc[0][0] RESIDTR = finalcc[0][1] performBacktracing = True root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/Cluster').text = str(cluster) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/finalcc').text = str(FINALCC) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/restraced').text = str( RESIDTR) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/backpdb').text = str(backPDB) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/cycle').text = str(cycle) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/mode').text = str(typemode) else: backPDB = None FINALCC = None RESIDTR = None performBacktracing = False else: backPDB = finalcc[0][2] FINALCC = finalcc[0][0] RESIDTR = finalcc[0][1] if root.find('ens1_frag' + str(frag_fixed) + '/EXP') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'EXP') if root.find('ens1_frag' + str(frag_fixed) + '/EXP/' + 'C' + str(cluster)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'C' + str(cluster)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'Cluster').text = str( cluster) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'finalcc').text = str( finalcc[0][0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'restraced').text = str(finalcc[0][1]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'backpdb').text = str( finalcc[0][2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'cycle').text = str( cycle) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'mode').text = str( typemode) performBacktracing = True if step == "FTF": stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER) if root.find('ens1_frag' + str(frag_fixed) + '/FTF') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'FTF') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: root.find('ens1_frag' + str(frag_fixed) + '/FTF/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/FTF/allclus').text) + map(lambda x: str(x), sorted( stats.keys()))))) for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, 0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Translations').text = str(value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) if step == "PACK": stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER) if root.find('ens1_frag' + str(frag_fixed) + '/PACK') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'PACK') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: root.find('ens1_frag' + str(frag_fixed) + '/PACK/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/PACK/allclus').text) + map(lambda x: str(x), sorted( stats.keys()))))) bestllg = 0 bestzscore = 0 bestclusterllg = "" bestclusterzscore = "" for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, 0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Translations').text = str(value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) if value[2] > bestllg: bestllg = value[2] bestclusterllg = str(key) if value[4] > bestzscore: bestzscore = value[4] bestclusterzscore = str(key) if root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestclusterllg') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestclusterllg').text = bestclusterllg ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestclusterzscore').text = bestclusterzscore ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestllg').text = str( '%.2f' % bestllg) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestzscore').text = str( '%.2f' % bestzscore) else: if bestllg > float(root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestllg').text): root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestclusterllg').text = bestclusterllg root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestllg').text = str('%.2f' % bestllg) if bestzscore > float(root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestzscore').text): root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestclusterzscore').text = bestclusterzscore root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestzscore').text = str('%.2f' % bestzscore) if step == 'RNP': stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER) if root.find('ens1_frag' + str(frag_fixed) + '/RNP') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'RNP') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: root.find('ens1_frag' + str(frag_fixed) + '/RNP/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/RNP/allclus').text) + map(lambda x: str(x), sorted( stats.keys()))))) for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, 0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(' ', '').replace('(', '-').replace(')', '-').replace(',', '_').replace('[', '-').replace( ']', '-') if root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP'), 'C' + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Solutions').text = str(value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) if step == 'TABLE': numeroClusters = len(ClusAll) stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER) if root.find('ens1_frag' + str(frag_fixed)) is None: ET.SubElement(root, 'ens1_frag' + str(frag_fixed)) if root.find('ens1_frag' + str(frag_fixed) + '/FRF') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'FRF') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: altt = stats.keys() finalt = [] for ask in altt: if isinstance(ask, tuple) and len(ask) == frag_fixed: finalt.append(ask) elif not isinstance(ask, tuple) and frag_fixed == 1: finalt.append(ask) root.find('ens1_frag' + str(frag_fixed) + '/FRF/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/FRF/allclus').text) + map(lambda x: str(x), sorted(finalt))))) for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, \ 0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(' ', '').replace('(', '-').replace(')', '-').replace(',', '_').replace('[', '-').replace( ']', '-') if root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF'), 'C' + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Rotations').text = str(value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) if mode == 'ARCIMBOLDO' and performBacktracing: modality = step.split('_') typemode = modality[0] cycle = modality[1] # fragment = modality[2] backPDA = backPDB[:-4] + '.pda' listus = os.path.basename(backPDB).split('_') pdbid = listus[0] model = listus[1] IdSolution = listus[-1] IdSolution = IdSolution[:-4] modello = os.path.basename(backPDB) modelloFRF = modello.split("-")[0] + ".pdb" if root.find('backtracing') is None: ET.SubElement(root, 'backtracing') ET.SubElement(root.find('backtracing'), 'model').text = str(modello) ET.SubElement(root.find('backtracing'), 'finalcc').text = str('%.2f' % FINALCC) ET.SubElement(root.find('backtracing'), 'restraced').text = str(RESIDTR) ET.SubElement(root.find('backtracing'), 'file').text = str(backPDB) else: root.find('backtracing/model').text = str(modello) root.find('backtracing/finalcc').text = str('%.2f' % FINALCC) root.find('backtracing/restraced').text = str(RESIDTR) root.find('backtracing/file').text = str(backPDB) # FRF pathFRF = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "1_FRF_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modelloFRF, modello, None, mode) if root.find('backtracing/FRF') is None: ET.SubElement(root.find('backtracing'), 'FRF') ET.SubElement(root.find('backtracing/FRF'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FRF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FRF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FRF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FRF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FRF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FRF/posRank').text = str(posRank) root.find('backtracing/FRF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FRF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FRF/cluster').text = str(cluster) root.find('backtracing/FRF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FRF/top_zscore').text = str('%.2f' % topZSCORE) if useTransla: # FTF pathFTF = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "3_FTF_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFTF, modello, modello, None, mode) if root.find('backtracing/FTF') is None: ET.SubElement(root.find('backtracing'), 'FTF') ET.SubElement(root.find('backtracing/FTF'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FTF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FTF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FTF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FTF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FTF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FTF/posRank').text = str(posRank) root.find('backtracing/FTF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FTF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FTF/cluster').text = str(cluster) root.find('backtracing/FTF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FTF/top_zscore').text = str('%.2f' % topZSCORE) # PACK if usePacking: pathPACK = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "4_PACK_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathPACK, modello, modello, None, mode) if root.find('backtracing/PACK') is None: ET.SubElement(root.find('backtracing'), 'PACK') ET.SubElement(root.find('backtracing/PACK'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/PACK'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/PACK'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/PACK'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/PACK'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/PACK'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/PACK/posRank').text = str(posRank) root.find('backtracing/PACK/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/PACK/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/PACK/cluster').text = str(cluster) root.find('backtracing/PACK/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/PACK/top_zscore').text = str('%.2f' % topZSCORE) # RNP pathRNP = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "5_RNP_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathRNP, modello, modello, None, "RNP") if root.find('backtracing/RNP') is None: ET.SubElement(root.find('backtracing'), 'RNP') ET.SubElement(root.find('backtracing/RNP'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/RNP'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/RNP'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/RNP'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/RNP'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/RNP'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/RNP/posRank').text = str(posRank) root.find('backtracing/RNP/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/RNP/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/RNP/cluster').text = str(cluster) root.find('backtracing/RNP/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/RNP/top_zscore').text = str('%.2f' % topZSCORE) # INITCC pathACC = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "6_EXPVAL_LIBRARY/solCC.sum") ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA = __getStatFromSumCCAndModel(None, pathACC, modello, mode) if root.find('backtracing/INITCC') is None: ET.SubElement(root.find('backtracing'), 'INITCC') # AFTER REF ET.SubElement(root.find('backtracing/INITCC'), 'posRank').text = str(posRankA) ET.SubElement(root.find('backtracing/INITCC'), 'initcc').text = str('%.2f' % ropA[0]) ET.SubElement(root.find('backtracing/INITCC'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/INITCC'), 'top_initcc').text = str('%.2f' % topINITCC_A) else: root.find('backtracing/INITCC/posRank').text = str(posRankA) root.find('backtracing/INITCC/initcc').text = str('%.2f' % ropA[0]) root.find('backtracing/INITCC/cluster').text = str(cluster) root.find('backtracing/INITCC/top_initcc').text = str('%.2f' % topINITCC_A) if typemode == "FAST": ciclus = cycle # FAST # p = subprocess.Popen("grep -H CC "+os.path.join(outputDir+"/ens1_frag"+str(frag_fixed),"8_EXP_LIBRARY/"+str(ciclus)+"/*/"+os.path.basename(backPDB)), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) p = subprocess.Popen("grep -H CC " + path1 + os.path.basename(backPDB), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() fincc = float(linlis[6][:-1]) restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) if root.find('backtracing/EXP') is None: ET.SubElement(root.find('backtracing'), 'EXP') ET.SubElement(root.find('backtracing/EXP'), 'cycle').text = str(ciclus) ET.SubElement(root.find('backtracing/EXP'), 'finalcc').text = str('%.2f' % finalcc[0][0]) ET.SubElement(root.find('backtracing/EXP'), 'restraced').text = str('%.2f' % finalcc[0][1]) else: root.find('backtracing/EXP/cycle').text = str(ciclus) root.find('backtracing/EXP/finalcc').text = str('%.2f' % finalcc[0][0]) root.find('backtracing/EXP/restraced').text = str('%.2f' % finalcc[0][1]) shutil.copyfile(backPDB, os.path.join(outputDir, "best.pdb")) shutil.copyfile(backPDB[:-4] + ".pda", os.path.join(outputDir, "best.pda")) shutil.copyfile(backPDB[:-4] + ".phs", os.path.join(outputDir, "best.phs")) shutil.copyfile(backPDB[:-4] + ".lst", os.path.join(outputDir, "best.lst")) if float(FINALCC) >= 30: SOLVED = True elif ( mode.startswith("ARCIMBOLDO-BORGES") or mode.startswith("ARCIMBOLDO-SHREDDER")) and performBacktracing and ( root.find('backtracing/finalcc') is None or ( float(FINALCC) >= float(root.find('backtracing/finalcc').text))): modality = step.split("_") typemode = modality[0] cycle = modality[1] cluster = modality[2] # backPDB = # FINALCC = # RESIDTR = # print "Momento prima dell'errore",backPDB,FINALCC,RESIDTR ad = os.path.basename(backPDB).split("_") pdbid = ad[0] model = ad[1] # ref = ad[-1] IdSolution = ad[2].split("-")[0] # ref = ref[:-4] ensem = os.path.basename(backPDB).split("-") if len(ensem) > 1: ensem = ensem[1].split("_")[0] else: ensem = None modello = pdbid + "_" + model + "_" + IdSolution + ".pdb" modelloTr = pdbid + "_" + model + "_" + ad[2] + ".pdb" if root.find('backtracing') is None: ET.SubElement(root, 'backtracing') ET.SubElement(root.find('backtracing'), 'model').text = str(modello) ET.SubElement(root.find('backtracing'), 'finalcc').text = str('%.2f' % FINALCC) ET.SubElement(root.find('backtracing'), 'restraced').text = str(RESIDTR) ET.SubElement(root.find('backtracing'), 'file').text = str(backPDB) else: root.find('backtracing/model').text = str(modello) root.find('backtracing/finalcc').text = str('%.2f' % FINALCC) root.find('backtracing/restraced').text = str(RESIDTR) root.find('backtracing/file').text = str(backPDB) # FRF pathFRF = os.path.join(outputDir, "1_FRF_Library/clustersNoRed.sum") # __getStatFromSumAndModel(sumPath,modelo,fullmodel,cluster,mode,n_ense=None) modfrf = modello #print modfrf if modello.split("_")[0].endswith("nogyre"): modfrf = modello.split("_")[0].replace("nogyre","")+"_"+modello.split("_")[1]+"_"+modello.split("_")[2] rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modfrf, modfrf, None, mode) #print "============================" #print pathFRF, modfrf, modfrf #print rop, topLLG,topZSCORE,posRank,rop["llg"] #print "============================" if root.find('backtracing/FRF') is None: ET.SubElement(root.find('backtracing'), 'FRF') ET.SubElement(root.find('backtracing/FRF'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FRF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FRF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FRF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FRF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FRF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FRF/posRank').text = str(posRank) root.find('backtracing/FRF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FRF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FRF/cluster').text = str(cluster) root.find('backtracing/FRF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FRF/top_zscore').text = str('%.2f' % topZSCORE) for ciclus in range(numberCyclesRef): if ciclus > 0: # FRF if useRGR and not modello.split("_")[0].endswith("nogyre"): pathRGR = os.path.join(outputDir, "3_RGR/" + str(cluster) + "/" + str(ciclus - 1) + "/clustersNoRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathRGR, modello, modello, cluster, mode) elif not modello.split("_")[0].endswith("nogyre"): pathFRF = os.path.join(outputDir, "4_FRF_LIBRARY/" + str(cluster) + "/" + str( ciclus) + "/clustersNoRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modello, modello, cluster, mode) if root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF') is None: ET.SubElement(root.find('backtracing'), 'REF_M') ET.SubElement(root.find('backtracing/REF_M'), 'C' + str(ciclus)) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus)), 'FRF') ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'posRank').text = str( posRank) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'llg').text = str( '%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'zscore').text = str( '%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'cluster').text = str( cluster) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'top_llg').text = str( '%.2f' % topLLG) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'top_zscore').text = str( '%.2f' % topZSCORE) else: root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/posRank').text = str(posRank) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/zscore').text = str( '%.2f' % rop["zscore"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/cluster').text = str(cluster) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/top_zscore').text = str( '%.2f' % topZSCORE) # REF P1 if useRefP1 and ciclus != numberCyclesRef - 1: pathREFP1 = os.path.join(outputDir, "3_RBR_P1_BRF/" + str(cluster) + "/" + str(ciclus) + "/models.sum") refval, pl = readRefFromSUM(pathREFP1) rop = None for ren in refval: acm = "" if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES", "RNP"]: acm = os.path.basename(ren["corresp"]) if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES"]: ad = acm.split("_") acm = ad[0] + "_" + ad[1] + "_" + ad[-1] if mode == "ARCIMBOLDO-SHREDDER": pass if acm == modello: rop = ren break if root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1') is None: ET.SubElement(root.find('backtracing'), 'REF_M') ET.SubElement(root.find('backtracing/REF_M'), 'C' + str(ciclus)) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus)), 'RNP_P1') ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1'), 'llg').text = str( '%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1'), 'zscore').text = str( '%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1'), 'rmsd').text = str( rop["rmsd"]) else: root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1/zscore').text = str( '%.2f' % rop["zscore"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1/rmsd').text = str(rop["rmsd"]) mode = "ARCIMBOLDO-BORGES" if useTransla: # FTF pathFTF = os.path.join(outputDir, "6_FTF_Library/" + str(cluster) + "/clustersNoRedPSol.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFTF, modello, modello, None, mode, n_ense=ensem) if root.find('backtracing/FTF') is None: ET.SubElement(root.find('backtracing'), 'FTF') ET.SubElement(root.find('backtracing/FTF'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FTF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FTF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FTF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FTF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FTF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FTF/posRank').text = str(posRank) root.find('backtracing/FTF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FTF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FTF/cluster').text = str(cluster) root.find('backtracing/FTF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FTF/top_zscore').text = str('%.2f' % topZSCORE) # PACK if usePacking: pathPACK = os.path.join(outputDir, "7.5_PACK_Library/" + str(cluster) + "/clustersRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathPACK, modello, modello, None, mode, n_ense=ensem) if root.find('backtracing/PACK') is None: ET.SubElement(root.find('backtracing'), 'PACK') ET.SubElement(root.find('backtracing/PACK'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/PACK'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/PACK'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/PACK'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/PACK'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/PACK'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/PACK/posRank').text = str(posRank) root.find('backtracing/PACK/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/PACK/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/PACK/cluster').text = str(cluster) root.find('backtracing/PACK/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/PACK/top_zscore').text = str('%.2f' % topZSCORE) # RNP pathRNP = os.path.join(outputDir, "8_RBR/" + str(cluster) + "/clustersNoRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathRNP, modello, modello, None, "RNP", n_ense=ensem) if root.find('backtracing/RNP') is None: ET.SubElement(root.find('backtracing'), 'RNP') ET.SubElement(root.find('backtracing/RNP'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/RNP'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/RNP'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/RNP'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/RNP'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/RNP'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/RNP/posRank').text = str(posRank) root.find('backtracing/RNP/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/RNP/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/RNP/cluster').text = str(cluster) root.find('backtracing/RNP/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/RNP/top_zscore').text = str('%.2f' % topZSCORE) # INITCC pathBCC = os.path.join(outputDir, "9.5_EXP/" + str(cluster) + "/solCC.sum") pathACC = os.path.join(outputDir, "9_EXP/" + str(cluster) + "/solCC.sum") ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA = __getStatFromSumCCAndModel(pathBCC, pathACC, modelloTr, mode) if root.find('backtracing/INITCC') is None: ET.SubElement(root.find('backtracing'), 'INITCC') ET.SubElement(root.find('backtracing/INITCC'), 'BEFORE') # BEFORE REF ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'posRank').text = str(posRankB) ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'initcc').text = str('%.2f' % ropB[0]) ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'top_initcc').text = str('%.2f' % topINITCC_B) ET.SubElement(root.find('backtracing/INITCC'), 'AFTER') # AFTER REF ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'posRank').text = str(posRankA) ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'initcc').text = str('%.2f' % ropA[0]) ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'top_initcc').text = str('%.2f' % topINITCC_A) else: root.find('backtracing/INITCC/BEFORE/posRank').text = str(posRankB) root.find('backtracing/INITCC/BEFORE/initcc').text = str('%.2f' % ropB[0]) root.find('backtracing/INITCC/BEFORE/cluster').text = str(cluster) root.find('backtracing/INITCC/BEFORE/top_initcc').text = str('%.2f' % topINITCC_B) # AFTER REF root.find('backtracing/INITCC/AFTER/posRank').text = str(posRankA) root.find('backtracing/INITCC/AFTER/initcc').text = str('%.2f' % ropA[0]) root.find('backtracing/INITCC/AFTER/cluster').text = str(cluster) root.find('backtracing/INITCC/AFTER/top_initcc').text = str('%.2f' % topINITCC_A) if typemode == "FAST": ciclus = cycle # FAST p = subprocess.Popen("grep -H CC " + os.path.join(outputDir, fromdirexp + "/" + str(cluster) + "/" + str( ciclus) + "/*/" + os.path.basename(backPDB)), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() fincc = float(linlis[6][:-1]) restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) if root.find('backtracing/EXP') is None: ET.SubElement(root.find('backtracing'), 'EXP') ET.SubElement(root.find('backtracing/EXP'), 'cycle').text = str(ciclus) ET.SubElement(root.find('backtracing/EXP'), 'finalcc').text = str('%.2f' % finalcc[0][0]) ET.SubElement(root.find('backtracing/EXP'), 'restraced').text = str('%.2f' % finalcc[0][1]) else: root.find('backtracing/EXP/cycle').text = str(ciclus) root.find('backtracing/EXP/finalcc').text = str('%.2f' % finalcc[0][0]) root.find('backtracing/EXP/restraced').text = str('%.2f' % finalcc[0][1]) shutil.copyfile(backPDB, os.path.join(outputDir, "best.pdb")) if fromphis: best_ext = ".phi" else: best_ext = ".pda" shutil.copyfile(backPDB[:-4] + best_ext, os.path.join(outputDir, "best" + best_ext)) shutil.copyfile(backPDB[:-4] + ".phs", os.path.join(outputDir, "best.phs")) shutil.copyfile(backPDB[:-4] + ".lst", os.path.join(outputDir, "best.lst")) if float(FINALCC) >= 30: SOLVED = True if (mode.startswith("ARCIMBOLDO-BORGES") or mode == "ARCIMBOLDO-CLUSTERS") and step == "FRF" and frag_fixed == 1: stats, listRotaClus, ntotalpdbs, nmaxllg, nmaxzscore = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER, getGlobalStats=True) # NOTE: This was the old way using first by number of rotations and for clusters with the same size by top llg # listRotaClus = sorted(listRotaClus,reverse=True) # stats[numc] = [nrts, distpdb, llg_max, llg_avg, zscore_max, zscore_avg, llg_std, llg_min] listRotaClus = sorted(listRotaClus, key=lambda x: ( (stats[x[2]][1] / ntotalpdbs) + (stats[x[2]][2] / nmaxllg) + (stats[x[2]][4] / nmaxzscore)), reverse=True) # print listRotaClus # NOTE: Filter by mean rotations mean_all_rot = map(lambda x: x[0], listRotaClus) mean_all_rot = numpy.mean(numpy.array(mean_all_rot)) # NOTE: Filter by mean LLG # mean_all_llg = map(lambda x: x[1], listRotaClus) # mean_all_llg = numpy.mean(numpy.array(mean_all_llg)) listClusters = [] # best,clubest=listRotaClus[0] # listClusters.append(clubest) for t in range(len(listRotaClus)): npdbs, llgclu, nclu = listRotaClus[t] # print diff, (best*50/100.0), diff < (best*50/100.0) # NOTE: Filter out by LLG # if llgclu >= mean_all_llg-10: #(best*50/100.0): # NOTE: Filter out by Number of rotations if not filterClusters or npdbs >= mean_all_rot: # (best*50/100.0): listClusters.append(nclu) ET.SubElement(root, 'rot_clustering') ET.SubElement(root.find('rot_clustering'), 'full') ET.SubElement(root.find('rot_clustering'), 'reduced') ET.SubElement(root.find('rot_clustering'), 'green') s = [] h = [] g = [] for indre in listRotaClus: npdbs, nrot, nclu = indre if nclu in listClusters: s.append((nclu, '""')) h.append((nclu, '""')) g.append(nclu) else: h.append((nclu, '"none"')) s.append((nclu, '""')) root.find('rot_clustering/full').text = json.dumps(s) root.find('rot_clustering/reduced').text = json.dumps(h) root.find('rot_clustering/green').text = json.dumps(g) s = h = g = None data_graph = "[" liun = stats.keys() role = True for ind in range(len(liun)): key = liun[ind] value = stats[key] data_graph += """['""" + str(key) + """', """ + str(value[2]) + """, """ + str(value[4]) + """, """ + str( value[0]) + """, """ + str(role).lower() + """]""" role = not role if ind == len(liun) - 1: data_graph += """\n""" else: data_graph += """,\n""" data_graph += """\n]\n""" ET.SubElement(root.find('rot_clustering'), 'graphfull').text = data_graph liun = stats.keys() data_graph = "[" role = True for ind in range(len(liun)): key = liun[ind] if int(key) not in listClusters: continue value = stats[key] data_graph += """['""" + str(key) + """', """ + str(value[2]) + """, """ + str(value[4]) + """, """ + str( value[0]) + """, """ + str(role).lower() + """]""" role = not role if ind == len(liun) - 1: data_graph += """\n""" else: data_graph += """,\n""" data_graph += """\n]\n""" ET.SubElement(root.find('rot_clustering'), 'graphreduced').text = data_graph for key in stats.keys(): value = stats[key] ET.SubElement(root.find('rot_clustering'), 'FRF-' + str(key)) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'cluster').text = str(key) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'rotations').text = str(value[0]) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'distinct').text = str(value[1]) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'top_llg').text = str('%.2f' % (value[2])) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'mean_llg').text = str('%.2f' % (value[3])) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'top_zscore').text = str('%.2f' % (value[4])) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'mean_zscore').text = str('%.2f' % (value[5])) ed = root.find('TIME') listatime = [] if ed is not None: listatime = json.loads(ed.text) else: ed = ET.SubElement(root, 'TIME') listatime.append((mode, step, str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))) ed.text = json.dumps(listatime) tree.write(nomexml) print "XML WRITTEN", nomexml, frag_fixed # NOTE: TEMPORARY DEACTIVATED THE FOLLOWING IF if SOLVED: print "SOLVED THE STRUCTURE", FINALCC lock.release() forceGenerateHTML(lock, outputDir, filename) if STOP_IF_SOLVED: sys.exit(0) if mode.startswith("ARCIMBOLDO-BORGES") and step == "FRF": lock.release() return listClusters if not SOLVED: lock.release() def writeSumClusters(Clusters, dirout, filename, convNames, RotClu=[], LIMIT_CLUSTER=None, saveMAP=False, euler_frac_zero=False): global LAST_AVAILABLE_ROTID global MAP_OF_ROT_COMB if not os.path.exists(dirout): os.makedirs(dirout) pathfull = os.path.join(dirout, filename + ".sum") f = open(pathfull, "w") frag_fixed = False for ci in range(len(Clusters)): if LIMIT_CLUSTER != None and LIMIT_CLUSTER != ci: continue clu = Clusters[ci] if clu == None: f.write("============================== CLUSTER " + str( ci) + " ====== LONGEST SHIFT 0 0 ==============================" + "\n") f.write("===============================================================================" + "\n") continue lion = (clu["heapSolutions"]).asList() if "longest" not in clu: clu["longest"] = (0, 0) f.write("============================== CLUSTER " + str(ci) + " ====== LONGEST SHIFT " + str( clu["longest"][0]) + " " + str(clu["longest"][1]) + " ==============================" + "\n") for i in range(len(lion)): rota = (lion[i])[1] if euler_frac_zero: rota["euler"] = [0.0, 0.0, 0.0] rota["frac"] = [0.0, 0.0, 0.0] pdbname = "None" try: pdbname = convNames[rota["name"]] except: pdbname = "None" if "fixed_frags" not in rota: rota["fixed_frags"] = [] if "fixed_frags" in rota: frag_fixed = True f.write("///////////////FIXED FRAGMENTS///////////////" + "\n") for w in range(len(rota["fixed_frags"])): rotap = (rota["fixed_frags"])[w] pdbname2 = "None" try: pdbname2 = convNames[rotap["name"]] except: pdbname2 = "None" f.write("From Cluster: " + str(rotap["n_prev_cluster"]) + "\t") if "original_rotcluster" in rotap: f.write("Original Rot. Cluster: " + str(rotap["original_rotcluster"]) + "\n") if "numInRlist" in rotap: f.write( "n. " + str(i) + " coming from the pdb: " + pdbname2 + " ---> " + rotap["name"] + " ---> " + rotap["name"] + ".rlist" + " n.Rota: " + str(rotap["numInRlist"]) + "\n") else: f.write( "n. " + str(i) + " coming from the pdb: " + pdbname2 + " ---> " + rotap["name"] + " ---> " + rotap["name"] + ".rlist" + " n.Rota: " + "unknown" + "\n") if "vrms" in rotap: f.write("EULER: " + str(rotap["euler"]) + "\t" + "QUATERNION:" + str( rotap["quaternion"]) + "\tVRMSD: " + str(rotap["vrms"]) + "\n") else: f.write( "EULER: " + str(rotap["euler"]) + "\t" + "QUATERNION:" + str(rotap["quaternion"]) + "\n") if "numInSol" in rotap: f.write("FRAC: " + str(rotap["frac"]) + "\t" + "n.Sol: " + str(rotap["numInSol"]) + "\n") else: f.write("FRAC: " + str(rotap["frac"]) + "\n") if "parameters" not in rotap.keys(): f.write( "LLG: " + str(rotap["llg"]) + "\t" + "ZSCORE: " + str(rotap["zscore"]) + "\tELONG: " + str( rotap["elong"]) + "\tBFAC: " + str(rotap["bfactor"]) + "\n") else: f.write( "LLG: " + str(rotap["llg"]) + "\t" + "ZSCORE: " + str(rotap["zscore"]) + "\tELONG: " + str( rotap["elong"]) + "\tBFAC: " + str(rotap["bfactor"]) + "\tPDB: " + str( rotap["parameters"]["model_pdb"]) + "\tRMSD: " + str( rotap["parameters"]["rmsd"]) + "\tROT_RES: " + str( rotap["parameters"]["rot_res"]) + "\tROT_SAMPL: " + str( rotap["parameters"]["rot_sampl"]) + "\n") for lis in (rotap["rotationMatrices"])[1:2]: f.write("EQUIVALENT SIMMETRY: " + str(lis) + "\n") f.write("----------------" + "\n") f.write("/////////////////////////////////////////////" + "\n") if "n_prev_cluster" in rota: f.write("From Cluster: " + str(rota["n_prev_cluster"]) + "\t") else: f.write("From Cluster: " + str(ci) + " ") if "original_rotcluster" in rota: f.write("Original Rot. Cluster: " + str(rota["original_rotcluster"]) + "\n") if "numInRlist" in rota: f.write("n. " + str(i) + " coming from the pdb: " + pdbname + " ---> " + rota["name"] + " ---> " + rota[ "name"] + ".rlist" + " n.Rota: " + str(rota["numInRlist"]) + "\n") else: f.write("n. " + str(i) + " coming from the pdb: " + pdbname + " ---> " + rota["name"] + " ---> " + rota[ "name"] + ".rlist" + " n.Rota: " + "unknown" + "\n") if "vrms" in rota: f.write( "EULER: " + str(rota["euler"]) + "\t" + "QUATERNION:" + str(rota["quaternion"]) + "\tVRMSD: " + str( rota["vrms"]) + "\n") else: f.write("EULER: " + str(rota["euler"]) + "\t" + "QUATERNION:" + str(rota["quaternion"]) + "\n") if "numInSol" in rota: f.write("FRAC: " + str(rota["frac"]) + "\t" + "n.Sol: " + str(rota["numInSol"]) + "\n") else: f.write("FRAC: " + str(rota["frac"]) + "\n") if "parameters" not in rota.keys(): f.write("LLG: " + str(rota["llg"]) + "\t" + "ZSCORE: " + str(rota["zscore"]) + "\tELONG: " + str( rota["elong"]) + "\tBFAC: " + str(rota["bfactor"]) + "\n") else: f.write("LLG: " + str(rota["llg"]) + "\t" + "ZSCORE: " + str(rota["zscore"]) + "\tELONG: " + str( rota["elong"]) + "\tBFAC: " + str(rota["bfactor"]) + "\tPDB: " + str( rota["parameters"]["model_pdb"]) + "\tRMSD: " + str( rota["parameters"]["rmsd"]) + "\tROT_RES: " + str( rota["parameters"]["rot_res"]) + "\tROT_SAMPL: " + str(rota["parameters"]["rot_sampl"]) + "\n") for lis in (rota["rotationMatrices"])[1:2]: f.write("EQUIVALENT SIMMETRY: " + str(lis) + "\n") f.write("----------------" + "\n") f.write("===============================================================================" + "\n") f.close() if frag_fixed: f = open(dirout + filename + "Rotations.sum", "w") pickle.dump(LAST_AVAILABLE_ROTID, f) pickle.dump(MAP_OF_ROT_COMB, f) f.close() if len(RotClu) == 0: return # All the following will be executed only if we are not at the first fragment search f = open(dirout + filename + "Rotations.sum", "w") pickle.dump(LAST_AVAILABLE_ROTID, f) pickle.dump(MAP_OF_ROT_COMB, f) f.close() if not saveMAP: return writeSumClusters(RotClu, dirout, filename + "ROTCLU", convNames, RotClu=[]) def getListPDBtoPerform(Clusters, convNames, dirBase=None): lista = [] for clu in Clusters: for rota in clu: if dirBase == None: lista.append(convNames[rota["name"]]) else: lista.append(dirBase + convNames[rota["name"]]) return lista def analyzeROTclusters(DicParameters, sumPathAll, sumPathEnt, outputDir, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, fromV, toV): Clu2, cnv2 = readClustersFromSUM(sumPathEnt) f = open(os.path.join(outputDir, "evaluation.sum"), "w") f.write("ENT file produces " + str(len(Clu2)) + " cluster rotations.\n\n") result = False for i in range(len(Clu2)): clu2 = Clu2[i] liClu2 = clu2['heapSolutions'].asList() prio2, rota2 = liClu2[0] result2 = False Clu1, cnv1 = readClustersFromSUM(sumPathAll) for j in range(len(Clu1)): clu1 = Clu1[j] liClu1 = clu1['heapSolutions'].asList() prio1, rota1 = liClu1[0] # if rota2["n_prev_cluster"] == 0 and rota1["n_prev_cluster"] == 13: # print rota2["name"],cnv2[rota2["name"]],rota2["llg"],rota2["zscore"] # print rota1["name"],cnv1[rota1["name"]],rota1["llg"],rota1["zscore"] # result,elong = compareRotation(rota2,rota1,thresholdCompare,ClusteringMode,quate,laue,listNCS,ensembles,cell_dim,evaLLONG,print_angles=True) # else: result, elong = compareRotation(rota2, rota1, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG) angle = rota1["angle"] if result: f.write("Rotation ENT cluster id. " + str(rota2["n_prev_cluster"]) + " with LLG " + str( rota2["llg"]) + " and ZSCORE " + str( rota2["zscore"]) + " corresponds to library cluster id. " + str( rota1["n_prev_cluster"]) + " with LLG " + str(rota1["llg"]) + " and ZSCORE " + str( rota1["zscore"]) + "\n") result2 = True f.write("ENT Cl: " + str(rota2["n_prev_cluster"]) + " LIB Cl: " + str( rota1["n_prev_cluster"]) + " min. angle: " + str(angle) + "\n") if not result2: f.write("Rotation ENT cluster id. " + str(rota2["n_prev_cluster"]) + " with LLG " + str( rota2["llg"]) + " and ZSCORE " + str(rota2["zscore"]) + " not corresponds to any library clusters\n") f.close() lid = [] for i in range(len(Clu1)): clu1 = Clu1[i] liClu1 = clu1['heapSolutions'].asList() stats = [] idcluster = 0 for item in liClu1: prio, rota = item pdbf = os.path.basename(cnv1[rota["name"]]) pdbid = pdbf.split("_")[0] stats.append([int(pdbid[4:]), rota["llg"], rota["zscore"], rota["numInRlist"]]) idcluster = rota["n_prev_cluster"] lid.append(idcluster) stats = sorted(stats, __cmp_statsrt) # values = tuple(map(lambda x: x[3],stats)) # names = tuple(map(lambda x: x[0],stats)) f = open(os.path.join(outputDir, "clust" + str(i) + ".dat"), "w") ult_nome = fromV for item in stats: cur_nome = item[0] while ult_nome < cur_nome: f.write(str(ult_nome) + "\t0\t0\n") ult_nome += 1 f.write(str(item[0]) + "\t" + str(item[3]) + "\t" + str(item[1]) + "\n") ult_nome += 1 if ult_nome < toV: f.write(str(ult_nome) + "\t0\t0\n") ult_nome += 1 f.close() for i in range(len(Clu1)): f = open(os.path.join(outputDir, "clust" + str(i) + ".scr"), "w") stringas = """ clear reset #set terminal postscript eps size 3.5,2.62 enhanced color font 'Helvetica,20' linewidth 2 set terminal png size 2000,800 set output '""" + os.path.abspath(os.path.join(outputDir, "clust" + str(i) + ".png")) + """' unset key set xtics rotate out set style data histograms set style fill solid border set style histogram clustered plot 'clust""" + str(i) + """.dat' using 2:xticlabels(1) title 'Analisys cluster """ + str(lid[i]) + """', '' using 3:xticlabels(1) """ f.write(stringas) f.close() # p = subprocess.Popen(['gnuplot', os.path.join(outputDir,"clust"+str(i)+".scr")], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # out, err = p.communicate() # print out # print err def __getStringOfRototranslatedStructure(DicParameters, frag_fixed, dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, bestRota, ci, i, modeTra="frac", LIMIT_CLUSTER=None, renameWithConvNames=False, sufixSolPos=False): structureRef = None quo = None nomeFile = "" listNames = [] if elongatingModel == None or clu["longest"] == [0, 0]: parser = PDBParser() # structure=parser.get_structure(bestRota["name"],bestRota["baseDir"]+bestRota["name"]) structure = parser.get_structure(bestRota["name"], convNames[bestRota["name"]]) elif elongatingModel != None: # print "devo allungare elica",clu["longest"] structure = getLongerFragment(elongatingModel, clu["longest"], convNames[bestRota["name"]]) if mode == "matrix": rotateStructureByMatrix(ci, i, structure, bestRota["rotationMatrices"], dirout, cell_dim) nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) elif mode == "quaternion": rotateStructureByQuaternion(ci, i, structure, quate, bestRota["quaternion"], dirout) nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) elif mode == "simmetry_rotated": if "simmetry_rotated" in bestRota: # rotateStructureByQuaternion(ci,i,structure,quate,rota["quaternion"],dirout) # parser=PDBParser() # structure=parser.get_structure(rota["name"],dirout+str(ci)+"_"+str(i)+"_rot.pdb") rotateStructureByQuaternion(ci, i, structure, quate, bestRota["simmetry_rotated"], dirout) quo = bestRota["simmetry_rotated"] nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) else: rotateStructureByQuaternion(ci, i, structure, quate, bestRota["quaternion"], dirout) quo = bestRota["quaternion"] nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) listNames.append(nomeFile) if createSimmetry: nSymm = 100 * i for rti, rtq in (quate.matricesRot[laue]).iteritems(): if nSymm == 100: nSymm += 1 continue # new_quat = quate.RotateQuaternion(rot1["quaternion"],rtq) # aas = [[[1,0,0],[0,1,0],[0,0,1]]]+[rtq]#bestRota["rotationMatrices"]#+[rtq] aas = bestRota["rotationMatrices"] + [rtq] rotateStructureByMatrix(ci, i + nSymm, structure, aas, dirout, cell_dim) nomeFile = dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb", nomeFile) nSymm += 1 listNames.append(nomeFile) if modeTra == "frac" and performTranslation: parser = PDBParser() if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(bestRota["name"], nomeFile) translateStructurebyFrac(ci, i, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(bestRota["name"], dirout + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyFrac(ci, i, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass if createSimmetry: nSymm = 100 for rti, rtq in (quate.quaterRotaz[laue]).iteritems(): if nSymm == 100: nSymm += 1 continue if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str( i + nSymm) + ".pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass nSymm += 1 elif performTranslation and modeTra == "Cmass": # print "Devo scrivere traslai",i if i == 0: parser = PDBParser() if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structureRef = parser.get_structure(bestRota["name"], dirout + os.path.basename(convNames[bestRota["name"]])) translateStructurebyCentroidMass(ci, i, structureRef, structureRef, dirout) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i) + "_rottra.pdb", nomeFile) else: structureRef = parser.get_structure(bestRota["name"], dirout + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyCentroidMass(ci, i, structureRef, structureRef, dirout) os.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass if createSimmetry: nSymm = 100 for rti, rtq in (quate.quaterRotaz[laue]).iteritems(): if nSymm == 100: nSymm += 1 continue if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + os.path.basename(convNames[bestRota["name"]])[ :-4] + str(i + nSymm) + ".pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass nSymm += 1 else: parser = PDBParser() if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(bestRota["name"], dirout + os.path.basename(convNames[bestRota["name"]])) translateStructurebyCentroidMass(ci, i, structure, structureRef, dirout) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(bestRota["name"], dirout + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyCentroidMass(ci, i, structure, structureRef, dirout) os.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass if createSimmetry: nSymm = 100 for rti, rtq in (quate.quaterRotaz[laue]).iteritems(): if nSymm == 100: nSymm += 1 continue if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + os.path.basename(convNames[bestRota["name"]])[ :-4] + str(i + nSymm) + ".pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass nSymm += 1 return listNames def readRefFromSUM(sumPath): f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 CCVAL = [] while line != None and line != "": # process line if line.startswith("======="): print "start reading Ref. n. " + str(numClus) riga1 = line riga2 = (f.readline()).split() riga3 = (f.readline()).split() riga4 = f.readline() model = int(riga2[1]) corresp = getNewPathFromMerging(sumPath,riga2[3]) rmsd = float(riga2[5]) llg = float(riga3[1]) zscore = float(riga3[3]) nref = float(riga3[5]) ncom = float(riga3[7]) dizio = {"model": model, "corresp": corresp, "rmsd": rmsd, "llg": llg, "zscore": zscore, "nref": nref, "ncom": ncom} CCVAL.append(dizio) convNames[os.path.basename(corresp)] = corresp line = f.readline() numClus += 1 f.close() return (CCVAL, convNames) def readCCValFromSUM(sumPath): if not os.path.exists(sumPath): return [], {} f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 CCVAL = [] while line != None and line != "": # process line if line.startswith("======="): print "start reading CC_Val n. " + str(numClus) riga1 = line riga2 = (f.readline()).split() riga3 = (f.readline()).split() riga4 = (f.readline()).split() riga5 = f.readline() model = int(riga2[1]) corresp = getNewPathFromMerging(sumPath,riga2[3]) cluster = riga2[5] if cluster == "None": cluster = "None" else: cluster = cluster nAtoms = int(riga3[1]) nER = int(riga3[3]) initCC = float(riga3[5]) finalCC = float(riga3[7]) wMPEa = float(riga3[9]) wMPEb = float(riga3[11]) wMPEc = float(riga3[13]) wMPEd = float(riga3[15]) shx = float(riga4[1]) shy = float(riga4[2]) shz = float(riga4[3]) contrast = float(riga4[5]) connect = float(riga4[7]) mfom = float(riga4[9]) sfom = float(riga4[11]) dizio = {"model": model, "corresp": corresp, "natoms": nAtoms, "ner": nER, "initcc": initCC, "finalcc": finalCC, "cluster": cluster, "wMPE_init": [wMPEa, wMPEb], "wMPE_end": [wMPEc, wMPEd], "shift_origin": [shx, shy, shz], "contrast": contrast, "connect": connect, "mfom": mfom, "sfom": sfom} CCVAL.append(dizio) line = f.readline() numClus += 1 f.close() return (CCVAL, convNames) def readClustersFromSUMToDB(DicParameters, sumPath, table, LIMIT_CLUSTER=None, skip_reading_variables=False, give_fixed_frags=False,euler_to_zero=False): global LAST_AVAILABLE_ROTID global MAP_OF_ROT_COMB print "Reading:", sumPath please_exit = False if os.path.exists(sumPath): f = open(sumPath, "r") if f.readline() == "": f.close() please_exit = True else: please_exit = True if please_exit: if give_fixed_frags: return {}, [], [], [], {} else: return {}, [], [], {} genera = readClustersFromSUMwithYELD(sumPath,euler_to_zero=euler_to_zero) if not skip_reading_variables and os.path.exists(sumPath[:-4] + "Rotations.sum"): # print "TABLE",table,sumPath f = open(sumPath[:-4] + "Rotations.sum", "r") LAST_AVAILABLE_ROTID = pickle.load(f) MAP_OF_ROT_COMB = pickle.load(f) f.close() RotClu = [] encn = {} if not skip_reading_variables and os.path.exists(sumPath[:-4] + "ROTCLU.sum"): encn, RotClu, lose, lose2 = readClustersFromSUMToDB(DicParameters, sumPath[:-4] + "ROTCLU.sum", "ROTCLU", LIMIT_CLUSTER=LIMIT_CLUSTER, skip_reading_variables=True) ClusAll = [] numero_all_clu = genera.next() numfixed = 0 for clu in range(numero_all_clu): dicton = {"heapSolutions": ADT.Heap()} sol = genera.next() if sol == None: continue numclu = sol[1]["n_prev_cluster"] while sol != None: prio, rota = sol dicton["heapSolutions"].push(prio, rota) sol = genera.next() if len(ClusAll) <= numclu: while len(ClusAll) < numclu + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[numclu] = dicton if not skip_reading_variables: LAST_AVAILABLE_ROTID = numero_all_clu if give_fixed_frags: return genera.next(), ClusAll, RotClu, encn, numfixed else: return genera.next(), ClusAll, RotClu, encn def getNewPathFromMerging(sumPath,rigali): global BASE_SUM_FROM_WD rigali = os.path.normpath(rigali) sumPath = os.path.normpath(sumPath) tor = "" if not BASE_SUM_FROM_WD: tor = rigali else: #NOTE: I could create a function to extract the WD from a path #To be honest it should be better to put complete names as 1_FRF_LIBRARY #To avoid splitting in places where we do not want as in 3efg_1_0.pdb sumPath = sumPath.split("ELLG_COMP")[0] sumPath = sumPath.split("_ensembles")[0] sumPath = sumPath.split("ens1_frag")[0] sumPath = sumPath.split("1_")[0] sumPath = sumPath.split("2_")[0] sumPath = sumPath.split("6_")[0] sumPath = sumPath.split("7.5_")[0] sumPath = sumPath.split("onemodel")[0] sumPath = sumPath.split("3_")[0] sumPath = sumPath.split("4_")[0] sumPath = sumPath.split("8.5_")[0] sumPath = sumPath.split("8_")[0] sumPath = sumPath.split("9.5_")[0] sumPath = sumPath.split("9_")[0] sumPath = sumPath.split("10_")[0] sumPath = sumPath.split("11_")[0] if sumPath.endswith("/"): sumPath = sumPath[:-1] else: sumPath = os.path.split(sumPath)[0] rigas = rigali rigas = rigas.split("ELLG_COMP")[0] rigas = rigas.split("ens1_frag")[0] rigas = rigas.split("_ensembles")[0] rigas = rigas.split("1_")[0] rigas = rigas.split("2_")[0] rigas = rigas.split("6_")[0] rigas = rigas.split("7.5_")[0] rigas = rigas.split("onemodel")[0] rigas = rigas.split("3_")[0] rigas = rigas.split("4_")[0] rigas = rigas.split("8.5_")[0] rigas = rigas.split("8_")[0] rigas = rigas.split("9.5_")[0] rigas = rigas.split("9_")[0] rigas = rigas.split("10_")[0] rigas = rigas.split("11_")[0] rigas = rigas.split("_ensembles")[0] if not rigas.endswith("/"): rigas = os.path.split(rigas)[0] #print "rigali===================",rigali if os.path.exists(rigali): tor = rigali else: #print sumPath #print rigas strinit1,strcommon1,strend1 = ADT.split_string_by_common(rigali, rigas) if strend1.startswith("/"): strend1 = strend1[1:] #print strinit1,strcommon1,strend1 tor = os.path.join(sumPath,strend1) """ common = ADT.LCS(sumPath, rigali) # base = sumPath.split(common)[0] #print "VERY COMMON",common different, common, starting, ending = ADT.substract_string(sumPath, common) different2, common2, starting2, ending2 = ADT.substract_string(rigali, common) #print "sumpath",sumPath,"COMMON",common #print "STARTING",starting,"1",os.path.join(common,different2),"2",os.path.join(different,common) if starting and len(common) > 0: print "----",common, different2 tor = os.path.join(common, different2) elif ending and len(common) > 0: tor = os.path.join(different, common) elif (not starting and not ending) and len(common) == 0: tor = rigali elif (not starting and not ending): strinit1,strcommon1,strend1 = ADT.split_string_by_common(sumPath, common) strinit2,strcommon2,strend2 = ADT.split_string_by_common(rigali, common) if os.path.exists(os.path.join(os.path.join(strinit1,strcommon1),strend2)): #CASE with commom /text/ tor = os.path.join(os.path.join(strinit1,strcommon1),strend2) else: tor = os.path.join(strinit1+strcommon1,strend2) print strinit1,strcommon1,strend1 print strinit2,strcommon2,strend2 print sumPath print rigali print common print tor """ if not os.path.exists(tor): print tor print "ATTENTION: The program cannot automatically recover and interpret the precomputed run from another working directory.\n Please, remove intermediate directory and restart." sys.exit(0) return tor def readClustersFromSUMwithYELD(sumPath,euler_to_zero=False): ntrials = 10 nt = 0 out = "" numClus = None while nt <= ntrials: try: p = subprocess.Popen(["grep", "= CLUSTER", sumPath], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() numClus = len(out.splitlines()) break except: print "subprocess not ready...." nt += 1 # time.sleep(4) if len(out) == 0: f = open(sumPath) e = f.read() f.close() numClus = e.count("= CLUSTER") yield numClus f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 while line != None and line != "": # process line if line.startswith("================="): print "start reading Cluster n. " + str(numClus) ytr = line.split() clu = {} if ytr[6] == "None": ytr[6] = 0 if ytr[7] == "None": ytr[7] = 0 clu["longest"] = [int(ytr[6]), int(ytr[7])] line = f.readline() numRot = 0 quot = Quaternions.Quaternions() signs = None while not line.startswith("==================="): fixed = [] if line.startswith("/////////"): line = f.readline() while not line.startswith("////////"): # print "...inserting rotation n. "+str(numRot) dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) if euler_to_zero: dizio["euler"] = [0.0, 0.0, 0.0] else: dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) if len(riga4.split()) == 16: dizio["parameters"] = {"model_pdb": riga4.split()[9], "rmsd": float(riga4.split()[11]), "rot_res": float(riga4.split()[13]), "rot_sampl": float(riga4.split()[15])} q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] try: vrms = float(((riga2.split())[9])) dizio["vrms"] = vrms except: pass # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath,riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): fixed.append(dizio) else: print "ERROR---NOT CORRESPONDING ROWS!!!" print lastRiga print sumPath print dizio sys.exit(0) line = f.readline() line = f.readline() dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() if len(fixed) > 0: dizio["fixed_frags"] = fixed e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) if euler_to_zero: dizio["euler"] = [0.0, 0.0, 0.0] else: dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) if len(riga4.split()) == 16: dizio["parameters"] = {"model_pdb": riga4.split()[9], "rmsd": float(riga4.split()[11]), "rot_res": float(riga4.split()[13]), "rot_sampl": float(riga4.split()[15])} q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] try: vrms = float(((riga2.split())[9])) dizio["vrms"] = vrms except: pass # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath,riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): priority = (-1 * dizio["llg"], -1 * dizio["zscore"]) qlo = None if "simmetry_rotated" in dizio.keys(): qlo = dizio["simmetry_rotated"] else: qlo = dizio["quaternion"] if signs == None: signs = quot.signOfQuaternionComponents(qlo) soltuple = (priority, dizio) yield soltuple else: print "ERROR---NOT CORRESPONDING ROWS!!!" print lastRiga print sumPath print dizio sys.exit(0) line = f.readline() numRot += 1 print "End reading of cluster n. " + str(numClus) yield None numClus += 1 line = f.readline() else: line = f.readline() f.close() yield convNames def readClustersFromSUM(sumPath): global LAST_AVAILABLE_ROTID global MAP_OF_ROT_COMB print "Reading:", sumPath reloaded_variables = False if os.path.exists(sumPath[:-4] + "Rotations.sum"): f = open(sumPath[:-4] + "Rotations.sum", "r") LAST_AVAILABLE_ROTID = pickle.load(f) MAP_OF_ROT_COMB = pickle.load(f) f.close() reloaded_variables = True f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 while line != None and line != "": # process line if line.startswith("================="): print "start reading Cluster n. " + str(numClus) ytr = line.split() clu = {} if ytr[6] == "None": ytr[6] = 0 if ytr[7] == "None": ytr[7] = 0 clu["longest"] = [int(ytr[6]), int(ytr[7])] hp = ADT.Heap() line = f.readline() numRot = 0 dul = {} dul["quaternion"] = [0.0, 0.0, 0.0, 0.0] dul["euler"] = [0.0, 0.0, 0.0] dul["frac"] = [0.0, 0.0, 0.0] dul["llg"] = 0.0 dul["zscore"] = 0.0 dul["sumQuat"] = [0.0, 0.0, 0.0, 0.0] dul["sumEuler"] = [0.0, 0.0, 0.0] dul["sumLlg"] = 0.0 dul["sumZscore"] = 0.0 dul["sumFrac"] = [0.0, 0.0, 0.0] quot = Quaternions.Quaternions() signs = None while not line.startswith("==================="): fixed = [] if line.startswith("/////////"): line = f.readline() while not line.startswith("////////"): # print "...inserting rotation n. "+str(numRot) dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath,riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): fixed.append(dizio) else: print "ERROR---NOT CORRESPONDING ROWS!!!" print lastRiga print sumPath print dizio sys.exit(0) line = f.readline() line = f.readline() dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() if len(fixed) > 0: dizio["fixed_frags"] = fixed e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath,riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): priority = (-1 * dizio["llg"], -1 * dizio["zscore"]) qlo = None if "simmetry_rotated" in dizio.keys(): qlo = dizio["simmetry_rotated"] else: qlo = dizio["quaternion"] if signs == None: signs = quot.signOfQuaternionComponents(qlo) (dul["sumQuat"])[0] += abs(qlo[0]) (dul["sumQuat"])[1] += abs(qlo[1]) (dul["sumQuat"])[2] += abs(qlo[2]) (dul["sumQuat"])[3] += abs(qlo[3]) (dul["sumEuler"])[0] += (dizio["euler"])[0] (dul["sumEuler"])[1] += (dizio["euler"])[1] (dul["sumEuler"])[2] += (dizio["euler"])[2] (dul["sumFrac"])[0] += (dizio["frac"])[0] (dul["sumFrac"])[1] += (dizio["frac"])[1] (dul["sumFrac"])[2] += (dizio["frac"])[2] dul["sumLlg"] += dizio["llg"] dul["sumZscore"] += dizio["zscore"] hp.push(priority, dizio) else: print "ERROR---NOT CORRESPONDING ROWS!!!" print lastRiga print sumPath print dizio sys.exit(0) line = f.readline() numRot += 1 clu["heapSolutions"] = hp if hp.len() > 0: (dul["quaternion"])[0] = signs[0] * ((dul["sumQuat"])[0] / hp.len()) (dul["quaternion"])[1] = signs[1] * ((dul["sumQuat"])[1] / hp.len()) (dul["quaternion"])[2] = signs[2] * ((dul["sumQuat"])[2] / hp.len()) (dul["euler"])[0] = (dul["sumEuler"])[0] / hp.len() (dul["euler"])[1] = (dul["sumEuler"])[1] / hp.len() (dul["euler"])[2] = (dul["sumEuler"])[2] / hp.len() (dul["frac"])[0] = (dul["sumFrac"])[0] / hp.len() (dul["frac"])[1] = (dul["sumFrac"])[1] / hp.len() (dul["frac"])[2] = (dul["sumFrac"])[2] / hp.len() dul["llg"] = dul["sumLlg"] / hp.len() dul["zscore"] = dul["sumZscore"] / hp.len() clu['centroid'] = dul Clusters.append(clu) print 'End reading of cluster n. ' + str(numClus) numClus += 1 line = f.readline() else: line = f.readline() f.close() if not reloaded_variables: LAST_AVAILABLE_ROTID = numClus return (Clusters, convNames) def filterClustersByKmeansCrossValidated(Clusters, convNames, limit, treshJump, rotaPerSubGroup): Clus = [] for i in range(len(Clusters)): print 'SubClustering with K-Means Algorithm of the Cluster: ' + str(i) clu = Clusters[i] liClu = clu['heapSolutions'].asList() if len(liClu) < limit: Clus.append(clu) continue else: hp = ADT.Heap() dizio = {} dul = {} dul['quaternion'] = [0.0, 0.0, 0.0, 0.0] dul['euler'] = [0.0, 0.0, 0.0] dul['frac'] = [0.0, 0.0, 0.0] dul['llg'] = 0.0 dul['zscore'] = 0.0 dul['sumQuat'] = [0.0, 0.0, 0.0, 0.0] dul['sumEuler'] = [0.0, 0.0, 0.0] dul['sumLlg'] = 0.0 dul['sumZscore'] = 0.0 dul['sumFrac'] = [0.0, 0.0, 0.0] listaFOM = [] for r in liClu: prio = r[0] rota = r[1] listaFOM.append([rota['llg'], rota['zscore']]) narr = numpy.array(listaFOM) whitened = scipy.cluster.vq.whiten(narr) valori = [] v = 5 whishu = copy.deepcopy(whitened) numpy.random.shuffle(whishu) print 'V-Parameter chosen for the cross-validation is: ' + str(v) subs = numpy.array_split(whishu, v) kappa = 0 start = numpy.sqrt(len(whitened) / 2) / 2 for i in range(start, start * 2 * 2): avg_crossv = 0.0 for q in range(len(subs)): test = subs[q] avg_sqd = 0.0 for z in range(len(subs)): if z != q: training = subs[z] groups, labels = scipy.cluster.vq.kmeans2(training, i, iter=20, minit='points') sum_sqd = 0.0 for ctest in test: sqd_min = numpy.inf for centroid in groups: sqd = (centroid[0] - ctest[0]) ** 2 + (centroid[1] - ctest[1]) ** 2 if sqd < sqd_min: sqd_min = sqd sum_sqd += sqd_min avg_sqd += sum_sqd avg_sqd /= v - 1 avg_crossv += avg_sqd avg_crossv /= v valori.append([avg_crossv, i]) print i, avg_crossv, kappa = i if len(valori) > 1: jump = valori[-2][0] - valori[-1][0] print valori[-2][0] - valori[-1][0] if jump <= treshJump: break else: print print 'Performing a subcluster with K-Means with K=' + str(kappa) groups, labels = scipy.cluster.vq.kmeans2(whitened, kappa, iter=20, minit='points') subclu = [ADT.Heap() for _ in range(kappa)] for p in range(len(labels)): ind = labels[p] rota = liClu[p] subclu[ind].push(rota[0], rota[1]) quot = Quaternions.Quaternions() signs = None for sc in subclu: for l in range(rotaPerSubGroup): try: prio, item = sc.pop() hp.push(prio, item) qlo = None if 'simmetry_rotated' in item.keys(): qlo = item['simmetry_rotated'] else: qlo = item['quaternion'] if signs == None: signs = quot.signOfQuaternionComponents(qlo) dul['sumQuat'][0] += abs(qlo[0]) dul['sumQuat'][1] += abs(qlo[1]) dul['sumQuat'][2] += abs(qlo[2]) dul['sumQuat'][3] += abs(qlo[3]) dul['sumEuler'][0] += item['euler'][0] dul['sumEuler'][1] += item['euler'][1] dul['sumEuler'][2] += item['euler'][2] dul['sumFrac'][0] += item['frac'][0] dul['sumFrac'][1] += item['frac'][1] dul['sumFrac'][2] += item['frac'][2] dul['sumLlg'] += item['llg'] dul['sumZscore'] += item['zscore'] except: pass if hp.len() > 0: dul['quaternion'][0] = signs[0] * (dul['sumQuat'][0] / hp.len()) dul['quaternion'][1] = signs[1] * (dul['sumQuat'][1] / hp.len()) dul['quaternion'][2] = signs[2] * (dul['sumQuat'][2] / hp.len()) dul['quaternion'][3] = signs[3] * (dul['sumQuat'][3] / hp.len()) dul['euler'][0] = dul['sumEuler'][0] / hp.len() dul['euler'][1] = dul['sumEuler'][1] / hp.len() dul['euler'][2] = dul['sumEuler'][2] / hp.len() dul['frac'][0] = dul['sumFrac'][0] / hp.len() dul['frac'][1] = dul['sumFrac'][1] / hp.len() dul['frac'][2] = dul['sumFrac'][2] / hp.len() dul['llg'] = dul['sumLlg'] / hp.len() dul['zscore'] = dul['sumZscore'] / hp.len() dizio['centroid'] = dul dizio['heapSolutions'] = hp Clus.append(dizio) return Clus def evaluateFTF(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, ensembles, excludeZscore, fixed_frags, quate, mode, laue, listNCS, clusteringMode, cell_dim, thresholdCompare, evaLLONG, tops=None, LIMIT_CLUSTER=None, convNames={}, applyNameFilter=False, renamePDBs=True, giveids=False, isArcimboldo=False): dirente = outputDicr if dirente[-1] == '/': dirente = dirente[:-1] if hasattr(cm, 'channel'): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) status = '--' fromIn = 0 toIn = nqueue - 1 poolrota = [] foms = {'llg': [numpy.inf, 0.0], 'zscore': [numpy.inf, 0.0]} candelete = True listaAllsol = [[], [], {}] nfixfrags = 0 ndir = 0 dirente2 = '' current_dir2 = '' rnp_sol = {} while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print 'Evaluating ' + str(fromIn) + ' translation file' if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, './' + str(ndir) + '/') dirente2 = outputDic if dirente2[-1] == '/': dirente2 = dirente2[:-1] if hasattr(cm, 'channel'): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 havetoskip = False while 1: if hasattr(cm, 'channel'): wse = cm.get_remote_file(str(fromIn) + '.out', os.path.join(outputDic, str(fromIn) + '.out'), conditioEND=PHASER_OUT_ANY_CASE, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.out not ready sleeping 3 seconds...' time.sleep(3) continue failure = checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, failure_test=PHASER_OUT_FAILURE_CONDITION_LOCAL) if failure == None: havetoskip = True break if mode == 'PACK': f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkpack = f.read() f.close() if checkpack.count(str(fromIn) + '.sol') == 1: wse = cm.get_remote_file(str(fromIn) + '.sol', os.path.join(outputDic, str(fromIn) + '.sol'), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.sol not ready sleeping 3 seconds...' time.sleep(3) continue else: wse = cm.get_remote_file(str(fromIn) + '.sol', os.path.join(outputDic, str(fromIn) + '.sol'), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.sol not ready sleeping 3 seconds...' time.sleep(3) continue if mode == 'RNP': f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...' time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + '.out')): failure = checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, failure_test=PHASER_OUT_FAILURE_CONDITION_LOCAL) if failure == None: havetoskip = True break while True: atest = False btest = False if mode == 'PACK': f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkpack = f.read() f.close() if checkpack.count(str(fromIn) + '.sol') == 1: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + '.sol')) else: atest = True else: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + '.sol')) if mode == 'RNP': f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.1\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) btest = os.path.exists(os.path.join(outputDic, ler)) if not btest: break else: btest = True if atest and btest: break break else: time.sleep(3) continue if not havetoskip: ensembles, listaAllsol, nfixfrags = clusterAtOnceSols(DicParameters, listaAllsol, isArcimboldo, renamePDBs, copy.deepcopy(rnp_sol), outputDic, str(fromIn), quate, laue, listNCS, excludeZscore, mode, clusteringMode, ensembles, cell_dim, thresholdCompare, evaLLONG, convNames, tops=tops, LIMIT_CLUSTER=LIMIT_CLUSTER, giveids=giveids) candelete = True if candelete: try: nb = fromIn if os.path.exists(outputDic + str(nb) + '.sol'): os.remove(outputDic + str(nb) + '.sol') if os.path.exists(outputDic + str(nb) + '.out'): os.remove(outputDic + str(nb) + '.out') fixfrfr = 1 if fixed_frags > 0: fixfrfr = fixed_frags for rt in range(fixfrfr): try: os.unlink(outputDic + str(rt) + '_' + str(nb) + '.pdb') except: pass if os.path.exists(outputDic + str(nb) + '.sh'): os.remove(outputDic + str(nb) + '.sh') if os.path.exists(outputDic + str(nb) + '.sum'): os.remove(outputDic + str(nb) + '.sum') if os.path.exists(outputDic + str(nb) + '.rlist'): os.remove(outputDic + str(nb) + '.rlist') except: print 'Cannot find the file to delete.' fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime('%Y-%m-%d %H:%M') print 'Not all models are yet performed...' + ti if not yetEvaluated and not startedLocal: print 'Long sleep, queue not ready' time.sleep(60) for root, subFolders, files in os.walk(outputDicr): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): # s = re.findall('[0-9]+\\.[0-9]+\\.[0-9]+\\.pdb', fileu) if len(fileu.split(".")) > 3: os.remove(pdbf) if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print 'Now all models are performed.' SystemUtility.close_connection(DicGridConn, DicParameters, cm) listaAllsol.append([]) ClusAll = saveRotations(DicParameters, listaAllsol, LIMIT_CLUSTER=LIMIT_CLUSTER, applyNameFilter=applyNameFilter) return (ClusAll, ensembles, nfixfrags) def removeParallelFragments(Clusters): Clus = [] for cla in Clusters: lista = cla["heapSolutions"].asList() hp = ADT.Heap() for item in lista: prio, rota = item liNot = [] for fix in rota["fixed_frags"]: liNot.append(int(fix["n_prev_cluster"])) if int(rota["n_prev_cluster"]) not in liNot: hp.push(prio, rota) cla["heapSolutions"] = hp Clus.append(cla) return Clus def eraseRotationsInList(Clusters, done): for clu in Clusters: hp = ADT.Heap() for item in clu["heapSolutions"]: prio, rota = item if rota["n_prev_cluster"] not in done: hp.push(prio, rota) clu["heapSolutions"] = hp return Clusters def divideInGroupsForFixedFrags(Clusters): Clus = [] for tru in Clusters: lista = tru["heapSolutions"].asList() for item in lista: prio, rota = item fix = rota["fixed_frags"] if fix not in Clus: Clus.append(fix) return Clus def getClustersUsed(Clusters): used = [] for clu in Clusters: lista = clu["heapSolutions"].asList() for item in lista: prio, rota = item if rota["n_prev_cluster"] not in used: used.append(rota["n_prev_cluster"]) if "fixed_frags" in rota: for fix in rota["fixed_frags"]: if fix["n_prev_cluster"] not in used: used.append(fix["n_prev_cluster"]) return used def eraseClustersInList(comparing, done): Clus = [] for u in range(len(comparing)): if u not in done: Clus.append(comparing[u]) return Clus def createCombinations(Clus, comparing): Clusters = [None for _ in range(nOfCluster)] # print "Creating Clusters of len",len(Clusters) for fixed in Clus: liNot = [] for rot in fixed: liNot.append(int(rot["n_prev_cluster"])) clust = liNot[0] # print "Put in clusters", clust for i in range(len(comparing)): if i in liNot: continue """ if clust == 0: if i in liNot or i= 0: din["longest"] = [0, rotaz["elong"]] elif "longest" not in din.keys() and rotaz["elong"] < 0: din["longest"] = [rotaz["elong"], 0] elif (rotaz["elong"] < 0 and din["longest"][0] > rotaz["elong"]): din["longest"][0] = rotaz["elong"] elif (rotaz["elong"] >= 0 and din["longest"][1] < rotaz["elong"]): din["longest"][1] = rotaz["elong"] hp.push(priority, rotaz) din["heapSolutions"] = hp Clusters[clust] = din # print "...rotation inserted in cluster: "+str(0)+"..." else: priority = (-1 * rotaz["llg"], -1 * rotaz["zscore"]) rotaz["fixed_frags"] = fixed if "longest" not in (Clusters[clust]).keys() and rotaz["elong"] >= 0: (Clusters[clust])["longest"] = [0, rotaz["elong"]] elif "longest" not in (Clusters[clust]).keys() and rotaz["elong"] < 0: (Clusters[clust])["longest"] = [rotaz["elong"], 0] elif (rotaz["elong"] < 0 and (Clusters[clust])["longest"][0] > rotaz["elong"]): (Clusters[clust])["longest"][0] = rotaz["elong"] elif (rotaz["elong"] >= 0 and (Clusters[clust])["longest"][1] < rotaz["elong"]): (Clusters[clust])["longest"][1] = rotaz["elong"] ((Clusters[clust])["heapSolutions"]).push(priority, rotaz) return Clusters def evaluateFRF_MPR(DicParameters, GRID_TYPE, QNAME, FRACTION, PARTITION, cm, sym, nice, DicGridConn, RotClu, nameJob, outputDicr, nqueue, quate, laue, ncs, spaceGroup, ensembles, clusteringAlg, excludeLLG, fixed_frags, cell_dim, thresholdCompare, evaLLONG, isArcimboldo=False, tops=None, LIMIT_CLUSTER=None, applyNameFilter=False, candelete=True, giveids=False, merge=[]): f = open(os.path.join(outputDicr, nameJob + ".pic"), "w") lista_dati = [PATH_NEW_PHASER, PATH_NEW_SHELXE, PATH_NEW_ARCIFIRE, DicParameters, nice, DicGridConn, RotClu, nameJob, outputDicr, nqueue, laue, ncs, spaceGroup, ensembles, clusteringAlg, excludeLLG, fixed_frags, cell_dim, thresholdCompare, evaLLONG, isArcimboldo, tops, LIMIT_CLUSTER, applyNameFilter, candelete, giveids, merge, sym, GRID_TYPE, QNAME, FRACTION, PARTITION] pickle.dump(lista_dati, f) pickle.dump(os.path.join(outputDicr, nameJob + "_end.txt"), f) f.close() def startJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, "merged.sum")) or os.path.exists( os.path.join(outputDirectory, "unmerged.sum")): return print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_ARCIFIRE, "-r", os.path.join(outputDirectory, op) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_ARCIFIRE, "-r", os.path.join(outputDirectory, op)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() print err execu = "" if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDicr, nameJob + ".pic"), nameJob + ".pic", force_cumulative=False) cm.copy_local_file(PATH_NEW_ARCIFIRE, os.path.basename(PATH_NEW_ARCIFIRE)) execu = os.path.join(cm.get_remote_pwd(), os.path.basename(PATH_NEW_ARCIFIRE)) else: execu = PATH_NEW_ARCIFIRE if cm is None: try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startJob, outputDicr, os.path.join(outputDicr, nameJob + ".pic")) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = "" + PATH_NEW_ARCIFIRE + " -r " + os.path.join(outputDicr, nameJob + ".pic") + " > /dev/null" # +nameDir+"log_output_"+str(c)+".out" SystemUtility.launchCommand(comando, os.path.join(outputDicr, nameJob + "_end.txt"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) if hasattr(cm, "channel"): job.setInitialDir(cm.get_remote_pwd()) else: job.setInitialDir(os.path.abspath(outputDicr)) script = """#!/bin/bash if [ ! -f """ + execu + """ ]; then """ + execu + """ -r """ + os.path.join(outputDicr, nameJob + ".pic") + """ > /dev/null else """ + os.path.basename(execu) + """ -r """ + os.path.join(outputDicr, nameJob + ".pic") + """ > /dev/null fi """ f = open(os.path.join(outputDicr, nameJob + ".sh"), "w") f.write(script) f.close() st = os.stat(os.path.join(outputDicr, nameJob + ".sh")) os.chmod(os.path.join(outputDicr, nameJob + ".sh"), st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) job.setExecutable(os.path.join(outputDicr, nameJob + ".sh")) job.addInputFile(os.path.join(outputDicr, nameJob + ".pic"), False) job.addInputFile(execu, False) job.addOutputFile("merged.sum", False) job.addOutputFile("unmerged.sum", False) (nc, nq) = cm.submitJob(job, isthelast=True) SystemUtility.LISTJOBS[nameJob] = [(os.path.join(outputDicr, nameJob + "_end.txt"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, True, "")] print "Search in " + str(nq) + " structure submitted to the cluster " + str(nc) return os.path.join(outputDicr, "merged.sum"), os.path.join(outputDicr, "unmerged.sum") def evaluateFRF_clusterOnce(DicParameters, cm, sym, DicGridConn, RotClu, nameJob, outputDicr, nqueue, quate, laue, ncs, spaceGroup, ensembles, clusteringAlg, excludeLLG, fixed_frags, cell_dim, thresholdCompare, evaLLONG, isArcimboldo=False, tops=None, LIMIT_CLUSTER=None, applyNameFilter=False, candelete=True, giveids=False, merge=[]): global LAST_AVAILABLE_ROTID status = "--" fromIn = 0 toIn = nqueue - 1 foms = {"llg": [numpy.inf, 0.0], "zscore": [numpy.inf, 0.0]} listaAllrot = [[], [], {}, [[], RotClu]] dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) ndir = 0 dirente2 = "" current_dir2 = "" while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print "Evaluating " + str(fromIn) + " rotation file" if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(fromIn) + ".out", os.path.join(outputDic, str(fromIn) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(fromIn) + ".rlist", os.path.join(outputDic, str(fromIn) + ".rlist"), lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".rlist not ready sleeping 3 seconds..." time.sleep(3) continue # print cm.remove_remote_file(str(fromIn)+".rlist") # print cm.remove_remote_file(str(fromIn)+".out") # print cm.remove_remote_file(str(fromIn)+".sum") # print cm.remove_remote_file(str(fromIn)+".sh") # print cm.remove_remote_file(str(fromIn)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(fromIn) + ".out")): checkYOURoutput(os.path.join(outputDic, str(fromIn) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + ".rlist")) if atest: break break else: # print "File ",os.path.join(outputDic,str(fromIn)+".out"),os.path.exists(os.path.join(outputDic,str(fromIn)+".out")) time.sleep(3) continue lastFile = False if fromIn == toIn: lastFile = True if not giveids: listaAllrot = clusterAtOnce(DicParameters, listaAllrot, isArcimboldo, outputDic, str(fromIn), quate, laue, ncs, excludeLLG, "FRF", clusteringAlg, ensembles, cell_dim, thresholdCompare, evaLLONG, tops=tops, LIMIT_CLUSTER=LIMIT_CLUSTER, giveids=giveids, applyNameFilter=applyNameFilter, lastFile=lastFile) else: ensembles, listaAllrot = clusterAtOnce(DicParameters, listaAllrot, isArcimboldo, outputDic, str(fromIn), quate, laue, ncs, excludeLLG, "FRF", clusteringAlg, ensembles, cell_dim, thresholdCompare, evaLLONG, tops=tops, LIMIT_CLUSTER=LIMIT_CLUSTER, giveids=giveids, applyNameFilter=applyNameFilter, lastFile=lastFile) candelete = True if candelete: try: nb = fromIn os.remove(outputDic + str(nb) + ".rlist") os.remove(outputDic + str(nb) + ".out") os.remove(outputDic + str(nb) + ".pdb") for rt in range(fixed_frags): try: os.remove(outputDic + str(rt) + "_" + str(nb) + ".pdb") except: pass os.remove(outputDic + str(nb) + ".sh") os.remove(outputDic + str(nb) + ".sum") except Exception: # print sys.exc_info() # traceback.print_exc(file=sys.stdout) # candelete = False # sys.exit(0) pass fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) print "Now all models are performed." if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file("rot.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) SystemUtility.close_connection(DicGridConn, DicParameters, cm) if giveids and len(merge) == 0: return listaAllrot[3][0], listaAllrot[3][1], ensembles elif len(merge) > 0: return __change_rot_clusterid(merge, listaAllrot[3][0], quate, laue, ncs, clusteringAlg, ensembles, cell_dim, thresholdCompare, evaLLONG) else: return listaAllrot[3][0], listaAllrot[3][1] def __change_rot_clusterid(template, data, quate, laue, ncs, clusteringAlg, ensembles, cell_dim, thresholdCompare, evaLLONG): visited = [] numc = 0 unmerged = [] for a in range(len(data)): clu1 = data[a] if len(clu1["heapSolutions"].asList()) == 0: continue item1 = clu1["heapSolutions"].pop() clu1q = item1[1] clu1["heapSolutions"].push(item1[0], item1[1]) numc = clu1q["n_prev_cluster"] performed = False numd = -1 for b in range(len(template)): if b in visited: continue clu2 = template[b] if len(clu2["heapSolutions"].asList()) == 0: continue item2 = clu2["heapSolutions"].pop() clu2q = item2[1] clu2["heapSolutions"].push(item2[0], item2[1]) re, elo = compareRotation(clu1q, clu2q, thresholdCompare, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG) if re: visited.append(b) performed = True numd = clu2q['n_prev_cluster'] break if performed: for ite in clu1["heapSolutions"].asList(): prio2 = ite[0] rota2 = ite[1] rota2['n_prev_cluster'] = numd # NOTE: the following instruction imply we are always working with 1 fixed frag. This method thus # it is not supported for ARCIMBOLDO but just for ARCIMBOLDO-BORGES rota2['original_rotcluster'] = numd # priority = (-1 * rota2['llg'], -1 * rota2['zscore']) # clu1["heapSolutions"].push(priority,rota2) else: unmerged.append(copy.deepcopy(clu1)) clu1["heapSolutions"] = ADT.Heap() return data, unmerged, ensembles def __clusterSetOfRotations(isArcimboldo, quaternions, ref_labels, ref_rotaz, thresholdCompare, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG, prefilter=None): # kappa,whitened = Bioinformatics.__CrossValidationKParameter(quaternions,2,-0.1) # labels = scipy.cluster.hierarchy.fclusterdata(listaAllrot[0], 0.5, method='average') if len(quaternions) > 10000: kappa = None labels = None if isArcimboldo: kappa, labels = Bioinformatics.__STDCentroidKParameter(quaternions, 4, 0.0001, criteria="mean", oneByone=True) else: kappa, labels = Bioinformatics.__STDCentroidKParameter(quaternions, 2, 0.0001, criteria="mean", oneByone=True) # groups, labels = scipy.cluster.vq.kmeans(whitened,kappa,iter=20,minit="points") print "Performing a cluster with K-Means with K=" + str(kappa) subclu = [[] for _ in range(kappa)] for p in range(len(labels)): ind = labels[p] name = ref_labels[p] subclu[ind].append(ref_rotaz[name]) else: subclu = [[] for _ in range(len(quaternions))] for p in range(len(quaternions)): name = ref_labels[p] subclu[p].append(ref_rotaz[name]) tomerge = {} merged = [] # print "Numero: ",len(subclu) # print subclu # for clus in subclu: # print "CLUSTER======================" # for rotaz in clus: # print rotaz["euler"],ADT.cantor_pairing([rotaz["euler"][0]*3,rotaz["euler"][1]*1,rotaz["euler"][2]*2]) # print "=================================" for plu in range(len(subclu)): clus = subclu[plu] if len(clus) == 0: # print "Empty cluster",plu,"skipping..." continue rotaz = clus[0] inserted = False # print "Evaluating centroid ",plu for tr in range(len(merged)): # print "comparing with cluster",tr,"of",len(merged) rot = merged[tr][0] indsubclu = merged[tr][1] result = False elong = 0 # print "###################################" # print "rocci1",rotaz["euler"] # print "rocci2",rot["euler"] if prefilter == None or prefilter <= 0: result, elong = compareRotation(rotaz, rot, 3, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG, print_angles=False) else: result, elong = compareRotation(rotaz, rot, prefilter, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG, print_angles=False) if result: # print "Rotation is compatible with cluster",tr,"with an elongation of",elong rotaz["elong"] = elong inserted = True if indsubclu not in tomerge.keys(): tomerge[indsubclu] = [plu] else: tomerge[indsubclu] += [plu] break if not inserted: # print "Inserting rotation in a new cluster" rotaz["elong"] = 0 merged.append((rotaz, plu)) tomerge[plu] = [] # print "Numero ex clusters",len(subclu) # print "Numero nuovi clusters",len(merged) # print "Accorpamenti" # print tomerge startT = 6 if prefilter == None or prefilter <= 0: startT = 6 else: startT = 3 for key1 in tomerge.keys(): liuno = subclu[key1] for k in tomerge[key1]: liuno += subclu[k] liuno = sorted(liuno, __cmp_rota, reverse=True) liu = [liuno[0]] subclu[key1] = liu tomerge[key1] = [] for trrr in numpy.arange(startT, thresholdCompare + 3, 3): doSomething = True while doSomething: doSomething = False for key1 in tomerge.keys(): for key2 in tomerge.keys(): if key1 == key2: continue result = False elong = 0 liuno = sorted(subclu[key1], __cmp_rota, reverse=True) lidue = sorted(subclu[key2], __cmp_rota, reverse=True) rotaz = liuno[0] rot = lidue[0] # print "###################################" # print "rocci1",rotaz["euler"] # print "rocci2",rot["euler"] result, elong = compareRotation(rotaz, rot, trrr, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG, print_angles=False) if result: # print "Rotation is compatible with cluster",tr,"with an elongation of",elong rotaz["elong"] = elong tomerge[key1] = tomerge[key1] + tomerge[key2] + [key2] del tomerge[key2] doSomething = True break # to the first for if doSomething: break # to the while nuovo_clus = [] indes = 0 for key in tomerge.keys(): value = tomerge[key] + [key] # pdb_done = [] dictio = {} dictio["heapSolutions"] = ADT.Heap() # print "NEW CLUSTER==================================" for va in value: subclu[va] = sorted(subclu[va], __cmp_rota, reverse=True) for rotaz in subclu[va]: # print rotaz["euler"],rotaz["llg"],rotaz["name"],"----",pdb_done # if not isArcimboldo and rotaz["name"] in pdb_done: # print "Not inserted!!" # continue # pdb_done.append(rotaz["name"]) dictio["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) nuovo_clus.append(copy.deepcopy(dictio)) indes += 1 # print "=================================================" tome = {} for kj in range(len(nuovo_clus)): tome[kj] = [] print "Final:", tome # else: # tomerge = {} # for va in range(len(nuovo_clus)): # tomerge[va] = [] return tome, nuovo_clus def __cmp_atom(a, b): ai = a.get_full_id()[3] bi = a.get_full_id()[3] return cmp(ai, bi) def __cmp_statsrt(a, b): ai = a[0] bi = b[0] return cmp(ai, bi) def __cmp_cluster_zscore(a, b): li1 = sorted(a["heapSolutions"].asList(), __cmp_rota3, reverse=True) if len(li1) > 0: prioa, rota = li1[0] else: prioa = (-1, -1) rota = {"zscore": -10000000000000} li2 = sorted(b["heapSolutions"].asList(), __cmp_rota3, reverse=True) if len(li2) > 0: priob, rotb = li2[0] else: priob = (-1, -1) rotb = {"zscore": -10000000000000} ai = float(rota["zscore"]) bi = float(rotb["zscore"]) return cmp(ai, bi) def __cmp_cluster(a, b): li1 = sorted(a["heapSolutions"].asList(), __cmp_rota2, reverse=True) if len(li1) > 0: prioa, rota = li1[0] else: prioa = (-1, -1) rota = {"llg": -10000000000000} li2 = sorted(b["heapSolutions"].asList(), __cmp_rota2, reverse=True) if len(li2) > 0: priob, rotb = li2[0] else: priob = (-1, -1) rotb = {"llg": -10000000000000} ai = float(rota["llg"]) bi = float(rotb["llg"]) return cmp(ai, bi) def __cmp_rota(a, b): ai = float(a["llg"]) bi = float(b["llg"]) return cmp(ai, bi) def __cmp_rota2(a, b): ai = (float(a[1]["llg"]), float(a[1]["zscore"])) bi = (float(b[1]["llg"]), float(b[1]["zscore"])) return cmp(ai, bi) def __cmp_rota3(a, b): ai = (float(a[1]["zscore"]), float(a[1]["llg"])) bi = (float(b[1]["zscore"]), float(b[1]["llg"])) return cmp(ai, bi) def checkYOURoutput(myfile, conditioEND, testEND, sleep_ifnot_ready=True, failure_test=None): esegui = True correct = False while esegui: if not os.path.exists(myfile): if sleep_ifnot_ready: time.sleep(3) continue else: return False if failure_test != None and testEND != None and sleep_ifnot_ready: f = open(myfile) e = f.read() f.close() if isinstance(failure_test, str): numero = e.count(failure_test) # print "--------",myfile,failure_test,numero,testEND,conditioEND if numero == int(testEND): return None elif isinstance(conditioEND, list): numero = 0 for uu in conditioEND: numero += e.count(uu) if numero == int(testEND): return None if conditioEND != None and testEND != None: f = open(myfile) e = f.read() f.close() if isinstance(conditioEND, str): numero = e.count(conditioEND) # print "+++--------+++",myfile,numero,conditioEND,testEND,type(numero),type(testEND) if numero == int(testEND): esegui = False correct = True elif sleep_ifnot_ready: time.sleep(3) else: return False elif isinstance(conditioEND, list): numero = 0 for uu in conditioEND: numero += e.count(uu) if numero == int(testEND): esegui = False correct = True elif sleep_ifnot_ready: time.sleep(3) else: return False else: esegui = False correct = True return correct def evaluateExp(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, convNames, dirPath, method="fast", renamePDBs=True, insfile=None): status = "--" fromIn = 0 toIn = nqueue - 1 eseguitoUnaVolta = False startexp = ".pda" if insfile != None: startexp = ".phi" dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) ndir = 0 dirente2 = "" current_dir2 = "" while fromIn <= toIn: yetEvaluated = True if yetEvaluated: riprova = True if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 havetoskip = False while riprova: try: while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(fromIn) + ".lst", os.path.join(outputDic, str(fromIn) + ".lst"), conditioEND=SHELXE_LST_END_CONDITION, testEND=SHELXE_LST_END_TEST) if isinstance(wse, bool) and not wse: wser = cm.get_remote_file(str(fromIn) + '.lst', os.path.join(outputDic, str(fromIn) + '.lst'), conditioEND=SHELXE_LST_FAILURE_CONDITION, testEND=SHELXE_LST_END_TEST) if isinstance(wser, bool) and not wser: print 'File ' + str(fromIn) + '.lst not ready sleeping 3 seconds...' time.sleep(3) continue else: havetoskip = True break wse = cm.get_remote_file(str(fromIn) + ".pdb", os.path.join(outputDic, str(fromIn) + ".pdb"), conditioEND=SHELXE_PDB_PHS_END_CONDITION, testEND=SHELXE_PDB_PHS_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".pdb not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(fromIn) + ".phs", os.path.join(outputDic, str(fromIn) + ".phs"), conditioEND=SHELXE_PDB_PHS_END_CONDITION, testEND=SHELXE_PDB_PHS_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".phs not ready sleeping 3 seconds..." time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + ".lst")): failure = checkYOURoutput(os.path.join(outputDic, str(fromIn) + ".lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST, failure_test=SHELXE_LST_FAILURE_CONDITION_LOCAL) if failure == None: havetoskip = True break while True: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + ".pdb")) btest = os.path.exists(os.path.join(outputDic, str(fromIn) + ".phs")) if atest and btest: break break else: time.sleep(3) continue print "Evaluating " + str(fromIn) + " model corresponding to: " + str( convNames[str(fromIn) + startexp]) if not havetoskip and renamePDBs: shutil.move(outputDic + str(fromIn) + ".pdb", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".pdb")) if insfile == None: shutil.move(outputDic + str(fromIn) + ".pda", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".pda")) else: shutil.move(outputDic + str(fromIn) + ".phi", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".phi")) shutil.move(outputDic + str(fromIn) + ".ins", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".ins")) shutil.move(outputDic + str(fromIn) + ".lst", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".lst")) shutil.move(outputDic + str(fromIn) + ".hkl", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".hkl")) shutil.move(outputDic + str(fromIn) + ".phs", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".phs")) try: shutil.move(outputDic + str(fromIn) + ".pdo", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".pdo")) except: pass try: shutil.move(outputDic + str(fromIn) + ".ent", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".ent")) shutil.move(outputDic + str(fromIn) + "_trace.ps", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( "_trace.ps")) except: pass if not havetoskip and method == "fast": pdbf = outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".pdb") shutil.copyfile(pdbf, dirPath + "/" + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".pdb")) riprova = False if havetoskip: os.remove(outputDic + str(fromIn) + startexp) os.remove(outputDic + str(fromIn) + ".lst") os.remove(outputDic + str(fromIn) + ".hkl") try: os.remove(outputDic + str(fromIn) + ".ent") os.remove(outputDic + str(fromIn) + ".ins") os.remove(outputDic + str(fromIn) + ".phi") except: pass # if hasattr(cm,"channel"): # print cm.remove_remote_file(str(fromIn)+".pda") # print cm.remove_remote_file(str(fromIn)+".lst") # print cm.remove_remote_file(str(fromIn)+".pdb") # print cm.remove_remote_file(str(fromIn)+".phs") # print cm.remove_remote_file(str(fromIn)+".hkl") fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) except: print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True time.sleep(3) now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) print "Now all models are performed." if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file(PATH_NEW_SHELXE) cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) def evaluateExp_CC(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, convNames, isArcimboldo=False, usePDO=False, initcc_global=True, savePHS=False, archivingAsBigFile=True, phs_fom_statistics=False): status = "--" fromIn = 0 toIn = nqueue - 1 # rotas = (CLUDATA["heapSolutions"]).asList() ClustPDB = [] eseguitoUnaVolta = False hp = ADT.Heap() dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) ndir = 0 dirente2 = "" current_dir2 = "" tarro = None outputDic = "" while fromIn <= toIn: yetEvaluated = True if yetEvaluated: if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if archivingAsBigFile: if tarro != None: tarro.close() tarro = tarfile.open(os.path.join(outputDic, "" + str(ndir) + ".tar.gz"), "w:gz") if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 riprova = True while riprova: try: while 1: print "Evaluating " + str(fromIn) + " model corresponding to: " + str( convNames[str(fromIn) + ".pda"]) if hasattr(cm, "channel"): wse = cm.get_remote_file(str(fromIn) + ".lst", os.path.join(outputDic, str(fromIn) + ".lst"), conditioEND=SHELXE_LST_END_CONDITION, testEND=SHELXE_LST_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".lst not ready sleeping 3 seconds..." time.sleep(3) continue if savePHS: wse = cm.get_remote_file(str(fromIn) + ".phs", os.path.join(outputDic, str(fromIn) + ".phs")) if usePDO: wse = cm.get_remote_file(str(fromIn) + ".pdo", os.path.join(outputDic, str(fromIn) + ".pdo")) break elif os.path.exists(os.path.join(outputDic, str(fromIn) + ".lst")): checkYOURoutput(os.path.join(outputDic, str(fromIn) + ".lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST) break else: time.sleep(3) continue f = open(outputDic + str(fromIn) + ".lst", "r") lines = f.readlines() f.close() nAtoms = -1 nER = -1 initCC = -1 finalCC = -1 wMPEa = -1 wMPEb = -1 wMPEc = -1 wMPEd = -1 shx = 0.0 shy = 0.0 shz = 0.0 p = subprocess.Popen('grep wMPE ' + outputDic + str(fromIn) + ".lst", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) pout, perr = p.communicate() pout = pout.strip() outlist = pout.splitlines() if len(outlist) > 0: wMPEa = float("".join(outlist[0].split("/")).split()[-2]) wMPEb = float("".join(outlist[0].split("/")).split()[-1]) wMPEc = float("".join(outlist[-1].split("/")).split()[-2]) wMPEd = float("".join(outlist[-1].split("/")).split()[-1]) contrast = [-1] connect = [-1] for up in range(len(lines)): line = lines[up] liloline = (line.strip()).split() if (line.strip()).startswith("Overall CC between"): line3 = line.strip() line2 = (lines[up - 2]).strip() line2L = line2.split() line3L = line3.split() try: nER = int(line2L[0]) except Exception: nER = 0 initCC = float((line3L[-1])[:-1]) # print "INITCC is",initcc continue elif len(liloline) == 8 and liloline[1] == "Atoms" and liloline[2] == "read" and liloline[ 3] == "from" and liloline[4] == "PDB": nAtoms = int(liloline[0]) # print "NATOMS",nAtoms continue if (line.strip()).startswith("Estimated mean FOM ="): line = line.strip() lineL = line.split() finalCC = float(lineL[-2]) # print "FINAL CC", finalCC if (line.strip()).startswith("Shift from model"): line = line.strip() lineL = line.split(" ") shx = float(lineL[-3][3:]) shy = float(lineL[-2][3:]) shz = float(lineL[-1][3:]) # print "SHELXE origin shift",shx,shy,shz continue if (line.strip()).startswith("") and len(line.split()) > 9: line = line.strip() lineL = line.split() contrast.append(float(lineL[5][:-1])) connect.append(float(lineL[8])) # if initCC == -1: # continue riprova = False fe = open(outputDic + str(fromIn) + ".pda", "r") fer = fe.readlines() fe.close() clus = None for luc in fer: if luc.startswith("REMARK CLUSTER"): clus = luc.split()[2] break if clus is None: clus = "-1" if not hasattr(cm, "channel") and phs_fom_statistics: f = open(outputDic + str(fromIn) + ".phs", "r") allphs = f.readlines() f.close() summa_fom = [] for line in allphs: # lista = line.strip().split() summa_fom.append(float(line[23:29])) summa_fom = sorted(summa_fom) summa_fom = summa_fom[-500:] mean_fom = numpy.mean(summa_fom) std_fom = numpy.std(summa_fom) else: mean_fom = -1 std_fom = -1 f = open(outputDic + "../tempSolCC.sum", "a") if "_" in clus: clus = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), clus.split("_"))))) f.write("===========\n") f.write("MODEL: " + str(fromIn) + "\tCORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])) + "\tCLUSTER: " + str( clus) + "\n") f.write("NATOMS: " + str(nAtoms) + "\t" + "NER: " + str(nER) + "\t" + "INITCC: " + str( initCC) + "\t" + "PSEUDO-FREE: " + str(finalCC) + "\t" + "wMPE: " + str(wMPEa) + " / " + str( wMPEb) + " -- " + str(wMPEc) + " / " + str(wMPEd) + "\n") f.write("SHIFT-ORIG: " + str(shx) + " " + str(shy) + " " + str(shz) + "\tMAX-CONTRAST: " + str( max(contrast)) + "\tMAX-CONNECT: " + str(max(connect)) + "\tmfom: " + str( mean_fom) + "\tsfom: " + str(std_fom) + " \n") f.write("===========\n") f.close() shutil.move(outputDic + str(fromIn) + ".pda", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])) dizio = {"model": fromIn, "corresp": outputDic + os.path.basename(convNames[str(fromIn) + ".pda"]), "natoms": nAtoms, "ner": nER, "initcc": initCC, "finalcc": finalCC, "cluster": clus, "wMPE_init": [wMPEa, wMPEb], "wMPE_end": [wMPEc, wMPEd], "shift_origin": [shx, shy, shz], "contrast": max(contrast), "connect": max(connect), "mfom": mean_fom, "sfom": std_fom} ClustPDB.append(dizio) if initcc_global: hp.push(-1 * dizio["initcc"], dizio) else: # initcc per residue hp.push(-1 * (dizio["initcc"] / dizio["ner"]), dizio) # PULIZIA FILES INUTILI # os.remove(outputDic+str(fromIn)+".pda") if not savePHS: os.remove(outputDic + str(fromIn) + ".lst") try: os.remove(outputDic + str(fromIn) + ".phs") except: pass else: shutil.move(outputDic + str(fromIn) + ".lst", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") shutil.move(outputDic + str(fromIn) + ".phs", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") if archivingAsBigFile: tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") try: os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") except: pass else: tarro = tarfile.open( outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".tar.gz", "w:gz") tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") tarro.close() os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") try: os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") except: pass os.remove(outputDic + str(fromIn) + ".hkl") try: if not usePDO: os.remove(outputDic + str(fromIn) + ".pdo") else: shutil.move(outputDic + str(fromIn) + ".pdo", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".pdo") except: pass try: os.remove(outputDic + str(fromIn) + ".ent") os.remove(outputDic + str(fromIn) + "_trace.ps") except: pass # if hasattr(cm,"channel"): # print cm.remove_remote_file(str(fromIn)+".pda") # print cm.remove_remote_file(str(fromIn)+".lst") # print cm.remove_remote_file(str(fromIn)+".pdb") # print cm.remove_remote_file(str(fromIn)+".phs") # print cm.remove_remote_file(str(fromIn)+".hkl") fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) except: # print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True time.sleep(1) now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) print "Now all models are performed." if archivingAsBigFile: if tarro != None: tarro.close() if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file(PATH_NEW_SHELXE) cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) SystemUtility.close_connection(DicGridConn, DicParameters, cm) f = open(outputDic + "../solCC.sum", "w") visited = [] for pla in range(hp.len()): item = hp.pop() prio, ele = item leden = os.path.basename(str(ele["corresp"])).split("_") if not isArcimboldo and len(leden) == 5 and "_".join(leden[:-1]) in visited: os.remove(ele["corresp"]) continue elif not isArcimboldo and len(leden) == 5: visited.append("_".join(leden[:-1])) npath = os.path.join(os.path.dirname(ele["corresp"]), "_".join(leden[:-1]) + ".pdb") shutil.move(ele["corresp"], npath) ele["corresp"] = npath f.write("===========\n") f.write("MODEL: " + str(pla + 1) + "\tCORRESP.: " + os.path.abspath(str(ele["corresp"])) + "\tCLUSTER: " + str( ele["cluster"]) + "\n") f.write("NATOMS: " + str(ele["natoms"]) + "\t" + "NER: " + str(ele["ner"]) + "\t" + "INITCC: " + str( ele["initcc"]) + "\t" + "PSEUDO-FREE: " + str(ele["finalcc"]) + "\t" + "wMPE: " + str( ele["wMPE_init"][0]) + " / " + str(ele["wMPE_init"][1]) + " -- " + str(ele["wMPE_end"][0]) + " / " + str( ele["wMPE_end"][1]) + "\n") f.write("SHIFT-ORIG: " + str(ele["shift_origin"][0]) + " " + str(ele["shift_origin"][1]) + " " + str( ele["shift_origin"][2]) + "\tMAX-CONTRAST: " + str(ele["contrast"]) + "\tMAX-CONNECT: " + str( ele["connect"]) + "\tmfom: " + str(ele["mfom"]) + "\tsfom: " + str(ele["sfom"]) + "\t" + str( ele["wMPE_init"][0]) + " \n") f.write("===========\n") f.close() os.remove(outputDic + "../tempSolCC.sum") CC_Val1, con = readCCValFromSUM(outputDic + "../solCC.sum") return CC_Val1 def selectCC(CC_Val3): hp = ADT.Heap() for item in c1: prio = -1 * (item["initcc"] / item["ner"]) hp.push(prio, item) done = [] lio = [] for tup in hp: prio, item = tup if os.path.basename(item["corresp"]) not in done: done.append(os.path.basename(item["corresp"])) dire = os.path.split(item["corresp"])[0] li = os.path.basename(item["corresp"]).split("_")[:-1] filename = os.path.join(dire, "_".join(li)[:-1] + ".pdb") shutil.move(item["corresp"], filename) item["corresp"] = filename lio.append(item) print "", os.path.basename(filename), item["ner"], item["initcc"], item["initcc"] / item["ner"], \ item["wMPE_init"][0] return lio def unifyCC2(c1, c2, c3, convNames, CluAll, suffixA="", suffixB="", suffixC="", llgn=5, solution_sorting_scheme="AUTO"): if solution_sorting_scheme not in ["AUTO", "LLG", "ZSCORE", "INITCC", "COMBINED"]: solution_sorting_scheme = "AUTO" isminuso = all(map(lambda x: int(x["ner"]) != 0, c1 + c2 + c3)) c1 = [dict(dizio, **{"suffix": suffixA}) for dizio in c1] c2 = [dict(dizio, **{"suffix": suffixB}) for dizio in c2] c3 = [dict(dizio, **{"suffix": suffixC}) for dizio in c3] print "MODE", solution_sorting_scheme, "IS SHELXE Optimization ACTIVE?", isminuso if solution_sorting_scheme == "INITCC" or (isminuso and solution_sorting_scheme == "AUTO"): initccl = sorted(c1 + c2 + c3, key=lambda x: x["initcc"], reverse=True) done = [] initcclo = [] print "SORTING BY INITCC" for dil in range(len(initccl)): if os.path.basename(initccl[dil]["corresp"]) not in done: print "NAME:", initccl[dil]["corresp"], "INITCC:", initccl[dil]["initcc"], "NER:", initccl[dil]["ner"] done.append(os.path.basename(initccl[dil]["corresp"])) initcclo.append(initccl[dil]) return initcclo #print CluAll #for clu in CluAll: #for item in clu["heapSolutions"].asList(): #print item[1] inter = [item[1] for sublist in map(lambda y: y["heapSolutions"].asList(), CluAll) for item in sublist] #all solutions among all clusters # previ = [item for sublist in [y["fixed_frags"] for y in inter if "fixed_frags" in y] for item in sublist] previ = [{"name": os.path.basename(dizio["corresp"])[:-4], "llg": dizio["llg"], "zscore": dizio["zscore"]} for dizio in c1 + c2 + c3 if "llg" in dizio] #print "ANTES" #for dizio in inter+previ: # print convNames[dizio["name"]],dizio["name"] #print "DESPUES" try: llgd = {dizio["name"].split("xx")[1] if len((dizio["name"].split("xx")[1]).split("-")) > 1 else dizio["name"].split("xx")[1] + "-1": [dizio["llg"], dizio["zscore"]] for dizio in inter + previ} except: # print sys.exc_info() # traceback.print_exc(file=sys.stdout) llgd = {os.path.basename(convNames[dizio["name"]]): [dizio["llg"], dizio["zscore"]] for dizio in inter + previ} if solution_sorting_scheme == "LLG" or (not isminuso and solution_sorting_scheme == "AUTO"): try: #for x in c1+c2+c3: # print os.path.basename(x["corresp"]).split("xx") # print llgd[os.path.basename(x["corresp"]).split("xx")[1][:-4]] llgl = sorted(c1 + c2 + c3, key=lambda x: llgd[os.path.basename(x["corresp"]).split("xx")[1][:-4]][0], reverse=True) llgl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][0], "zscore": llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][1]}) for dizio in llgl] except: #print sys.exc_info() #traceback.print_exc(file=sys.stdout) #for x in c1+c2+c3: # print os.path.basename(x["corresp"]) llgl = sorted(c1 + c2 + c3, key=lambda x: llgd[os.path.basename(x["corresp"])][0], reverse=True) llgl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])][0], "zscore": llgd[os.path.basename(dizio["corresp"])][1]}) for dizio in llgl] print "SORTING BY LLG" done = [] llglo = [] for dil in range(len(llgl)): if os.path.basename(llgl[dil]["corresp"]) not in done: print "NAME:", llgl[dil]["corresp"], "LLG:", llgl[dil][ "llg"] # d[os.path.basename(llgl[dil]["corresp"]).split("xx")[1][:-4]][0] done.append(os.path.basename(llgl[dil]["corresp"])) llglo.append(llgl[dil]) return llglo if solution_sorting_scheme == "ZSCORE": try: zscl = sorted(c1 + c2 + c3, key=lambda x: llgd[os.path.basename(x["corresp"]).split("xx")[1][:-4]][1], reverse=True) zscl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][0], "zscore": llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][1]}) for dizio in zscl] except: zscl = sorted(c1 + c2 + c3, key=lambda x: llgd[os.path.basename(x["corresp"])][1], reverse=True) zscl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])][0], "zscore": llgd[os.path.basename(dizio["corresp"])][1]}) for dizio in zscl] print "SORTING BY ZSCORE" done = [] zsclo = [] for dil in range(len(zscl)): if os.path.basename(zscl[dil]["corresp"]) not in done: print "NAME:", zscl[dil]["corresp"], "ZSCORE:", zscl[dil][ "zscore"] # llgd[os.path.basename(zscl[dil]["corresp"]).split("xx")[1][:-4]][1] done.append(os.path.basename(zscl[dil]["corresp"])) zsclo.append(zscl[dil]) return zsclo if solution_sorting_scheme == "COMBINED": maxinitcc = max(float(max(map(lambda x: x["initcc"], c1 + c2 + c3))), 1.0) maxllg = max(float(max(map(lambda x: x[0], llgd.values()))), 1.0) maxzscore = max(float(max(map(lambda x: x[1], llgd.values()))), 1.0) try: # combl = sorted(c1+c2+c3, key=lambda x: (llgd[os.path.basename(x["corresp"]).split("xx")[1][:-4]][0]/maxllg)+(llgd[os.path.basename(x["corresp"]).split("xx")[1][:-4]][1]/maxzscore)+(x["initcc"]/maxinitcc), reverse=True) # combl = [dict(dizio, **{"llg":llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][0],"zscore":llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][1]}) for dizio in combl] combl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][0], "zscore": llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][1], "combined": (llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][ 0] / maxllg) + ( llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][ 1] / maxzscore) + (dizio["initcc"] / maxinitcc)}) for dizio in c1 + c2 + c3] combl = sorted(combl, key=lambda x: x["combined"], reverse=True) except: combl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])][0], "zscore": llgd[os.path.basename(dizio["corresp"])][1], "combined": (llgd[os.path.basename(dizio["corresp"])][0] / maxllg) + ( llgd[os.path.basename(dizio["corresp"])][1] / maxzscore) + ( dizio["initcc"] / maxinitcc)}) for dizio in c1 + c2 + c3] combl = sorted(combl, key=lambda x: x["combined"], reverse=True) print "SORTING BY COMBINED FOM of LLG, SCORE, INITCC" done = [] comblo = [] for dil in range(len(combl)): if os.path.basename(combl[dil]["corresp"]) not in done: print "NAME:", combl[dil]["corresp"], "LLG:", combl[dil]["llg"], "ZSCORE:", combl[dil][ "zscore"], "INITCC:", combl[dil]["initcc"], "NER", combl[dil]["ner"], "COMBINED:", combl[dil][ "combined"] done.append(os.path.basename(combl[dil]["corresp"])) comblo.append(combl[dil]) return comblo def unifyCC(c1, c2, c3, convNames, CluAll, suffixA="", suffixB="", suffixC="", llgn=5, solution_sorting_scheme="AUTO"): if solution_sorting_scheme not in ["AUTO", "LLG", "ZSCORE", "INITCC", "COMBINED"]: solution_sorting_scheme = "AUTO" hp = ADT.Heap() for item in c1: prio = -1 * item["initcc"] item["suffix"] = suffixA hp.push(prio, item) for item in c2: prio = -1 * item["initcc"] item["suffix"] = suffixB hp.push(prio, item) for item in c3: prio = -1 * item["initcc"] item["suffix"] = suffixC hp.push(prio, item) lio = [] done = [] hwn = 0 hl = ADT.Heap() for clu in CluAll: for prio, rotaz in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): if hwn >= llgn: hwn = 0 break else: hl.push(prio, rotaz) hwn += 1 hwn = 0 if len(hl.asList()) > 0: for sol in hl: if hwn >= llgn: break pin, itn = sol bestllg = os.path.basename(convNames[itn["name"]]) for pri, itm in hp.asList(): if os.path.basename(itm["corresp"]) == bestllg: lio.append(itm) done.append(bestllg) print "Adding top solution:", bestllg, "with LLG:", itn["llg"], "and ZSCORE:", itn["zscore"] hwn += 1 break for tup in hp: prio, item = tup if os.path.basename(item["corresp"]) not in done: lio.append(item) done.append(os.path.basename(item["corresp"])) return lio def shelxe_cycle_BORGES(lock, DicParameters, ClusAll, cm, sym, DicGridConn, i, current_directory, nameOutput, dirPathPart, fromNcycles, toNcycles, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, quate, laue, ncs, mtzP1, NCp1, Fp1, SIGFp1, spaceGroup, hkl, ent, cell_dim, shlxLinea0, shlxLineaB, traceShelxe, fixed_frags, clusteringAlg, thresholdCompare, evaLLONG, excludeLLG, excludeZscore, USE_PACKING, USE_TRANSLA, USE_REFINEMENT, NUMBER_REF_CYCLES, USE_RGR, isSHREDDER, tuple_phi=None): # SystemUtility.open_connection(DicGridConn,DicParameters,cm) fromphis = False insfile = None if isinstance(tuple_phi, tuple): fromphis = True # tuple_phi = (name.ins, {"name.phi":{""}) insfile = tuple_phi[0] lista_phis = tuple_phi[1].keys() if not os.path.exists(dirPathPart): os.makedirs(dirPathPart) for phi in lista_phis: try: os.symlink(phi, os.path.join(dirPathPart, os.path.basename(phi))) except: pass else: if not (os.path.exists(dirPathPart)): shutil.copytree(os.path.join(current_directory, "10_PREPARED/" + str(i) + "/"), dirPathPart) if fromphis: shlxLineaB = shlxLineaB + " -v0" for t in range(fromNcycles, toNcycles): SystemUtility.open_connection(DicGridConn, DicParameters, cm) if t == 2: shlx = shlxLineaB.split() for toc in range(len(shlx)): param = shlx[toc] if param.startswith("-a"): param = "-a1" if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"): shlx[toc + 1] = "" shlx[toc] = param if param.startswith("-K"): if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"): shlx[toc + 1] = "" shlx[toc] = "" shlxLineaB = " ".join(shlx) directoexp = "./11_EXP/" jobexp = "11_EXP_" fromdirexp = "11_EXP" if fromphis: directoexp = "./11_EXP_alixe/" jobexp = "11_EXP_alixe_" fromdirexp = "11_EXP_alixe" if not traceShelxe: pass else: todel = False if os.path.exists(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")): for r, subF, fi in os.walk(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")): for fileu in fi: pdbf = os.path.join(r, fileu) # print "---", pdbf # print pdbf.split("xx"),pdbf.split("FR") if len(pdbf.split("_")) < 3: todel = True if todel: shutil.rmtree(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")) shutil.rmtree(dirPathPart) if t == 1: if fromphis: if not os.path.exists(dirPathPart): os.makedirs(dirPathPart) for phi in lista_phis: os.symlink(phi, os.path.join(dirPathPart, os.path.basename(phi))) else: shutil.copytree(os.path.join(current_directory, "10_PREPARED/" + str(i) + "/"), dirPathPart) else: os.makedirs(dirPathPart) for r, subF, fi in os.walk( os.path.join(current_directory, directoexp + str(i) + "/" + str(t - 1) + "/")): for fileu in fi: pdbf = os.path.join(r, fileu) if pdbf.endswith(".pdb"): shutil.copyfile(pdbf, os.path.join(dirPathPart, os.path.basename(pdbf))) if not os.path.exists(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")): if t > 1: insfile = None fromphis = False (nqueue24, convNames_4) = startExpansion(cm, sym, jobexp + str(i) + "_" + str(t), os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/"), hkl, ent, nice, cell_dim, spaceGroup, shlxLineaB, dirPathPart, single=True, insfile=insfile) shutil.rmtree(dirPathPart) os.makedirs(dirPathPart) evaluateExp(DicParameters, cm, sym, DicGridConn, jobexp + str(i) + "_" + str(t), os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/"), nqueue24, convNames_4, dirPathPart, method="fast", insfile=insfile) else: convNames_4 = {} shutil.rmtree(dirPathPart) os.makedirs(dirPathPart) for r, subF, fi in os.walk(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")): for fileu in fi: pdbf = os.path.join(r, fileu) if pdbf.endswith(".pdb"): shutil.copyfile(pdbf,os.path.join(dirPathPart,os.path.basename(pdbf))) mode = "ARCIMBOLDO-BORGES" if isSHREDDER: mode = "ARCIMBOLDO-SHREDDER" writeOutputFile(lock=lock, DicParameters=DicParameters, ClusAll=ClusAll, outputDir=current_directory, filename=nameOutput, mode=mode, step="FAST_" + str(t) + "_" + str(i), ensembles=convNames_4, frag_fixed=fixed_frags, path1=os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/*/"), useRefP1=USE_REFINEMENT, numberCyclesRef=NUMBER_REF_CYCLES, useRGR=USE_RGR, usePacking=USE_PACKING, useTransla=USE_TRANSLA, fromphis=fromphis, fromdirexp=fromdirexp) forceGenerateHTML(lock, current_directory, nameOutput) SystemUtility.close_connection(DicGridConn, DicParameters, cm) shutil.rmtree(dirPathPart) SystemUtility.endCheckQueue() def shelxe_cycles(lock, DicParameters, cm, sym, DicGridConn, output_directory, nameOutput, dirPathPart, fragdirectory, fromNcycles, toNcycles, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, quate, laue, ncs, spaceGroup, hkl, ent, cell_dim, shlxLineaB, USE_PACKING, USE_TRANSLA): SystemUtility.open_connection(DicGridConn, DicParameters, cm) #if os.path.exists(dirPathPart): # shutil.rmtree(dirPathPart) #shutil.copytree(fragdirectory + "7_PREPARED/", dirPathPart) # NOTE: The following allows to start one cycle with no autrotracing and get the finalcc if toNcycles <= 1: toNcycles = 2 for t in range(fromNcycles, toNcycles): if t == 2: shlx = shlxLineaB.split() for toc in range(len(shlx)): param = shlx[toc] if param.startswith("-a"): param = "-a1" if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"): shlx[toc + 1] = "" shlx[toc] = param if param.startswith("-K"): if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"): shlx[toc + 1] = "" shlx[toc] = "" shlxLineaB = " ".join(shlx) todel = False if os.path.exists(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")): for r, subF, fi in os.walk(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")): for fileu in fi: pdbf = os.path.join(r, fileu) if not (fileu.startswith("th") and len(pdbf.split("xx")) > 1 and len(pdbf.split("FR")) > 1): # print "---", pdbf # print pdbf.split("xx"),pdbf.split("FR") todel = True if todel: shutil.rmtree((os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/"))) if not os.path.exists(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")): (nqueue24, convNames_4) = startExpansion(cm, sym, "8_EXP_LIBRARY" + "_" + str(t), os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/"), hkl, ent, nice, cell_dim, spaceGroup, shlxLineaB, dirPathPart, single=True) shutil.rmtree(dirPathPart) os.makedirs(dirPathPart) evaluateExp(DicParameters, cm, sym, DicGridConn, "8_EXP_LIBRARY" + "_" + str(t), os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/"), nqueue24, convNames_4, dirPathPart, method="fast", renamePDBs=True) else: convNames_4 = {} shutil.rmtree(dirPathPart) os.makedirs(dirPathPart) for r, subF, fi in os.walk(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")): for fileu in fi: pdbf = os.path.join(r, fileu) if pdbf.endswith(".pdb"): shutil.copyfile(pdbf,os.path.join(dirPathPart,os.path.basename(pdbf))) writeOutputFile(lock, DicParameters, None, output_directory, nameOutput, "ARCIMBOLDO", "FAST_" + str(t), convNames_4, 1, path1=os.path.join(fragdirectory, "8_EXP_LIBRARY/" + str(t) + "/*/"), usePacking=USE_PACKING, useTransla=USE_TRANSLA) forceGenerateHTML(lock, output_directory, nameOutput) shutil.rmtree(dirPathPart) SystemUtility.endCheckQueue() def readSpaceGroupFromOut(out): oi = out.splitlines() spaceGroup = "" for lu in range(len(oi)): uu = (oi[lu]).strip() if uu.startswith("Space-Group Name (Hall Symbol):"): for ed in uu[31:].strip(): if not ed.startswith("("): spaceGroup += ed else: break break return spaceGroup.strip() def uniqueReflectionsFromOut(out): # READING THE NUMBER OF UNIQUE REFLECTION FROM OUT oi = out.splitlines() ref = None for lu in range(len(oi)): uu = (oi[lu]).strip() if uu.startswith("Number of Reflections in Selected Resolution Range:"): ref = uu.split()[7] break return int(ref) def resolutionFromOut(out): # READING THE RESOLUTION FROM OUT oi = out.splitlines() res = None for lu in range(len(oi)): uu = (oi[lu]).strip() if uu.startswith("Resolution Selected:"): res = uu.split()[2] break return float(res) def cellDimensionFromOut(out): # READING THE CELL DIMENSIONS FROM MTZ oi = out.splitlines() cell_dim = None for lu in range(len(oi)): uu = (oi[lu]).strip() if uu.startswith("Unit Cell:"): cell_dim = uu.split()[2:] break return cell_dim