#! /usr/bin/env python
# -*- coding: utf-8 -*-

# Module header: interpreter/encoding lines, imports and the first group of
# module-level configuration constants.  One statement per physical line:
# anything that follows a '#' on the same line is a comment, so the
# statements must not share a line with the shebang or with each other.

# The warning filters are installed before the remaining imports on purpose,
# so that they are already active while those modules are being imported.
import warnings

warnings.simplefilter("ignore", DeprecationWarning)
warnings.simplefilter("ignore", UserWarning)

import sys
import numpy
import os
import shutil
import time
import re
import datetime
import SystemUtility
import subprocess
import Quaternions
import Grid
import tarfile
import stat
import ADT
import scipy.stats
# import scipy.cluster.vq
import threading
import Bioinformatics
import copy
import traceback
import itertools
import pickle
import json
import cStringIO
import BORGES_MATRIX
import getpass

# import matplotlib.pyplot as plt
# import matplotlib.colors as mcolors
# from matplotlib.backends.backend_pdf import PdfPages
from Bio.PDB import *
from termcolor import colored
import xml.etree.ElementTree as ET

# Auto manipulated from the library
BRFfor = -1
listaEva = {}

# To be set up from the binaries call
LOCAL = True
SHELXE_REQUIREMENTS = ""
PHASER_REQUIREMENTS = ""
BORGES_REQUIREMENTS = ""
GRID_TYPE_L = ""
GRID_TYPE_R = ""
PATH_NEW_PHASER = ""
PATH_NEW_SHELXE = ""
PATH_NEW_BORGESCLIENT = ""
PATH_NEW_ARCIFIRE = ""

# INTERNAL VARIABLES TO CHANGE ONLY IF PHASER OR SHELXE CHANGES#
PHASER_V1 = "2.6"
PHASER_V2 = "2.7"
SHELXE_V1 = "2014/4"
SHELXE_V2 = "2016/1"
SHELXE_V3 = "2016/1E"
SHELXE_V4 = "2016"
PYTHON_V = (2, 7, 9)
GCC_V = "[GCC 4.4.3]"

# Shell pipelines (the *_CONDITION names) and plain marker strings (the
# *_LOCAL names) naming the text looked for in SHELXE .lst files.
SHELXE_LST_END_CONDITION = """grep "finished" *.lst | wc -l"""
SHELXE_LST_FAILURE_CONDITION = """grep "giving up" *.lst | wc -l"""
SHELXE_PDB_PHS_END_CONDITION = """ls *.lst | wc -l"""
SHELXE_LST_END_CONDITION_LOCAL = """finished"""
SHELXE_LST_FAILURE_CONDITION_LOCAL = """giving up"""

# Same scheme for the Phaser .out files.
PHASER_OUT_END_CONDITION = """grep "EXIT STATUS: SUCCESS" *.out | wc -l"""
PHASER_OUT_FAILURE_CONDITION = """grep "EXIT STATUS: FAILURE" *.out | wc -l"""
PHASER_OUT_ANY_CASE = """grep "EXIT STATUS:" *.out | wc -l"""
PHASER_NMA_END_CONDITION = PHASER_OUT_END_CONDITION
PHASER_RLIST_SOL_PDB_END_CONDITION = """ls *.out | wc -l"""
PHASER_OUT_END_CONDITION_LOCAL = """EXIT STATUS: SUCCESS"""
PHASER_OUT_FAILURE_CONDITION_LOCAL = """EXIT STATUS: FAILURE""" PHASER_NMA_END_CONDITION_LOCAL = PHASER_OUT_END_CONDITION_LOCAL PHASER_OUT_END_TEST = 1 PHASER_NMA_END_TEST = PHASER_OUT_END_TEST PHASER_RLIST_SOL_PDB_END_TEST = 0 SHELXE_LST_END_TEST = 1 SHELXE_PDB_PHS_END_TEST = 0 NUMBER_OF_FILES_PER_DIRECTORY = 1000 BASE_SUM_FROM_WD = True LAST_AVAILABLE_ROTID = 0 MAP_OF_ROT_COMB = {} POSTMORTEM = False STOP_IF_SOLVED = True ############################################################### ####################################################################################################### # FUNCTIONS # ####################################################################################################### def mergeRotClusterObjects(Clu1, Clu2, suffix="merged",reset_euler="none"): for clu2 in Clu2: if len(clu2["heapSolutions"].asList()) > 0: for clu1 in Clu1: if len(clu1["heapSolutions"].asList()) > 0: if clu1["heapSolutions"].asList()[0][1]["n_prev_cluster"] == clu2["heapSolutions"].asList()[0][1][ "n_prev_cluster"]: kd = ADT.Heap() for item in clu1["heapSolutions"].asList(): prio, rota = item rota["name"] = rota["name"] + suffix if reset_euler.lower() in ["first","both"]: rota["euler"] = [0.0,0.0,0.0] kd.push(prio, rota) for item in clu2["heapSolutions"].asList(): prio, rota = item rota["name"] = rota["name"] if reset_euler.lower() in ["second","both"]: rota["euler"] = [0.0,0.0,0.0] kd.push(prio, rota) clu2["heapSolutions"] = kd break return Clu2 def generateFakeMRSum_sols(model_file, initlocations, mode, single_cluster, output_direc, namesum, arcimboldo=False): Clu = [] convn = {} counter_pdbs = 0 dic = {"heapSolutions": ADT.Heap()} for solu in initlocations: counter_pdbs += 1 prio = (1.0, 1.0) namerota = "" if not arcimboldo: namerota = 'ensemble' + str(counter_pdbs) else: namerota = "ensembleIDxx0FR" + str(counter_pdbs) + "_1" rota = {'rotationMatrices': [[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]], 'frac': [solu[3], solu[4], solu[5]], 'name': namerota, 
'bfactor': 0.0, 'zscore': 1.0, 'numInRlist': counter_pdbs, 'quaternion': [0.0, 0.0, 0.0, 0.0], 'elong': 0.0, 'llg': 1.0, 'euler': [solu[0], solu[1], solu[2]]} if mode == "TRA" and arcimboldo: rota["name"] = rota["name"] + "-1" convn[rota['name']] = model_file if arcimboldo: if single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) else: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) else: if mode in ["ROT", "TRA"] and single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) elif mode == "ROT" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(dic) elif mode == "TRA" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) Clu = [] dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) writeSumClusters(Clu, os.path.join(output_direc, str(rota['n_prev_cluster'])), namesum, convn) if mode in ["ROT", "TRA"] and single_cluster: Clu.append(dic) if arcimboldo: writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "ROT": writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "TRA" and single_cluster: writeSumClusters(Clu, os.path.join(output_direc, str(0)), namesum, convn) def generateFakeMRSum(models_dir, mode, single_cluster, output_direc, namesum, arcimboldo=False): Clu = [] convn = {} counter_pdbs = 0 dic = {"heapSolutions": ADT.Heap()} for root, subFolders, files in os.walk(models_dir): for fileu in files: pdbf = os.path.join(root, fileu) if 
pdbf.endswith(".pdb"): counter_pdbs += 1 prio = (1.0, 1.0) namerota = "" if not arcimboldo: namerota = 'ensemble' + str(counter_pdbs) else: namerota = "ensembleIDxx0FR" + str(counter_pdbs) + "_1" rota = {'rotationMatrices': [[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]], 'frac': [0.0, 0.0, 0.0], 'name': namerota, 'bfactor': 0.0, 'zscore': 1.0, 'numInRlist': counter_pdbs, 'quaternion': [0.0, 0.0, 0.0, 0.0], 'elong': 0.0, 'llg': 1.0, 'euler': [0.0, 0.0, 0.0]} if mode == "TRA" and arcimboldo: rota["name"] = rota["name"] + "-1" convn[rota['name']] = pdbf if arcimboldo: if single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) else: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) else: if mode in ["ROT", "TRA"] and single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) elif mode == "ROT" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(dic) elif mode == "TRA" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) Clu = [] dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) writeSumClusters(Clu, os.path.join(output_direc, str(rota['n_prev_cluster'])), namesum, convn) if mode in ["ROT", "TRA"] and single_cluster: Clu.append(dic) if arcimboldo: writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "ROT": writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "TRA" and single_cluster: writeSumClusters(Clu, os.path.join(output_direc, str(0)), 
namesum, convn) def mergeZSCOREinRNP(DicParameters, sumPACK, CluAll, convNames, isARCIMBOLDO_LITE=True): convPACK, CluPACK, RotClu, encn = readClustersFromSUMToDB(DicParameters, sumPACK, "ROTSOL") inter = [item[1] for sublist in map(lambda y: y["heapSolutions"].asList(), CluPACK) for item in sublist] zscored = {(dizio["name"], tuple(sorted(dizio["original_rotcluster"].split("_"))) if isARCIMBOLDO_LITE else dizio[ "n_prev_cluster"]): dizio["zscore"] for dizio in inter} Clu = [] for clun in CluAll: dic = {"heapSolutions": ADT.Heap()} for item in clun["heapSolutions"].asList(): prio, rota = item rota["zscore"] = zscored[(rota["name"], tuple(sorted(rota["original_rotcluster"].split("_"))) if isARCIMBOLDO_LITE else rota["n_prev_cluster"])] prio = (-1 * rota["llg"], -1 * rota["zscore"]) dic["heapSolutions"].push(prio, rota) Clu.append(dic) return Clu, convNames def readPMGYREsum(gyreSUMPath): f = open(gyreSUMPath, "r") alllines = f.readlines() f.close() CCVAL = [] for line in alllines: riga1 = line.split() model = getNewPathFromMerging(gyreSUMPath,riga1[1]) size = int(riga1[3]) rmsd_gyre = float(riga1[5]) rmsd_init = float(riga1[7]) core_init = int(riga1[9]) rmsd_fin = float(riga1[11]) core_fin = int(riga1[13]) rmsd_init_common = float(riga1[15]) rmsd_fin_common = float(riga1[17]) rmsd_diff = float(riga1[19]) core_common = int(riga1[21]) llg = float(riga1[23]) dizio = {"model": model, "size": size, "rmsd_gyre": rmsd_gyre, "rmsd_init": rmsd_init, "core_init": core_init, "rmsd_fin": rmsd_fin, "core_fin": core_fin, "rmsd_init_common": rmsd_init_common, "rmsd_fin_common": rmsd_fin_common, "rmsd_diff": rmsd_diff, "core_common": core_common, "llg": llg} CCVAL.append(dizio) return CCVAL def generateStatisticsPMGyre(CC_sol, outputDir, gyrePath, cycle_ref): if not os.path.exists(gyrePath): return if not os.path.exists(outputDir): os.makedirs(outputDir) CC_valuta = {} for ele in CC_sol: CC_valuta[os.path.basename(ele["corresp"])] = ele for q in range(cycle_ref - 1): sumg = 
os.path.join(gyrePath, str(q) + "/pm_gyre.sum") val = readPMGYREsum(sumg) graph1 = os.path.join(outputDir, "rmsdgyre_size_" + str(q)) graph1a = os.path.join(outputDir, "rmsdgyre_size_filtered_" + str(q)) qe = open(graph1 + ".scri", "w") qe.write("set terminal png size 800,1400\nset output \"" + graph1 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set ylabel "RMSD after Gyre"\n') qe.write('set xlabel "Size of the model"\n') qe.write("unset key\n") qc = open(graph1a + ".scri", "w") qc.write("set terminal png size 800,1400\nset output \"" + graph1a + ".png\"\n") qc.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qc.write("unset key\n") qc.write('set ylabel "RMSD after Gyre"\n') qc.write('set xlabel "Size of the model"\n') qn = open(graph1 + ".data", "w") qn.write("#\tX\tY\tC\n") qf = open(graph1a + ".data", "w") qf.write("#\tX\tY\tC\n") for ele in sorted(val, lambda x, y: cmp(x["size"], y["size"])): if ele["model"] in CC_valuta: qn.write(str(ele["size"]) + "\t" + str("%.2f" % ele["rmsd_gyre"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") qf.write(str(ele["size"]) + "\t" + str("%.2f" % ele["rmsd_gyre"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") else: qn.write(str(ele["size"]) + "\t" + str("%.2f" % ele["rmsd_gyre"]) + "\t" + str("%.2f" % 100) + "\n") qe.write("plot \"" + graph1 + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, 4 notitle with lines\n") qn.close() qe.close() qc.write("plot \"" + graph1a + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, 4 notitle with lines\n") qf.close() qc.close() graph2 = os.path.join(outputDir, "rmsdgyre_hyst_" + str(q)) qe = open(graph2 + ".scri", "w") qe.write("set terminal png size 2400,800\nset output \"" + graph2 + ".png\"\n") qe.write('set xlabel "RMSD after Gyre"\n') qe.write('set ylabel "Number of models"\n') 
qe.write("unset key\n") qe.write("""set xtics rotate out set style data histograms set style fill solid border set style histogram clustered """) qn = open(graph2 + ".data", "w") qn.write("#\tX\tY\n") dicr = {} for ele in val: if str("%.2f" % ele["rmsd_gyre"]) not in dicr: dicr[str("%.2f" % ele["rmsd_gyre"])] = 1 else: dicr[str("%.2f" % ele["rmsd_gyre"])] += 1 for key in sorted(dicr.keys(), lambda x, y: cmp(float(x), float(y))): hw = dicr[key] qn.write(key + "\t" + str(hw) + "\n") qe.write("plot \"" + graph2 + ".data\" using 2:xticlabels(1) title \"Frequency rmsd_gyre\"\n") qn.close() qe.close() graph3 = os.path.join(outputDir, "init_fin_" + str(q)) graph3a = os.path.join(outputDir, "init_fin_filtered_no_tr_" + str(q)) graph3b = os.path.join(outputDir, "init_fin_filtered_no_tr_bad_core_" + str(q)) qe = open(graph3 + ".scri", "w") qea = open(graph3a + ".scri", "w") qeb = open(graph3b + ".scri", "w") qe.write("set terminal png size 1000,1000\nset output \"" + graph3 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set xlabel "RMSD initial model vs final structure"\n') qe.write('set ylabel "RMSD gyred model vs final structure"\n') qe.write("unset key\n") qea.write("set terminal png size 1000,1000\nset output \"" + graph3a + ".png\"\n") qea.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qea.write('set xlabel "RMSD initial model vs final structure"\n') qea.write('set ylabel "RMSD gyred model vs final structure"\n') qea.write("unset key\n") qeb.write("set terminal png size 1000,1000\nset output \"" + graph3b + ".png\"\n") qeb.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qeb.write("unset key\n") qeb.write('set xlabel "RMSD initial model vs final structure"\n') qeb.write('set ylabel "RMSD gyred model vs final structure"\n') qn = open(graph3 + ".data", "w") 
qn.write("#\tX\tY\tC\n") qna = open(graph3a + ".data", "w") qna.write("#\tX\tY\tC\n") qnb = open(graph3b + ".data", "w") qnb.write("#\tX\tY\tC\n") for ele in sorted(val, lambda x, y: cmp(x["rmsd_init_common"], y["rmsd_init_common"])): if ele["model"] in CC_valuta: qn.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") qna.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") if ele["core_common"] >= (ele["size"] * 60) / 100.0: qnb.write(str("%.2f" % ele["rmsd_init_common"]) + "\t" + str( "%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") else: qn.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % 100) + "\n") qe.write("plot \"" + graph3 + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qn.close() qe.close() qea.write("plot \"" + graph3a + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qna.close() qea.close() qeb.write("plot \"" + graph3b + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qnb.close() qeb.close() graph4 = os.path.join(outputDir, "rmsd_diffn_" + str(q)) graph4a = os.path.join(outputDir, "rmsd_diffn_filtered_no_tr_" + str(q)) graph4b = os.path.join(outputDir, "rmsd_diffn_filtered_no_tr_bad_core_" + str(q)) qe = open(graph4 + ".scri", "w") qea = open(graph4a + ".scri", "w") qeb = open(graph4b + ".scri", "w") qe.write("set terminal png size 4000,500\nset output \"" + graph4 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n') qe.write('set xlabel "Model"\n') qe.write('set ylabel "Rmsd diff"\n') qe.write('set xtics 
rotate out\n') qe.write("unset key\n") qea.write("set terminal png size 3000,500\nset output \"" + graph4a + ".png\"\n") qea.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qea.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n') qea.write('set xlabel "Model"\n') qea.write('set ylabel "Rmsd diff"\n') qea.write('set xtics rotate out\n') qea.write("unset key\n") qeb.write("set terminal png size 2000,500\nset output \"" + graph4b + ".png\"\n") qeb.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qeb.write("unset key\n") qeb.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n') qeb.write('set xlabel "Model"\n') qeb.write('set ylabel "Rmsd diff"\n') qeb.write('set xtics rotate out\n') qn = open(graph4 + ".data", "w") qn.write("#\tN\tX\tY\tC\n") qna = open(graph4a + ".data", "w") qna.write("#\tN\tX\tY\tC\n") qnb = open(graph4b + ".data", "w") qnb.write("#\tN\tX\tY\tC\n") index = 1 indexa = 1 indexb = 1 for ele in sorted(val, lambda x, y: cmp(x["model"], y["model"])): if ele["model"] in CC_valuta: qn.write(str(index) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") qna.write(str(indexa) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") indexa += 1 if ele["core_common"] >= (ele["size"] * 60) / 100.0: qnb.write( str(indexb) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") indexb += 1 else: qn.write(str(index) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str( "%.2f" % 100) + "\n") index += 1 qe.write( "plot \"" + graph4 + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n") qn.close() qe.close() 
qea.write( "plot \"" + graph4a + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n") qna.close() qea.close() qeb.write( "plot \"" + graph4b + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n") qnb.close() qeb.close() def fillClusters(DicParameters, CluAll, merged_list, unmerged_list, convNames, quate, laue, ncs, cell_dim, clusteringAlg, threshold_alg): global LAST_AVAILABLE_ROTID # NOTE: fill the cluster of the merged_list i = 1 for merged in merged_list: while 1: if not os.path.exists(merged): time.sleep(1) continue break # C,e = readClustersFromSUM(merged) e, C, Rc, er = readClustersFromSUMToDB(DicParameters, merged, "ROTSOL") npt = os.path.join(os.path.split(merged)[0], "./0") try: shutil.rmtree(npt) except: "Cannot remove", npt for c in C: for item in c["heapSolutions"]: prio, rota = item blabla = rota["name"] rota["name"] = rota["name"] + "_" + str(i) CluAll[rota["n_prev_cluster"]]["heapSolutions"].push(prio, rota) convNames[rota["name"]] = e[blabla] # print "Merging",rota["name"],"in Cluster",rota["n_prev_cluster"] i += 1 # NOTE: for the unmerged clustering between them should be done and then add those clusters to CluAll list_rot_unmerged = [] for unmerged in unmerged_list: while 1: if not os.path.exists(unmerged): time.sleep(1) continue break C, e = readClustersFromSUM(unmerged) for c in C: for item in c["heapSolutions"]: prio, rota = item blabla = rota["name"] rota["name"] = rota["name"] + "_" + str(i) rota["n_prev_cluster"] = LAST_AVAILABLE_ROTID rota["original_rotcluster"] = str(LAST_AVAILABLE_ROTID) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, rota) list_rot_unmerged.append(dic) convNames[rota["name"]] = e[blabla] LAST_AVAILABLE_ROTID += 1 # print "Unmerged",rota["name"],"assigned cluster",LAST_AVAILABLE_ROTID i += 1 print "Start clustering unmerged rotations #:", len(list_rot_unmerged) performed, Clud = unifyClustersEquivalent(list_rot_unmerged, 
convNames, quate, laue, ncs, cell_dim, clusteringAlg, threshold_alg) print "...Done! Merged clusters", len(Clud) for c in Clud: CluAll.append(c) # print "Final number of clusters: ",CluAll print "Clusters before trying unify...", len(CluAll) performed, CluAll = unifyClustersEquivalent(CluAll, convNames, quate, laue, ncs, cell_dim, clusteringAlg, threshold_alg) print "Clusters after having unified", len(CluAll) CluAll, convNames = applyFilterName(CluAll, convNames) return CluAll, convNames def localizeModelFragmentsInRealStructure(mosedDir, predictedDir, outDir): dictRes = {} if not os.path.exists(outDir): os.makedirs(outDir) for root, subFolders, files in os.walk(predictedDir): for fileu in files: pdbf = os.path.join(root, fileu) if not pdbf.endswith(".pdb"): continue tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(pdbf, False) # print pdbf # print tupleResult # print "--------------------------" lisBigSS = tupleResult[1] strucc = tupleResult[0] cont = 0 idname = (os.path.basename(pdbf))[:-4] dictRes[idname] = [10000, ""] for root2, subFolders2, files2 in os.walk(mosedDir): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue tupleResult2 = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(pdbf2, False) lisBigSS2 = tupleResult2[1] strucc2 = tupleResult2[0] i = 0 idname2 = (os.path.basename(pdbf2))[:-4] translateStructurebyCentroidMass(cont, i, strucc, strucc2, outDir) tupleResultB = Bioinformatics.getFragmentListFromPDBUsingAllAtoms( outDir + str(cont) + "_" + str(i) + "_rottra.pdb", False) struFinal = tupleResultB[0] BlistFrags = tupleResultB[1] nWindows, comp_windows, anyWay = Bioinformatics.compareDistributionAccordingOrientation(lisBigSS2, BlistFrags, 10, 0, "A") result = False for t in range(len(nWindows)): prop = numpy.ceil((nWindows[t] * 40) / 100) if len(comp_windows[t]) >= (nWindows[t] - prop): result = True # TODO:mettere l'info dei residui riguardo tutti i frammenti non solo quelli del primo 
nella list preS = (((lisBigSS[0])["resIdList"])[0])[1] preF = (((lisBigSS[0])["resIdList"])[-1])[1] realS = (((lisBigSS2[0])["resIdList"])[0])[1] realF = (((lisBigSS2[0])["resIdList"])[-1])[1] rmsd = Bioinformatics.getRMSD(pdbf2, outDir + str(cont) + "_" + str(i) + "_rottra.pdb", "PDB", listmodel=lisBigSS2, doNotMove=True) rmsd = rmsd[0] if rmsd < dictRes[idname][0]: if dictRes[idname][1] != "": os.remove(dictRes[idname][1]) shutil.copyfile(outDir + str(cont) + "_" + str(i) + "_rottra.pdb", outDir + "best_" + str(idname) + ".pdb") dictRes[idname] = [rmsd, outDir + "best_" + str(idname) + ".pdb", result, comp_windows, nWindows, idname2, anyWay, preS, preF, realS, realF] elif result: print "Parallel but not best rmsd:" print "idname1", idname, preS, preF print "idname2", idname2, realS, realF print "actual rmsd", dictRes[idname][0] print "FOUND RMSD:", rmsd, "FILE", outDir + "best_" + str(idname) print comp_windows os.remove(outDir + str(cont) + "_" + str(i) + "_rottra.pdb") cont += 1 i += 1 return dictRes def anisotropyCorrection_and_test(cm, sym, DicGridConn, DicParameters, current_dir, mtz, F, SIGF, Intensities, Aniso, nice, pda, hkl, ent, shelxe_line): global NUMBER_OF_FILES_PER_DIRECTORY global PATH_NEW_PHASER global PATH_NEW_SHELXE # CONFIGURE NUMBER OF PARALLEL FILES if cm != None and cm.type_grid == "torque": NUMBER_OF_FILES_PER_DIRECTORY = cm.parallel_jobs # TEST PYTHON VERSION info_p = sys.version_info info_g = (sys.version).splitlines() print "Your Python version is: ", info_g[0] if info_p[0] == PYTHON_V[0] and info_p[1] == PYTHON_V[1]: if info_p[2] != PYTHON_V[2]: print colored("OK:", "green"), "Your python version is compatible with this standalone" else: print colored("OK:", "green"), "Your python version is compatible with this standalone" elif info_p[0] == PYTHON_V[0] and info_p[1] > PYTHON_V[1]: print colored("WARNING:", "yellow"), "Your python installation is newer than required,\nno issues have been encountered with this version.\nIf you experience 
an error, please, try the version " + str( PYTHON_V[0]) + "." + str(PYTHON_V[1]) + "." + str(PYTHON_V[2]) + " and report the bug." else: print colored("FATAL:", "red"), "Your python version is not updated and it is incompatible with this standalone.\nPlease, install the Python " + str( PYTHON_V[0]) + "." + str(PYTHON_V[1]) + "." + str(PYTHON_V[2]) + " version." sys.exit(1) # TEST DIRECTORY and MTZ if not os.path.exists(current_dir): print colored("FATAL:", "red"), "The working directory: " + str( current_dir) + " does not exist or it is not accessible for the user: " + str(getpass.getuser()) sys.exit(1) if not os.path.exists(mtz): print colored("FATAL:", "red"), "The mtz: " + str( os.path.abspath(mtz)) + " does not exist or it is not readable by the user: " + str(getpass.getuser()) sys.exit(1) # TEST CONNECTION try: SystemUtility.open_connection(DicGridConn, DicParameters, cm) except: print colored("FATAL", "red"), print "remote connection cannot be established. You were trying to connect to the remote grid at:" print colored("host", "blue"), print DicGridConn["host"] print colored("with port", "blue"), print DicGridConn["port"] print colored("as user", "blue"), print DicGridConn["username"] print "Please, check again that your credentials (including your key or password) are correct,\nand if the problem persists contact your local administrator to report this error." 
sys.exit(1) # TEST PHASER PATH and VERSION outlines = [] if (cm is None) or (not hasattr(cm, "channel")): # multiprocessing and local_grid if len(os.path.split(PATH_NEW_PHASER)[0]) == 0: p = subprocess.Popen(["which", PATH_NEW_PHASER], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() PATH_NEW_PHASER = out.strip() if not os.path.exists(PATH_NEW_PHASER): print colored("FATAL:", "red"), "The path given for phaser: " + str( os.path.abspath(PATH_NEW_PHASER)) + " does not exist or it is not accessible to the user: " + str( getpass.getuser()) sys.exit(1) f = open(os.path.join(current_dir, "th12323.sh"), "w") f.close() f = open(os.path.join(current_dir, "th12323.sh"), "r") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() f.close() # PULIZIA FILES INUTILI os.remove(os.path.join(current_dir, "th12323.sh")) if os.path.exists(os.path.join(current_dir, "PHASER.sum")): os.remove(os.path.join(current_dir, "PHASER.sum")) outlines = out.splitlines() else: # remote_grid cm.create_remote_file("th12323.sh") out = cm.connection.send_command_to_channel(cm.channel, '' + str(PATH_NEW_PHASER) + ' < th12323.sh', cm.promptB) outlines = out.splitlines() cm.remove_remote_file("th12323.sh") cm.remove_remote_file("PHASER.sum") version_ok = False for lin in outlines: if "Command not found" in lin: print colored("FATAL:", "red"), "The path given for phaser: " + str(os.path.abspath( PATH_NEW_PHASER)) + " does not exist in the remote workstation or it is not accessible for the remote user: " + \ DicGridConn["username"] SystemUtility.close_connection(DicGridConn, DicParameters, cm) version_ok = False sys.exit(1) elif PHASER_V1 in lin or PHASER_V2 in lin: version_ok = True # print "AAAAAAAAAAAAAAAAAAAAAAAAAA",out if not version_ok: print colored("FATAL:", "red"), "The Phaser version is not compatible with this standalone. 
Please, update your version to ", str( PHASER_V1), "or", str(PHASER_V2) # NOTE: This is temporary deactivated for developing purpose sys.exit(1) # TEST GRID current_dir2 = "" if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir("testx1245") print cm.change_remote_dir("testx1245") cm.copy_local_file(mtz, os.path.basename(mtz), send_now=True) # copy the file not create the link cm.copy_local_file(hkl, "test12345.hkl", send_now=True) if os.path.exists(ent): cm.copy_local_file(ent, "test12345.ent", send_now=True) # TEST PHASER f = open(os.path.join(current_dir, "anis.sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NCS" + "\n") f.write("MR_NCS" + "\n") if cm is not None or hasattr(cm, "channel"): f.write('HKLIN ' + str(os.path.basename(mtz)) + "\n") else: f.write('HKLIN ' + str(os.path.abspath(mtz)) + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Anisotropy and TNCS Correction" + "\n") f.write("TNCS EPSFAC WRITE anis.tncs" + "\n") f.write("NORM EPSFAC WRITE anis.norm" + "\n") f.write('ROOT anis\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(current_dir, "anis.sh"), "anis.sh", send_now=True) listaDirec = "" if hasattr(cm, "channel"): listaDirec = cm.get_remote_pwd() else: listaDirec = os.path.abspath(current_dir) def startANISO(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
print "nice -n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err if cm is None: try: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startANISO, current_dir, "anis") p.start() break else: print "FATAL ERROR: ARCIMBOLDO cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) except KeyboardInterrupt: print "The user requires ARCIMBOLDO to exit." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob("anis" + DicParameters["nameExecution"]) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = "anis.sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile(mtz, False) job.addInputFile("anis.sh", False) job.addOutputFile("anis.out", False) # job.addOutputFile("anis.mtz",False) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJob(job, isthelast=True, forcesubmit=True) if nq < 0: print colored("FATAL", "red"), "Phaser Job could not be queued in your cluster.\nPlease, check configuration or contact the Administrator." 
if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) trial = 10 sleep = 1 if cm is None: # multiprocessing trial = 60 # sleep = 3 sleep = 10 # testing, now the anis takes much longer elif not hasattr(cm, "channel"): # local grid trial = 90 sleep = 4 else: # remote grid trial = 150 sleep = 5 nt = 0 error = False while True: nt += 1 print "Trying PHASER test ", nt, "/", trial if nt >= trial: if hasattr(cm, "channel"): # print "Checking stat of the file" out = cm.connection.send_command_to_channel(cm.channel, 'stat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("anis.out"))), cm.promptB) outlines = out.splitlines() ok_stat = False for word in outlines: if word.strip().startswith("File:"): ok_stat = True break ok_size = False for word in outlines: if word.strip().startswith("Size:"): if float(word.strip().split()[1]) > 0: ok_size = True break if ok_stat and ok_size: print colored("FATAL", "red"), "It is impossible to perform Phaser jobs. Maybe the Label for F and SIGF are incorrect or the mtz is not standard.\n If Phaser has produced some output, it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True if ok_stat: out = cm.connection.send_command_to_channel(cm.channel, 'cat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("anis.out"))), cm.promptB) print out print "============================================================================================" print print "If no Phaser output is displayed, the Phaser job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration. Please, check again the relative configuration or contact your Administrator." 
print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue else: if os.path.exists(os.path.join(current_dir, "anis.out")) and os.stat( os.path.join(current_dir, "anis.out")).st_size > 0: f = open(os.path.join(current_dir, "anis.out"), "r") allf = f.read() f.close() print colored("FATAL", "red"), "It is impossible to perform Phaser jobs. Maybe the Label for F and SIGF are incorrect or the mtz is not standard.\n If Phaser has produced some output, it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True print allf print "============================================================================================" print print "If no Phaser output is displayed, the Phaser job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration. Please, check again the relative configuration or contact your Administrator." 
break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue if hasattr(cm, "channel"): wse = cm.get_remote_file("anis.out", os.path.join(current_dir, "anis.out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST, only_get_this=True, tryonetime=True) if isinstance(wse, bool) and not wse: time.sleep(sleep) continue # print cm.remove_remote_file("anis.out") # print cm.remove_remote_file("anis.sum") # print cm.remove_remote_file("anis.sh") wse = cm.get_remote_file("anis.mtz", os.path.join(current_dir, "anis.mtz"), only_get_this=True) wse = cm.get_remote_file("anis.tncs", os.path.join(current_dir, "anis.tncs"), only_get_this=True) wse = cm.get_remote_file("anis.norm", os.path.join(current_dir, "anis.norm"), only_get_this=True) break elif os.path.exists(os.path.join(current_dir, "anis.out")): wse = checkYOURoutput(os.path.join(current_dir, "anis.out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, sleep_ifnot_ready=False) if isinstance(wse, bool) and not wse: time.sleep(sleep) continue break else: # print "File ",os.path.join(outputDic,str(fromIn)+".out"),os.path.exists(os.path.join(outputDic,str(fromIn)+".out")) time.sleep(sleep) continue if os.path.exists(os.path.join(current_dir, "anis.sh")): os.remove(os.path.join(current_dir, "anis.sh")) if os.path.exists(os.path.join(current_dir, "anis.sum")): os.remove(os.path.join(current_dir, "anis.sum")) out_phaser = "" err_phaser = "" if os.path.exists(os.path.join(current_dir, "anis.out")): f = open(os.path.join(current_dir, "anis.out"), "r") out_phaser = f.read() f.close() if not error: print out_phaser os.remove(os.path.join(current_dir, "anis.out")) if not os.path.exists(os.path.join(current_dir, "anis.mtz")): print colored("FATAL", "red"), "It was not possible to generate the anisotropy scaled mtz of your data. The program will end now." 
error = True if error: sys.exit(1) # READING THE SPACEGROUP FROM PHASER OUT spaceGroup = readSpaceGroupFromOut(out_phaser) # TODO: check if the space group read is valid. I guess phaser would not work if it is not shoncke but yes if non standard # READING THE CELL DIMENSIONS FROM PHASER OUT cell_dim = cellDimensionFromOut(out_phaser) # READING THE RESOLUTION FROM PHASER OUT resolution = resolutionFromOut(out_phaser) # READING THE NUMBER OF UNIQUE REFLECTIONS FROM PHASER OUT unique_refl = uniqueReflectionsFromOut(out_phaser) # GENERATING PDA foc = open(os.path.join(current_dir, "test12345.pdb"), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) foc.write(CRD.format(**data) + "\n") foc.write(pda) foc.close() # NOTE: Test SHELXE if os.path.exists(os.path.join(current_dir, "test12345.pda")): os.remove(os.path.join(current_dir, "test12345.pda")) shutil.move(os.path.join(current_dir, "test12345.pdb"), os.path.join(current_dir, "test12345.pda")) if not os.path.exists(hkl): print colored("FATAL:", "red"), "The hkl: " + str( os.path.abspath(hkl)) + " does not exist or it is not readable by the user: " + str(getpass.getuser()) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) outlines = [] if (cm is None) or (not hasattr(cm, "channel")): # multiprocessing and local_grid if len(os.path.split(PATH_NEW_SHELXE)[0]) == 0: p = subprocess.Popen(["which", PATH_NEW_SHELXE], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() PATH_NEW_SHELXE = out.strip() if not os.path.exists(PATH_NEW_SHELXE): print colored("FATAL:", "red"), "The path given for shelxe: " + str( 
os.path.abspath(PATH_NEW_SHELXE)) + " does not exist or it is not accessible by the user: " + str( getpass.getuser()) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() outlines = out.splitlines() else: # remote_grid out = cm.connection.send_command_to_channel(cm.channel, '' + str(PATH_NEW_SHELXE), cm.promptB) outlines = out.splitlines() version_ok = False # print "============DEBUG=====================" # print out, err # print "======================================" for lin in outlines: if "Command not found" in lin: print colored("FATAL:", "red"), "The path given for shelxe: " + str(os.path.abspath( PATH_NEW_SHELXE)) + " does not exist in the remote workstation or it is not accessible by the remote user: " + \ DicGridConn["username"] SystemUtility.close_connection(DicGridConn, DicParameters, cm) version_ok = False if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) elif SHELXE_V1 in lin or SHELXE_V2 in lin or SHELXE_V3 in lin or SHELXE_V4 in lin: version_ok = True if not version_ok: print colored("FATAL:", "red"), "The SHELXE version is not compatible with this standalone. 
Please, update your version to ", str( SHELXE_V2) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(current_dir, "test12345.pda"), "test12345.pda", send_now=True) else: shutil.copyfile(hkl, os.path.join(current_dir, "test12345.hkl")) if os.path.exists(ent): shutil.copyfile(ent, os.path.join(current_dir, "test12345.ent")) def startExpJob(outputDirectory, op, lineargs): print "Executing..." print PATH_NEW_SHELXE, os.path.join(outputDirectory, str(op) + ".pda"), " ".join(lineargs) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE, str(op) + ".pda"] + lineargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() print err listaDirec = "" if hasattr(cm, "channel"): listaDirec = cm.get_remote_pwd() else: listaDirec = os.path.abspath(current_dir) nq = 1 lia = shelxe_line.split() print "Expansion of " + str(nq) + " models with:" print "hkl: " + str(hkl) print "Arguments: " + str(shelxe_line) nl = 0 if cm is None: try: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startExpJob, current_dir, "test12345", lia) p.start() break else: print "FATAL ERROR: Arcimboldo cannot load correctly information on CPUs." sym.couldIClose = True sys.exit(0) except KeyboardInterrupt: print "The user requires Arcimboldo to exit." 
sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob("shelt" + DicParameters["nameExecution"]) if hasattr(cm, "channel"): job.setExecutable(os.path.join(cm.get_remote_pwd(), PATH_NEW_SHELXE)) else: job.setExecutable(PATH_NEW_SHELXE) job.setInitialDir(listaDirec) job.addInputFile("test12345.hkl", False) job.addInputFile("test12345.pda", False) if os.path.exists(ent): job.addInputFile("test12345.ent", False) lia = shelxe_line.split() lio = ["test12345.pda"] lei = lio + lia job.setArguments(lei) cm.setRequirements(SHELXE_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJob(job, isthelast=True, forcesubmit=True) if nq < 0: print colored("FATAL", "red"), "Shelxe Job could not be queued in your cluster.\nPlease, check configuration or contact the Administrator." if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) nt = 0 error = False while True: nt += 1 print "Trying SHELXE test ", nt, "/", trial if nt >= trial: if hasattr(cm, "channel"): out = cm.connection.send_command_to_channel(cm.channel, 'stat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("test12345.lst"))), cm.promptB) outlines = out.splitlines() ok_stat = False for word in outlines: if word.strip().startswith("File:"): ok_stat = True break ok_size = False for word in outlines: if word.strip().startswith("Size:"): if float(word.strip().split()[1]) > 0: ok_size = True break if ok_stat and ok_size: print colored("FATAL", "red"), "It is impossible to perform Shelxe jobs. Maybe the .hkl is not correct or shelxe arguments are incorrect.\n If Shelxe has produced some output it will be printed now. 
Please check for errors:\n\n" print "===========================================================================================" error = True if ok_stat: out = cm.connection.send_command_to_channel(cm.channel, 'cat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("test12345.lst"))), cm.promptB) print out print "============================================================================================" print print "If no Shelxe output is displayed, the Shelxe job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration or Shelxe version has expired. Please, check again the relative configuration or contact your Administrator." print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue else: if os.path.exists(os.path.join(current_dir, "test12345.lst")) and os.stat( os.path.join(current_dir, "test12345.lst")).st_size > 0: f = open(os.path.join(current_dir, "test12345.lst"), "r") allf = f.read() f.close() print colored("FATAL", "red"), "It is impossible to perform Shelxe jobs. Maybe the .hkl is not correct or shelxe arguments are incorrect.\n If Shelxe has produced some output it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True print allf print "============================================================================================" print print "If no Shelxe output is displayed, the Shelxe job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration or the Shelxe version has expired. Please, check again the relative configuration or contact your Administrator." 
break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue if hasattr(cm, "channel"): wse = cm.get_remote_file("test12345.lst", os.path.join(current_dir, "test12345.lst"), conditioEND=SHELXE_LST_END_CONDITION, testEND=SHELXE_LST_END_TEST, only_get_this=True, tryonetime=True) if isinstance(wse, bool) and not wse: time.sleep(sleep) continue break elif os.path.exists(os.path.join(current_dir, "test12345.lst")): wse = checkYOURoutput(os.path.join(current_dir, "test12345.lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST, sleep_ifnot_ready=False) if isinstance(wse, bool) and not wse: time.sleep(20) continue break else: time.sleep(sleep) continue initCC = None fneed = False if os.path.exists(os.path.join(current_dir, "test12345.lst")): f = open(os.path.join(current_dir, "test12345.lst"), "r") allshe = f.read() f.close() lines = allshe.split("\n") for up in range(len(lines)): line = lines[up] if (line.strip()).startswith("Overall CC between"): line3 = line.strip() line3L = line3.split() initCC = float((line3L[-1])[:-1]) if (line.strip()).startswith("<|E^2-1|>"): line3 = line.strip() line3L = line3.split() if len(line3L) > 3: fneed = (line3L[4] == "-f" and line3L[5] == "missing") else: fneed = False if initCC == None: print print allshe print print colored("FATAL", "red"), "The Shelxe test job has failed. Please, read the shelxe output for errors. Check shelxe line arguments, shelxe version and hkl format correctness." 
error = True if os.path.exists(os.path.join(current_dir, "test12345.pda")): os.remove(os.path.join(current_dir, "test12345.pda")) if os.path.exists(os.path.join(current_dir, "test12345.hkl")): os.remove(os.path.join(current_dir, "test12345.hkl")) if os.path.exists(os.path.join(current_dir, "test12345.phs")): os.remove(os.path.join(current_dir, "test12345.phs")) if os.path.exists(os.path.join(current_dir, "test12345.ent")): os.remove(os.path.join(current_dir, "test12345.ent")) if os.path.exists(os.path.join(current_dir, "test12345.pdo")): os.remove(os.path.join(current_dir, "test12345.pdo")) if os.path.exists(os.path.join(current_dir, "test12345_trace.ps")): os.remove(os.path.join(current_dir, "test12345_trace.ps")) if os.path.exists(os.path.join(current_dir, "test12345.lst")): os.remove(os.path.join(current_dir, "test12345.lst")) if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) if error: sys.exit(1) return os.path.join(current_dir, "anis.mtz"), os.path.join(current_dir, "anis.norm"), os.path.join(current_dir, "anis.tncs"), F, SIGF, spaceGroup, cell_dim, resolution, unique_refl, out_phaser, err_phaser, fneed def anisotropyCorrection(current_dir, mtz, F, SIGF, Intensities, Aniso, nice): f = open(os.path.join(current_dir, "anis.sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NCS" + "\n") f.write("MR_NCS" + "\n") if cm is not None or hasattr(cm, "channel"): f.write('HKLIN ' + str(os.path.basename(mtz)) + "\n") else: f.write('HKLIN ' + str(os.path.abspath(mtz)) + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Anisotropy and TNCS Correction" + "\n") f.write("TNCS EPSFAC WRITE anis.tncs" + "\n") f.write("NORM EPSFAC WRITE anis.norm" + "\n") f.write('ROOT "' + os.path.join(current_dir, "anis") + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() 
f = open(os.path.join(current_dir, "anis.sh"), "r") p = None p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() f.close() print out print err # PULIZIA FILES INUTILI os.remove(os.path.join(current_dir, "anis.sh")) os.remove(os.path.join(current_dir, "anis.sum")) # NOTE: if to be used with non corrected mtz, change the labels return os.path.join(current_dir, "anis.mtz"), os.path.join(current_dir, "anis.norm"), os.path.join(current_dir, "anis.tncs"), F + "_ISO", SIGF + "_ISO", out, err def executePicasso(rootdir, sym, nameJob, model_file, quate, cell_dim, laue, ncs, Clusters, ensembles, ent, idealhelixpdb): dictRMSD = {} print "Initiliazing Picasso Post-Mortem Process..." dbdir = os.path.join(rootdir, "pmdb") if os.path.exists(dbdir): shutil.rmtree(dbdir) os.makedirs(dbdir) shutil.copyfile(ent, os.path.join(dbdir, os.path.basename(ent)[:-4] + ".pdb")) tupleResult = Bioinformatics.getFragmentListFromPDB(model_file, True, False) listaFra = tupleResult[1] old_ens_path = None pathide = None if len(listaFra) == 1 and listaFra[0]["sstype"] in ["ah", "ch"]: old_ens_path = ensembles.values()[0] new_ens_path = old_ens_path + ".bak" shutil.copyfile(old_ens_path, new_ens_path) print "Computing Gyre LLG for all possible rotations of helices of", listaFra[0]["fragLength"], "aa" print "Generating models..." print "Starting BORGES to find", model_file, "into", ent, "..." 
Parameters = {} Parameters["model"] = model_file Parameters["dir"] = dbdir Parameters["wdir"] = rootdir Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 1 Parameters["nilges"] = 5 Parameters["enhance_fold"] = True pars, opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars, opt, doCluster=False, superpose=False, sym=sym, process_join=True) if not os.path.exists(os.path.join(rootdir, "./library/")): print "BORGES cannot detect the fold in the real structure. Picasso will end!" print "Skipping Gyre LLG test! " else: howmany = [] pathide = os.path.join(rootdir, nameJob + "_idealhelix/") if os.path.exists(pathide): shutil.rmtree(pathide) shutil.move(os.path.join(rootdir, "./library/"), pathide) f2 = open(os.path.join(rootdir, nameJob + "_idealrot_pm.txt"), "w") for root2, subFolders2, files2 in os.walk(pathide): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue (rmsd, nref, ncom, allAtoms, compStru, pda, dictiocorresp) = Bioinformatics.getSuperimp(pdbf2, model_file, "PDB", getDictioCorresp=True) if rmsd < 0 or rmsd > 1.0: print pdbf2, "excluded..." 
continue f2.write(pdbf2 + "\tRMSD:" + str(rmsd) + "\n") howmany.append(pdbf2) pdapp = pda[0].splitlines() fc = open(pdbf2, "r") linc = fc.readlines() fc.close() pda = "" reference = Bioinformatics.getStructure("ref", pdbf2) natd = 1 for l in range(len(pdapp)): posi = pdapp[l][17:27] newposi = dictiocorresp[posi] + ' ' linat = str('%-5s' % (natd)) linea_stringa = pdapp[l][:6] + linat + pdapp[l][11:17] + newposi + pdapp[l][30:54] + pdapp[l][ 54:] natd += 1 pda += linea_stringa + "\n" f = open(pdbf2, "w") f.write(pda) f.close() f2.close() dipl = {} C = [] hp = ADT.Heap() brncv = {} rota = {"name": "ensemble1", "llg": 1.0, "zscore": 1.0, "numInRlist": 1, "n_prev_cluster": 0, "original_rotcluster": "0", "euler": [0.0, 0.0, 0.0], "frac": [0.0, 0.0, 0.0], "quaternion": [0.0, 0.0, 0.0, 0.0], "elong": 0, "bfactor": 0.0, "rotationMatrices": [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]]} for y in range(len(howmany)): rol = copy.deepcopy(rota) rol["name"] = "ensemble" + str(y) brncv[rol["name"]] = howmany[y] rol["llg"] = 0.0 rol["zscore"] = 0.0 hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, pathide, "examine", brncv) if listaFra[0]["fragLength"] >= 13: new_length = listaFra[0]["fragLength"] - 4 helix_list = Bioinformatics.getIdealHelicesFromLenghts([new_length], idealhelixpdb) f = open(old_ens_path, "w") f.write(helix_list[0][0]) f.close() print "Generating all rotation clusters and rotational symmetry equivalent in ", os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), "..." getTheTOPNOfEachCluster({}, 1, os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), "matrix", quate, Clusters, ensembles, 1, True, False, None, True, cell_dim, laue, ncs, modeTra="Cmass") print "Starting BORGES to find", model_file, "into", ent, "..." 
Parameters = {} Parameters["model"] = model_file Parameters["dir"] = dbdir Parameters["wdir"] = rootdir Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 1 Parameters["nilges"] = 5 Parameters["enhance_fold"] = True pars, opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars, opt, doCluster=False, superpose=False, sym=sym, process_join=True) if not os.path.exists(os.path.join(rootdir, "./library/")): print "BORGES cannot detect the fold in the real structure. Picasso will end!" try: shutil.rmtree(dbdir) if old_ens_path != None: os.remove(old_ens_path) shutil.move(new_ens_path, old_ens_path) shutil.rmtree(os.path.join(rootdir, "Final_Frags_" + nameJob + "/")) shutil.rmtree(os.path.join(rootdir, nameJob + "_mosed")) shutil.rmtree(os.path.join(rootdir, "Frags_ROTTRA_" + nameJob + "/")) except: pass return {}, None if os.path.exists(os.path.join(rootdir, nameJob + "_mosed")): shutil.rmtree(os.path.join(rootdir, nameJob + "_mosed")) shutil.move(os.path.join(rootdir, "./library/"), os.path.join(rootdir, nameJob + "_mosed")) for root2, subFolders2, files2 in os.walk(os.path.join(rootdir, nameJob + "_mosed")): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue (rmsd, nref, ncom, allAtoms, compStru, pda, dictiocorresp) = Bioinformatics.getSuperimp(pdbf2, model_file, "PDB", getDictioCorresp=True) if rmsd < 0 or rmsd > 1.0: print pdbf2, "excluded..." 
continue pdapp = pda[0].splitlines() fc = open(pdbf2, "r") linc = fc.readlines() fc.close() pda = "" reference = Bioinformatics.getStructure("ref", pdbf2) natd = 1 for l in range(len(pdapp)): posi = pdapp[l][17:27] newposi = dictiocorresp[posi] + ' ' linat = str('%-5s' % (natd)) linea_stringa = pdapp[l][:6] + linat + pdapp[l][11:17] + newposi + pdapp[l][30:54] + pdapp[l][54:] natd += 1 pda += linea_stringa + "\n" f = open(pdbf2, "w") f.write(pda) f.close() print "Artificially computing translations for model", model_file dictRMSD = localizeModelFragmentsInRealStructure(os.path.join(rootdir, nameJob + "_mosed"), os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), os.path.join(rootdir, "Frags_ROTTRA_" + nameJob + "/")) f = open(os.path.join(rootdir, nameJob + "_rot_pm.txt"), "w") for key in sorted(dictRMSD.keys()): value = dictRMSD[key] linea = "PREDICTED: " + str(key) + " [" + str(value[7]) + ":" + str(value[8]) + "]" + "\t\t" + "RMSD: " + str( value[0]) + "\t" + "FILE: " + str(value[1]) + "\t" + "REAL: " + str(value[5]) + " [" + str( value[9]) + ":" + str(value[10]) + "]" f.write(linea + "\n") linea = "DIST_VAL: " + str(value[2]) f.write(linea + "\n") for i in range(len(value[6])): linea = "\t[" f.write(linea + "\n") for t in range(len((value[6])[i])): linea = "\t\t" + str(((value[6])[i])[t]) f.write(linea + "\n") linea = "\t]" f.write(linea + "\n") linea = "---------------------------------------------------------------" f.write(linea + "\n") f.close() print "End of Picasso Post-Mortem Process" # CLEANING UP shutil.rmtree(dbdir) if old_ens_path != None: os.remove(old_ens_path) shutil.move(new_ens_path, old_ens_path) # shutil.rmtree(os.path.join(rootdir,nameJob+"_mosed")) # shutil.rmtree(os.path.join(rootdir,"Final_Frags_"+nameJob+"/")) # shutil.rmtree(os.path.join(rootdir,"Frags_ROTTRA_"+nameJob+"/")) if pathide is not None: return dictRMSD, os.path.join(pathide, "examine.sum") else: return dictRMSD, None def analyze_all_solutions(sym, i, DicParameters, CCV, 
cnv, hkl, cell_dim, spaceGroup, shlxLinea0, mosed_dir, outputDire): if os.path.exists(outputDire): shutil.rmtree(outputDire) os.makedirs(outputDire) for item in CCV: ent = item["corresp"] structure = Bioinformatics.getStructure("ent", ent) atomi = [] for model in structure: for chain in model: for resi in chain: for aty in resi: atomi += [aty] listatm = translateListByFrac(0, item["model"], atomi, item["shift_origin"], cell_dim, return_atoms=True) pdball = Bioinformatics.getPDBFromListOfAtom(listatm) CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f = open(os.path.join(outputDire, os.path.basename(ent)), "w") f.write(CRD.format(**data) + "\n") f.write(pdball[0]) f.close() """ ent = os.path.join(outputDire,os.path.basename(ent)) (nqueue6,convNames6) = startExpansion(sym,"20_EXPVAL_"+str(i), os.path.join(outputDire,"6_EXPVAL_LIBRARY/"),hkl,ent,cell_dim,spaceGroup,shlxLinea0,mosed_dir) CC_Val1 = evaluateExp_CC(DicParameters, sym, "6_EXPVAL_"+str(i), os.path.join(outputDire,"6_EXPVAL_LIBRARY/"),nqueue6,convNames6,isArcimboldo=True) shutil.move(os.path.join(outputDire,"6_EXPVAL_LIBRARY/solCC.sum"),os.path.join(outputDire,"sol_"+os.path.basename(ent)[:-4]+".sum")) shutil.rmtree(os.path.join(outputDire,"6_EXPVAL_LIBRARY/")) """ def trim_small_chains(pdbf, minRes): struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > minRes: chn.append(ch) chains = chn how = 0 allAtm = [] for cha in chains: how += len(cha.get_list()) for resi in cha: for atm in resi: allAtm.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm)[0] f = open(pdbf, "w") f.write(pdball) f.close() return how def generate_chunks_byChain_progressive(pdbf, min_res, direc): listPDBS = [] struc 
= Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > 3: chn.append(ch) else: print "Pruning chain", ch.get_id(), len(ch.get_list()) chains = len(chn) print "====", min_res, chains - 1 for t in range(min_res, chains): print "evaluating", t listP = generate_chunks_byChain(pdbf, t, direc, addnum=len(listPDBS)) # print "=================================" # print listP # print "=================================" listPDBS += listP return listPDBS def generate_chunks_byChain(pdbf, omit_ss, direc, addnum=0): if not (os.path.exists(direc)): os.makedirs(direc) struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > 3: chn.append(ch) else: print "Pruning chain", ch.get_id(), len(ch.get_list()) chains = chn listPDBS = [] for t in range(len(chains)): piece = None if t == 0: piece = chains[t + omit_ss:] elif t == len(chains) - omit_ss: piece = chains[:t] else: if t + omit_ss < len(chains): piece = chains[:t] + chains[t + omit_ss:] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_ss) - len(chains) piece = chains[adn:t] # print adn,"-",t,len(piece) print "---", piece allAtm = [] how = 0 for cha in piece: how += len(cha.get_list()) for resi in cha: for atm in resi: allAtm.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm)[0] pdbid, a, b = os.path.basename(pdbf)[:-4].split("_") nomefi = pdbid + "_" + str(t + addnum + 1) + "_0.pdb" filename = os.path.join(direc, nomefi) f = open(filename, "w") f.write(pdball) f.close() listPDBS.append((filename, how)) return listPDBS def generate_chunks_bySS(pdbf, omit_ss, direc): if not (os.path.exists(direc)): os.makedirs(direc) tupleResult = Bioinformatics.getFragmentListFromPDB(pdbf, False, False) struc = tupleResult[0] listaFra = tupleResult[1] listPDBS = [] if not isinstance(omit_ss, tuple): for t in range(len(listaFra)): piece = 
None if t == 0: piece = listaFra[t + omit_ss:] elif t == len(listaFra) - omit_ss: piece = listaFra[:t] else: if t + omit_ss < len(listaFra): piece = listaFra[:t] + listaFra[t + omit_ss:] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_ss) - len(listaFra) piece = listaFra[adn:t] # print adn,"-",t,len(piece) pdball = Bioinformatics.getPDBFormattedAsString("0", piece, struc, "", useDizioConv=False)[1] qsa = os.path.basename(pdbf).split("_") filename = os.path.join(direc, qsa[0] + "-" + str(t) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb") f = open(filename, "w") f.write(pdball) f.close() how = 0 for fre in piece: how += fre["fragLength"] listPDBS.append((filename, how)) else: lista_ah = [] lista_bs = [] for fra in listaFra: if fra["sstype"] in ["ah", "ch"]: lista_ah.append(fra) elif fra["sstype"] in ["bs", "cbs"]: lista_bs.append(fra) num_ah = omit_ss[0] num_bs = omit_ss[1] piece_ah = [] for t in range(len(lista_ah)): piece = None if t == 0: piece = lista_ah[t + num_ah:] elif t == len(lista_ah) - num_ah: piece = lista_ah[:t] else: if t + num_ah < len(lista_ah): piece = lista_ah[:t] + lista_ah[t + num_ah:] else: adn = (t + num_ah) - len(lista_ah) piece = lista_ah[adn:t] piece_ah.append(piece) piece_bs = [] for t in range(len(lista_bs)): piece = None if t == 0: piece = lista_bs[t + num_bs:] elif t == len(lista_bs) - num_bs: piece = lista_bs[:t] else: if t + num_bs < len(lista_bs): piece = lista_bs[:t] + lista_bs[t + num_bs:] else: adn = (t + num_bs) - len(lista_bs) piece = lista_bs[adn:t] piece_bs.append(piece) for pie_ah in piece_ah: for pie_bs in piece_bs: piece = pie_ah + piece_bs pdball = Bioinformatics.getPDBFormattedAsString("0", piece, struc, "", useDizioConv=False)[1] qsa = os.path.basename(pdbf).split("_") filename = os.path.join(direc, qsa[0] + "-" + str(t) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb") f = open(filename, "w") f.write(pdball) f.close() how = 0 for fre in piece: how += fre["fragLength"] 
listPDBS.append((filename, how)) return listPDBS def generate_chunks_progressive(pdbf, from_omit_res, to_omit_res, step_size, mode_type, polyBack, mantCys, fix_ss, remove_coil, direc): listPDBS = [] indicators = {} for t in range(from_omit_res, to_omit_res): indicator_A = len(listPDBS) listPDBS += generate_chunks(pdbf, t, step_size, mode_type, polyBack, mantCys, fix_ss, remove_coil, direc, addnum=len(listPDBS)) indicator_B = len(listPDBS) indicators[t] = (indicator_A, indicator_B) return listPDBS, indicators def generate_chunks(pdbf, omit_res, step_size, mode_type, polyBack, mantCys, fix_ss, remove_coil, direc, addnum=0): pathname = copy.deepcopy(pdbf) if not (os.path.exists(direc)): os.makedirs(direc) listPDBS = [] limitis = 0 if fix_ss != None and remove_coil != None: tupleResult = Bioinformatics.getFragmentListFromPDB(pathname, False, False) struc = tupleResult[0] listaFra = tupleResult[1] listAtoms = [] for fra in listaFra: if (fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_ah") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_bs"): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: limitis += 1 for atm in residue: listAtoms.append(atm) if remove_coil == "maintain_coil": if not ((fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_ah") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_bs")): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: for atm in residue: listAtoms.append(atm) elif remove_coil == "remove_coil": if ((fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_bs") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_ah")): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: for atm in residue: listAtoms.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(listAtoms, renumber=True, uniqueChain=True)[0] f = open(pathname, "w") 
f.write(pdball) f.close() pdbf = cStringIO.StringIO(pdball) listAll = Bioinformatics.getListCA("model", pdbf, "PDB", backbone=True, allInList=True, minResInChain=3) # print "Residui",len(listAll[0][0]) workList = listAll[0][0] fixed_list = [] if limitis > 0: workList = listAll[0][0][:limitis] fixed_list = listAll[0][0][limitis:] for t in range(0, len(workList), step_size): piece = None omitted = None if t == 0: piece = workList[t + omit_res:] omitted = workList[:t + omit_res] elif t == len(workList) - omit_res: piece = workList[:t] omitted = workList[t:] else: if t + omit_res < len(workList): piece = workList[:t] + workList[t + omit_res:] omitted = workList[t:t + omit_res] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_res) - len(workList) piece = workList[adn:t] omitted = workList[:adn] + workList[t + 1:] # print adn,"-",t,len(piece) allAtm = [] newlip2 = [] for resi in piece: for atm in resi: allAtm.append(atm) vald = atm.get_parent().get_id()[1] if vald not in newlip2: newlip2.append(vald) newlip = [] omitatm = [] for resi in omitted: for atm in resi: vald = atm.get_parent().get_id()[1] omitatm.append(atm) if vald not in newlip: newlip.append(vald) if mode_type == "omit": allAtm += [item for sublist in fixed_list for item in sublist] pdball = \ Bioinformatics.getPDBFromListOfAtom(allAtm, chainFragment=True, polyala=polyBack, maintainCys=mantCys)[0] elif mode_type == "fragment": omitatm += [item for sublist in fixed_list for item in sublist] pdball = \ Bioinformatics.getPDBFromListOfAtom(omitatm, chainFragment=True, polyala=polyBack, maintainCys=mantCys)[0] qsa = os.path.basename(pathname).split("_") filename = os.path.join(direc, qsa[0] + "-" + str(t + addnum) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb") f = open(filename, "w") f.write(pdball) f.close() if mode_type == "omit": listPDBS.append([filename, len(piece), newlip]) elif mode_type == "fragment": listPDBS.append([filename, len(omitatm), newlip2]) """ 
diffch = [] for resi in omitted: for atm in resi: diffch.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm,chainFragment=True,diffchain=diffch)[0] filename = os.path.join(direc,os.path.basename(pathname)[:-4]+"-"+str(t+addnum)+"B.pdb") f = open(filename,"w") f.write(pdball) f.close() #listPDBS.append((filename,len(piece))) """ return listPDBS def startOMITllg_nogyre(DicParameters, cm, sym, DicGridConn, model_file, sizes, nameJob, outputDire, sumPath, howmany, indic, number_cluster, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False): convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, sumPath, "ROTSOL") Clusel = [] prio = None rota = None if number_cluster != None: for clu in CluAll: if clu["heapSolutions"].asList()[0][1]["n_prev_cluster"] == number_cluster: Clusel = clu prio, rota = clu["heapSolutions"].asList()[0] break liall = sorted(Clusel["heapSolutions"].asList(), __cmp_rota2, reverse=True) if not (os.path.exists(outputDire)): os.makedirs(outputDire) fildname = os.path.join(outputDire, str(1) + "_graph") if not os.path.exists( os.path.join(outputDire, "../../../library/peaks_" + str(number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/pklat_" + str(number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")): qe = open(fildname + ".scri", "w") qc = open(fildname + "_var.scri", "w") qe.write("set terminal png size 2400,2400\nset output \"" + fildname + ".png\"\n") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_var.png\"\n") totl = (sizes[1] - sizes[0]) / 2 qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qe.write("set multiplot layout " + str(totl) + ",2 columnsfirst\n") res_wllg = {} qn = 
open(fildname + "_var.data", "w") qn.write("#\tX\tY\n") for key in sorted(indic.keys()): qo = open(fildname + "_" + str(key) + ".data", "w") qo.write("#\tX\tY\n") qot = open(fildname + "_" + str(key) + "TOP.data", "w") qot.write("#\tX\tY\n") start, fine = indic[key] all_var = [] for jk in range(start, fine): svd = [] pr = None rk = None for item in liall: pr, rk = item if os.path.basename(convNames[rk["name"]]).split("-")[-1][:-4] == str(jk): break # NOTE: to be honest rk cannot be None because it will be the last rotation of the cluster if it is not found the correct # one, so llg will never be 0. To have rk == None a boolean flag or something else must be used, but in this way # like it is now maybe is even better because we put the lowest llg observed in the cluster for the models that # are not present in the cluster if rk == None: rk = {"llg": 0} else: qo.write(str(jk) + "\t" + str(rk["llg"]) + "\t\t#" + str(howmany[jk][2]) + "\n") svd.append(rk["llg"]) all_var.append(rk["llg"]) howmany[jk].append(rk["llg"]) for rendr in howmany[jk][2]: if rendr in res_wllg.keys(): res_wllg[rendr] = [res_wllg[rendr][0] + ( rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0]))), res_wllg[rendr][1] + 1] else: res_wllg[rendr] = [rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0])), 1] svd = sorted(svd, reverse=True) # print "---",jk,svd qot.write(str(jk) + "\t" + str(svd[0]) + "\t\t#" + str(howmany[jk][2]) + "\n") qn.write(str(key) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() qot.close() qe.write("plot \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"p" + str( key) + "\" with points, \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"l" + str( key) + "\" smooth csplines, \"" + fildname + "_" + str(key) + "TOP.data\" using 1:2 title \"t" + str( key) + "\" with lines\n") qe.write("unset multiplot\n") qc.write( "plot \"" + fildname + "_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 
0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qn.close() qe.close() qc = open(fildname + "_wllg_var.scri", "w") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_wllg_var.png\"\n") qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qc.write( "plot \"" + fildname + "_wllg_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qo = open(fildname + "_wllg.data", "w") qo.write("#\tX\tY\n") chiavi = [] valori = [] for key in sorted(res_wllg.keys()): # qo.write(str(key)+"\t"+str(-1*numpy.log(res_wllg[key][0]/float(res_wllg[key][1])))+"\t\t#"+str(res_wllg[key])+"\n") qo.write( str(key) + "\t" + str(res_wllg[key][0] / float(res_wllg[key][1])) + "\t\t#" + str(res_wllg[key]) + "\n") chiavi.append(key) valori.append(res_wllg[key][0] / float(res_wllg[key][1])) qo.close() # IMPORTANT: Find the peaks def __cmp_peaks(a, b): return cmp(a[1], b[1]) max_peaks = ADT.top_max_peaks(valori, chiavi, 4) minp = (sorted(max_peaks, __cmp_peaks))[0] flat_regions = ADT.flat_regions(valori, chiavi, minp[1]) # FIND THE RESIDUES RANGES TO OMIT FOR EACH PEAK del_resi = [] for peak in max_peaks: all_kurtosis = [] res_center = peak[0] for key in indic.keys(): llgs = [] llg_resi = [] start, fine = indic[key] for blur in range(start, fine): if res_center in howmany[blur][2]: llgs.append(howmany[blur][3]) llg_resi.append((howmany[blur][2], howmany[blur][3])) topllgresi = (sorted(llg_resi, __cmp_peaks, reverse=True))[0] all_kurtosis.append((key, scipy.stats.kurtosis(llgs), topllgresi)) print "ANALYSIS PEAK", peak print "....................................KURTOSIS.................................." for kurto in all_kurtosis: print kurto print ".............................................................................." 
topkurtosis = (sorted(all_kurtosis, __cmp_peaks, reverse=True))[0] del_resi.append(topkurtosis[2]) res_list_40, safe_llg_40 = ADT.get_percentile_llg(valori, chiavi, 0.40) res_list_50, safe_llg_50 = ADT.get_percentile_llg(valori, chiavi, 0.50) res_list_55, safe_llg_55 = ADT.get_percentile_llg(valori, chiavi, 0.55) res_list_60, safe_llg_60 = ADT.get_percentile_llg(valori, chiavi, 0.60) res_list_65, safe_llg_65 = ADT.get_percentile_llg(valori, chiavi, 0.65) res_list_70, safe_llg_70 = ADT.get_percentile_llg(valori, chiavi, 0.70) res_list_75, safe_llg_75 = ADT.get_percentile_llg(valori, chiavi, 0.75) res_list_80, safe_llg_80 = ADT.get_percentile_llg(valori, chiavi, 0.80) res_list_85, safe_llg_85 = ADT.get_percentile_llg(valori, chiavi, 0.85) print "===========RESIDUE TO OMIT=======" resi_list_1 = [] for lista in del_resi: print "--", lista, "--" resi_list_1 += lista[0] print "=================================" print print "===========FLAT REGIONS==========" resi_list_2 = copy.deepcopy(resi_list_1) for flat in flat_regions: print range(flat[0], flat[1] + 1) resi_list_2 += range(flat[0], flat[1] + 1) print "=================================" trimmed_portion = ADT.erase_bad_region(valori, chiavi, minp[1]) resi_list_3 = trimmed_portion print print "//////////TRIMMED REGIONS////////" print trimmed_portion print "/////////////////////////////////" print if not os.path.exists(fildname + "_wllg.scri"): qe = open(fildname + "_wllg.scri", "w") qe.write("set terminal png size 4200,1200\nset output \"" + fildname + "_wllg.png\"\n") qe.write("set xtics 1\n") for pek in max_peaks: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead" + "\n") for pek in flat_regions: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead lt 3 lc 3" + "\n") qe.write("set arrow from " + str(int(pek[1])) + ", graph 0 to " + str( int(pek[1])) + ", graph 1 nohead lt 3 lc 3" + "\n") # qe.write("plot 
\""+fildname+"_wllg.data\" using 1:2 title \"p\" with points, \""+fildname+"_wllg.data\" using 1:2 title \"l\" smooth csplines, "+str(minp[1])+" notitle with lines, "+str(safe_llg)+" notitle with lines\n") qe.write( "plot \"" + fildname + "_wllg.data\" using 1:2 title \"p\" with points, \"" + fildname + "_wllg.data\" using 1:2 title \"l\" smooth csplines, " + str( safe_llg_40) + " title \"p40\" with lines, " + str( safe_llg_50) + " title \"p50\" with lines, " + str( safe_llg_55) + " title \"p55\" with lines, " + str( safe_llg_60) + " title \"p60\" with lines, " + str( safe_llg_65) + " title \"p65\" with lines, " + str( safe_llg_70) + " title \"p70\" with lines, " + str( safe_llg_75) + " title \"p75\" with lines, " + str( safe_llg_80) + " title \"p80\" with lines, " + str(safe_llg_85) + " title \"p85\" with lines\n") qe.close() qo = open(fildname + "_wllg_var.data", "w") qo.write("#\tX\tY\n") # all_var = map(lambda x: -1*numpy.log(x[0]/float(x[1])),res_wllg.values()) all_var = map(lambda x: x[0] / float(x[1]), res_wllg.values()) qo.write(str(number_cluster) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() resi_1 = [] resi_2 = [] resi_3 = [] for resi in resi_list_1: if resi in res_list_75: continue else: resi_1.append(resi) for resi in resi_list_2: if resi in res_list_75: continue else: resi_2.append(resi) for resi in resi_list_3: if resi in res_list_75: continue else: resi_3.append(resi) Bioinformatics.generatePDBomitting(model_file, resi_list_1, os.path.join(outputDire, "../../../library/peaks_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDBomitting(model_file, resi_list_2, os.path.join(outputDire, "../../../library/pklat_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) # Bioinformatics.generatePDBomitting(model_file,resi_list_3,os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb"),trim_to_polyala=trim_to_polyala) 
Bioinformatics.generatePDB(model_file, resi_list_3, os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_40, os.path.join(outputDire, "../../../library/percentile40_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_50, os.path.join(outputDire, "../../../library/percentile50_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_55, os.path.join(outputDire, "../../../library/percentile55_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_60, os.path.join(outputDire, "../../../library/percentile60_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_65, os.path.join(outputDire, "../../../library/percentile65_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_70, os.path.join(outputDire, "../../../library/percentile70_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_75, os.path.join(outputDire, "../../../library/percentile75_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_80, os.path.join(outputDire, "../../../library/percentile80_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_85, os.path.join(outputDire, "../../../library/percentile85_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) """ dipl = {} C = [] hp = ADT.Heap() brncv = {} models_final = 
def startOMITllg(DicParameters, cm, sym, DicGridConn, mode, sizes, nameJob, outputDire, model_file, mtz, MW, NC, F, SIGF,
                 Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, spaceGroup, frag_fixed,
                 convNames, quate, laue, ncs, clusteringAlg, cell_dim, thresholdCompare, evaLLONG, sumPath, howmany,
                 indic, tops=None, USE_TNCS=True, LIMIT_CLUSTER=None, sampl=-1, VRMS=False, BFAC=False,
                 trim_to_polyala=True, sigr=0.0, sigt=0.0, preserveChains=False, ent=None, BULK_FSOL=-1, BULK_BSOL=-1,
                 RNP_GYRE=False):
    """Score every model listed in howmany and write trimmed library models from the LLG profile.

    Outline of what the body does:
      1. Reads the clusters from sumPath with readClustersFromSUM. If sumPath is
         None or missing, a single placeholder solution named "ensemble1" mapped
         to model_file is used and mode is forced to "FRF".
      2. For every solution popped from every cluster heap (skipping those whose
         "n_prev_cluster" differs from LIMIT_CLUSTER when it is given), and unless
         clust_<name>_1_END.sum already exists in outputDire, writes "examine.sum"
         with one copy of the solution per entry of howmany and runs the step
         selected by mode: "RNP" (startRNP/evaluateFTF), "FRF"
         (startFRF/evaluateFRF_clusterOnce), "BRF" (startBRF/evaluateBRF), anything
         else (startRGR/evaluateRGR). The result is saved as clust_<name>_1_END.sum.
      3. Unless the peaks_/pklat_/overt_ PDB files for the cluster exist under
         outputDire/../../../library, writes gnuplot scripts and data files
         (outputDire/1_graph*), accumulates a per-residue LLG, derives peaks, flat
         regions, a trimmed region and percentile selections through ADT, and
         writes the corresponding PDB models in the library folder.
      4. Writes "final.sum" for the peaks/pklat/overt models and reloads it with
         readClustersFromSUMToDB.

    Side effects visible here: howmany[jk] gets the LLG appended for every model
    found in the results; files are created in outputDire and in the library
    folder; "examine.sum" is removed.

    Returns:
        convNames as returned by the last readClustersFromSUMToDB call.
    """
    # NOTE(review): limit_times and size_rnp are assigned but never read in this function.
    limit_times = 2
    size_rnp = copy.deepcopy(sizes)
    if mode == "RNP":
        limit_times = len(sizes)

    Clust = []
    cnv = {}
    if sumPath != None and os.path.exists(sumPath):
        Clust, cnv = readClustersFromSUM(sumPath)
    else:
        # No previous solutions: start from one placeholder solution for model_file
        # and switch to the FRF branch.
        din = {"heapSolutions": ADT.Heap()}
        lo = {"name": "ensemble1", "llg": 1.0, "zscore": 1.0, "numInRlist": 1, "n_prev_cluster": 0,
              "original_rotcluster": "0", "euler": [0.0, 0.0, 0.0], "frac": [0.0, 0.0, 0.0],
              "quaternion": [0.0, 0.0, 0.0, 0.0], "elong": 0, "bfactor": 0.0,
              "rotationMatrices": [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]]}
        din["heapSolutions"].push((-1 * lo["llg"], -1 * lo["zscore"]), lo)
        Clust.append(din)
        cnv["ensemble1"] = model_file
        mode = "FRF"

    # Dust receives one empty heap per cluster; nothing is pushed into them below.
    Dust = []
    for tru in Clust:
        sol = tru["heapSolutions"].pop()
        Dust.append({"heapSolutions": ADT.Heap()})
        while sol != None:
            prio, rota = sol
            # The next solution is fetched before the current one is processed;
            # any exception raised by pop() ends the loop after this iteration.
            # NOTE(review): bare except also hides errors unrelated to an empty heap.
            try:
                sol = tru["heapSolutions"].pop()
            except:
                sol = None

            if LIMIT_CLUSTER != None and rota["n_prev_cluster"] != LIMIT_CLUSTER:
                continue

            # print "---------",rota["name"],cnv[rota["name"]]
            number_cluster = rota["n_prev_cluster"]
            times = 0
            # Single pass: times is always 1 in the names built below.
            if True:
                times += 1
                if not os.path.exists(
                        os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum")):
                    # Build "examine.sum": one copy of the current solution per model,
                    # named ensemble<y> and mapped to the model path howmany[y][0].
                    dipl = {}
                    C = []
                    hp = ADT.Heap()
                    brncv = {}
                    tdncv = {}  # NOTE(review): filled but never read afterwards
                    for y in range(len(howmany)):
                        rol = copy.deepcopy(rota)
                        rol["name"] = "ensemble" + str(y)
                        brncv[rol["name"]] = howmany[y][0]
                        tdncv[rol["name"]] = howmany[y][1]
                        hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol)
                    dipl["heapSolutions"] = hp
                    C.append(dipl)
                    writeSumClusters(C, outputDire, "examine", brncv)
                    convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters,
                                                                              os.path.join(outputDire, "examine.sum"),
                                                                              "ROTSOL")

                    SystemUtility.open_connection(DicGridConn, DicParameters, cm)
                    if mode == "RNP":
                        ######METHOD RNP########
                        # NOTE(review): USE_RGR is not a parameter of this function and is not
                        # assigned in it; unless it is a module-level name this call raises
                        # NameError - confirm.
                        (nqueue10, convino) = startRNP(DicParameters, cm, sym,
                                                       nameJob + rota["name"] + "_" + str(times), CluAll, brncv,
                                                       os.path.join(outputDire, str(times)), mtz, MW, NC, F, SIGF,
                                                       Intensities, Aniso, nice, RMSD, lowR, highR, spaceGroup,
                                                       frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER, sampl=sampl, tops=tops,
                                                       VRMS=VRMS, USE_TNCS=USE_TNCS, USE_RGR=USE_RGR, BFAC=BFAC,
                                                       BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL, RNP_GYRE=RNP_GYRE)

                        CluAll, convNames, tolose = evaluateFTF(DicParameters, cm, sym, DicGridConn,
                                                                nameJob + rota["name"] + "_" + str(times),
                                                                os.path.join(outputDire, str(times)), nqueue10, brncv,
                                                                -10, frag_fixed, quate, "RNP", laue, ncs,
                                                                clusteringAlg, cell_dim, thresholdCompare, evaLLONG,
                                                                LIMIT_CLUSTER=LIMIT_CLUSTER, convNames=convino, tops=1)
                        sumPACK = os.path.join(outputDire, "examine.sum")
                        CluAll, convNames = mergeZSCOREinRNP(DicParameters, sumPACK, CluAll, convNames)
                        writeSumClusters(CluAll, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END",
                                         convNames, LIMIT_CLUSTER=LIMIT_CLUSTER)
                    elif mode == "FRF":
                        (nqueue, convNames) = startFRF(DicParameters=DicParameters, cm=cm, sym=sym,
                                                       nameJob=nameJob + rota["name"] + "_" + str(times),
                                                       dir_o_liFile=os.path.join(outputDire,
                                                                                 rota["name"] + "_" + str(
                                                                                     times) + "_models"),
                                                       outputDire=os.path.join(outputDire, str(times)), mtz=mtz, MW=MW,
                                                       NC=NC, F=F, SIGF=SIGF, Intensities=Intensities, Aniso=Aniso,
                                                       normfactors=normfactors, tncsfactors=tncsfactors, nice=nice,
                                                       RMSD=RMSD, lowR=lowR, highR=highR, final_rot=75, save_rot=75,
                                                       frag_fixed=frag_fixed, spaceGroup=spaceGroup, sampl=sampl,
                                                       VRMS=VRMS, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL)

                        CluAll, Rotclu = evaluateFRF_clusterOnce(DicParameters, cm, sym, DicGridConn, [],
                                                                 nameJob + rota["name"] + "_" + str(times),
                                                                 os.path.join(outputDire, str(times)), nqueue, quate,
                                                                 laue, ncs, spaceGroup, convNames, clusteringAlg,
                                                                 -100.0, frag_fixed, cell_dim, thresholdCompare,
                                                                 evaLLONG, applyNameFilter=False, tops=None)
                        writeSumClusters(CluAll, outputDire, "clus", convNames)
                        Clulu, cnvu = readClustersFromSUM(os.path.join(outputDire, "clus.sum"))
                        # Merge every rotation into one cluster (id 0), renaming each one
                        # "ensemble<tag>" where <tag> is taken from its PDB file name
                        # (text after the first "-", minus the last four characters).
                        Dur = [{"heapSolutions": ADT.Heap()}]
                        cndn = {}
                        for cr in Clulu:
                            for item in cr["heapSolutions"].asList():
                                pr, ty = item
                                pdf = cnvu[ty["name"]]
                                vks = os.path.basename(pdf).split("-")[1][:-4]
                                ty["name"] = "ensemble" + str(vks)
                                cndn[ty["name"]] = pdf
                                ty["n_prev_cluster"] = 0
                                ty["original_rotcluster"] = "0"
                                Dur[0]["heapSolutions"].push((-1 * ty["llg"], -1 * ty["zscore"]), ty)
                        writeSumClusters(Dur, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", cndn)
                        # convNames = readClustersFromSUMToDB(DicParameters,os.path.join(outputDire, "clus_merged.sum"),"ROTSOL")
                    elif mode == "BRF":
                        nq, conv2 = startBRF(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times) + "_A",
                                             CluAll, brncv, os.path.join(outputDire, str(times) + "_A"), mtz, MW, NC,
                                             F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, 1, spaceGroup, 75,
                                             sampl=sampl, LIMIT_CLUSTER=LIMIT_CLUSTER, USE_TNCS=USE_TNCS, isOMIT=True,
                                             VRMS=VRMS, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL)

                        SystemUtility.endCheckQueue()
                        convNames, Clud = evaluateBRF(DicParameters, cm, sym, DicGridConn,
                                                      nameJob + rota["name"] + "_" + str(times) + "_A",
                                                      os.path.join(outputDire, str(times) + "_A"), True, cell_dim,
                                                      quate, conv2, None, brncv, LIMIT_CLUSTER=LIMIT_CLUSTER,
                                                      isOMIT=True)
                        writeSumClusters(Clud, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END",
                                         convNames)
                    else:  # if mode == "RGR":
                        ######METHOD RGR########
                        nq, conv2 = startRGR(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times) + "_A",
                                             CluAll, brncv, os.path.join(outputDire, str(times) + "_A"), mtz, MW, NC,
                                             F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, 1, spaceGroup, 75,
                                             sampl=sampl, LIMIT_CLUSTER=LIMIT_CLUSTER, USE_TNCS=USE_TNCS, isOMIT=True,
                                             VRMS=VRMS, BFAC=BFAC, sigr=sigr, sigt=sigt,
                                             preserveChains=preserveChains, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL)

                        SystemUtility.endCheckQueue()
                        convNames, Clud = evaluateRGR(DicParameters, cm, sym, DicGridConn,
                                                      nameJob + rota["name"] + "_" + str(times) + "_A",
                                                      os.path.join(outputDire, str(times) + "_A"), True, cell_dim,
                                                      quate, conv2, None, brncv, LIMIT_CLUSTER=LIMIT_CLUSTER,
                                                      isOMIT=True, ent=ent)
                        writeSumClusters(Clud, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END",
                                         convNames)

                    Cnew, cnvnew = readClustersFromSUM(
                        os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum"))
                    os.remove(os.path.join(outputDire, "examine.sum"))

                # Also covers the case where the block above was skipped and an old
                # examine.sum is still on disk.
                if os.path.exists(os.path.join(outputDire, "examine.sum")):
                    os.remove(os.path.join(outputDire, "examine.sum"))

                Cnew, cnvnew = readClustersFromSUM(
                    os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum"))
                # Only the first cluster of the END sum is analysed.
                liall = Cnew[0]["heapSolutions"].asList()
                fildname = os.path.join(outputDire, str(times) + "_graph")
                # The analysis runs only while at least one of the three library models is missing.
                if not os.path.exists(os.path.join(outputDire, "../../../library/peaks_" + str(
                        number_cluster) + "_0.pdb")) or not os.path.exists(os.path.join(outputDire,
                                                                                       "../../../library/pklat_" + str(
                                                                                           number_cluster) + "_0.pdb")) or not os.path.exists(
                    os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")):
                    # qe: multiplot script, qc: mean/deviation script, qn: data for qc.
                    qe = open(fildname + ".scri", "w")
                    qc = open(fildname + "_var.scri", "w")
                    qe.write("set terminal png size 2400,2400\nset output \"" + fildname + ".png\"\n")
                    qc.write("set terminal png size 800,800\nset output \"" + fildname + "_var.png\"\n")
                    totl = (sizes[1] - sizes[0]) / 2
                    qc.write("unset key\n")
                    qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n")
                    qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n")
                    qc.write("#set style fill solid 1.0 border rgb 'grey30'\n")
                    qc.write("set border 3\n")
                    qe.write("set multiplot layout " + str(totl) + ",2 columnsfirst\n")
                    # res_wllg: residue -> [sum of (LLG / residues in model), number of models]
                    res_wllg = {}
                    qn = open(fildname + "_var.data", "w")
                    qn.write("#\tX\tY\n")
                    for key in sorted(indic.keys()):
                        qo = open(fildname + "_" + str(key) + ".data", "w")
                        qo.write("#\tX\tY\n")
                        qot = open(fildname + "_" + str(key) + "TOP.data", "w")
                        qot.write("#\tX\tY\n")
                        start, fine = indic[key]
                        all_var = []
                        for jk in range(start, fine):
                            svd = []
                            for item in liall:
                                pr, rk = item
                                if rk["name"] == "ensemble" + str(jk):
                                    qo.write(str(jk) + "\t" + str(rk["llg"]) + "\t\t#" + str(howmany[jk][2]) + "\n")
                                    svd.append(rk["llg"])
                                    all_var.append(rk["llg"])
                                    # The LLG is appended to the caller's list; it is read back
                                    # below as howmany[...][3].
                                    howmany[jk].append(rk["llg"])
                                    for rendr in howmany[jk][2]:
                                        if rendr in res_wllg.keys():
                                            res_wllg[rendr] = [res_wllg[rendr][0] + (
                                                rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0]))),
                                                               res_wllg[rendr][1] + 1]
                                        else:
                                            res_wllg[rendr] = [
                                                rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0])),
                                                1]
                            svd = sorted(svd, reverse=True)
                            # print "---",jk,svd
                            # NOTE(review): svd[0] raises IndexError when no solution named
                            # ensemble<jk> is present in liall.
                            qot.write(str(jk) + "\t" + str(svd[0]) + "\t\t#" + str(howmany[jk][2]) + "\n")
                        qn.write(str(key) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n")
                        qo.close()
                        qot.close()
                        qe.write("plot \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"p" + str(
                            key) + "\" with points, \"" + fildname + "_" + str(
                            key) + ".data\" using 1:2 title \"l" + str(
                            key) + "\" smooth csplines, \"" + fildname + "_" + str(
                            key) + "TOP.data\" using 1:2 title \"t" + str(key) + "\" with lines\n")
                    qe.write("unset multiplot\n")
                    qc.write(
                        "plot \"" + fildname + "_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n")
                    qc.close()
                    qn.close()
                    qe.close()

                    qc = open(fildname + "_wllg_var.scri", "w")
                    qc.write("set terminal png size 800,800\nset output \"" + fildname + "_wllg_var.png\"\n")
                    qc.write("unset key\n")
                    qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n")
                    qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n")
                    qc.write("#set style fill solid 1.0 border rgb 'grey30'\n")
                    qc.write("set border 3\n")
                    qc.write(
                        "plot \"" + fildname + "_wllg_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n")
                    qc.close()

                    # chiavi: residues in sorted order; valori: mean per-residue LLG of each.
                    qo = open(fildname + "_wllg.data", "w")
                    qo.write("#\tX\tY\n")
                    chiavi = []
                    valori = []
                    for key in sorted(res_wllg.keys()):
                        # qo.write(str(key)+"\t"+str(-1*numpy.log(res_wllg[key][0]/float(res_wllg[key][1])))+"\t\t#"+str(res_wllg[key])+"\n")
                        qo.write(str(key) + "\t" + str(res_wllg[key][0] / float(res_wllg[key][1])) + "\t\t#" + str(
                            res_wllg[key]) + "\n")
                        chiavi.append(key)
                        valori.append(res_wllg[key][0] / float(res_wllg[key][1]))
                    qo.close()

                    # IMPORTANT: Find the peaks
                    # Comparator on the second element of each tuple (Python 2 cmp-style sort).
                    def __cmp_peaks(a, b):
                        return cmp(a[1], b[1])

                    max_peaks = ADT.top_max_peaks(valori, chiavi, 4)
                    # minp: the entry of max_peaks with the smallest second element.
                    minp = (sorted(max_peaks, __cmp_peaks))[0]
                    flat_regions = ADT.flat_regions(valori, chiavi, minp[1])

                    # FIND THE RESIDUES RANGES TO OMIT FOR EACH PEAK
                    del_resi = []
                    for peak in max_peaks:
                        all_kurtosis = []
                        res_center = peak[0]
                        for key in indic.keys():
                            llgs = []
                            llg_resi = []
                            start, fine = indic[key]
                            for blur in range(start, fine):
                                if res_center in howmany[blur][2]:
                                    llgs.append(howmany[blur][3])
                                    llg_resi.append((howmany[blur][2], howmany[blur][3]))
                            # Residue list with the highest LLG among the models containing the peak.
                            # NOTE(review): IndexError if no model of this key contains res_center.
                            topllgresi = (sorted(llg_resi, __cmp_peaks, reverse=True))[0]
                            all_kurtosis.append((key, scipy.stats.kurtosis(llgs), topllgresi))
                        print "ANALYSIS PEAK", peak
                        print "....................................KURTOSIS.................................."
                        for kurto in all_kurtosis:
                            print kurto
                        print ".............................................................................."
                        # Keep the residue list of the key whose LLG distribution has the highest kurtosis.
                        topkurtosis = (sorted(all_kurtosis, __cmp_peaks, reverse=True))[0]
                        del_resi.append(topkurtosis[2])

                    res_list_40, safe_llg_40 = ADT.get_percentile_llg(valori, chiavi, 0.40)
                    res_list_50, safe_llg_50 = ADT.get_percentile_llg(valori, chiavi, 0.50)
                    res_list_55, safe_llg_55 = ADT.get_percentile_llg(valori, chiavi, 0.55)
                    res_list_60, safe_llg_60 = ADT.get_percentile_llg(valori, chiavi, 0.60)
                    res_list_65, safe_llg_65 = ADT.get_percentile_llg(valori, chiavi, 0.65)
                    res_list_70, safe_llg_70 = ADT.get_percentile_llg(valori, chiavi, 0.70)
                    res_list_75, safe_llg_75 = ADT.get_percentile_llg(valori, chiavi, 0.75)
                    res_list_80, safe_llg_80 = ADT.get_percentile_llg(valori, chiavi, 0.80)
                    res_list_85, safe_llg_85 = ADT.get_percentile_llg(valori, chiavi, 0.85)

                    print "===========RESIDUE TO OMIT======="
                    # resi_list_1: residues selected from the peaks.
                    resi_list_1 = []
                    for lista in del_resi:
                        print "--", lista, "--"
                        resi_list_1 += lista[0]
                    print "================================="
                    print
                    print "===========FLAT REGIONS=========="
                    # resi_list_2: peak residues plus every residue of the flat regions.
                    resi_list_2 = copy.deepcopy(resi_list_1)
                    for flat in flat_regions:
                        print range(flat[0], flat[1] + 1)
                        resi_list_2 += range(flat[0], flat[1] + 1)
                    print "================================="
                    trimmed_portion = ADT.erase_bad_region(valori, chiavi, minp[1])
                    resi_list_3 = trimmed_portion
                    print
                    print "//////////TRIMMED REGIONS////////"
                    print trimmed_portion
                    print "/////////////////////////////////"
                    print

                    if not os.path.exists(fildname + "_wllg.scri"):
                        qe = open(fildname + "_wllg.scri", "w")
                        qe.write("set terminal png size 4200,1200\nset output \"" + fildname + "_wllg.png\"\n")
                        qe.write("set xtics 1\n")
                        for pek in max_peaks:
                            qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str(
                                int(pek[0])) + ", graph 1 nohead" + "\n")
                        for pek in flat_regions:
                            qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str(
                                int(pek[0])) + ", graph 1 nohead lt 3 lc 3" + "\n")
                            qe.write("set arrow from " + str(int(pek[1])) + ", graph 0 to " + str(
                                int(pek[1])) + ", graph 1 nohead lt 3 lc 3" + "\n")
                        # qe.write("plot \""+fildname+"_wllg.data\" using 1:2 title \"p\" with points, \""+fildname+"_wllg.data\" using 1:2 title \"l\" smooth csplines, "+str(minp[1])+" notitle with lines, "+str(safe_llg)+" notitle with lines\n")
                        qe.write(
                            "plot \"" + fildname + "_wllg.data\" using 1:2 title \"p\" with points, \"" + fildname + "_wllg.data\" using 1:2 title \"l\" smooth csplines, " + str(
                                safe_llg_40) + " title \"p40\" with lines, " + str(
                                safe_llg_50) + " title \"p50\" with lines, " + str(
                                safe_llg_55) + " title \"p55\" with lines, " + str(
                                safe_llg_60) + " title \"p60\" with lines, " + str(
                                safe_llg_65) + " title \"p65\" with lines, " + str(
                                safe_llg_70) + " title \"p70\" with lines, " + str(
                                safe_llg_75) + " title \"p75\" with lines, " + str(
                                safe_llg_80) + " title \"p80\" with lines, " + str(
                                safe_llg_85) + " title \"p85\" with lines\n")
                        qe.close()

                    qo = open(fildname + "_wllg_var.data", "w")
                    qo.write("#\tX\tY\n")
                    # all_var = map(lambda x: -1*numpy.log(x[0]/float(x[1])),res_wllg.values())
                    all_var = map(lambda x: x[0] / float(x[1]), res_wllg.values())
                    qo.write(str(LIMIT_CLUSTER) + "\t" + str(numpy.mean(all_var)) + "\t" + str(
                        numpy.std(all_var)) + "\n")
                    qo.close()

                    # NOTE(review): resi_1/resi_2/resi_3 (the selections minus res_list_75)
                    # are computed but the calls below use resi_list_1/2/3 - confirm intent.
                    resi_1 = []
                    resi_2 = []
                    resi_3 = []
                    for resi in resi_list_1:
                        if resi in res_list_75:
                            continue
                        else:
                            resi_1.append(resi)
                    for resi in resi_list_2:
                        if resi in res_list_75:
                            continue
                        else:
                            resi_2.append(resi)
                    for resi in resi_list_3:
                        if resi in res_list_75:
                            continue
                        else:
                            resi_3.append(resi)

                    # Write the library models: peaks_ and pklat_ through generatePDBomitting,
                    # overt_ and the percentile models through generatePDB.
                    Bioinformatics.generatePDBomitting(model_file, resi_list_1, os.path.join(outputDire,
                                                                                             "../../../library/peaks_" + str(
                                                                                                 number_cluster) + "_0.pdb"),
                                                       trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDBomitting(model_file, resi_list_2, os.path.join(outputDire,
                                                                                             "../../../library/pklat_" + str(
                                                                                                 number_cluster) + "_0.pdb"),
                                                       trim_to_polyala=trim_to_polyala)
                    # Bioinformatics.generatePDBomitting(model_file,resi_list_3,os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb"),trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, resi_list_3, os.path.join(outputDire,
                                                                                     "../../../library/overt_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_40, os.path.join(outputDire,
                                                                                     "../../../library/percentile40_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_50, os.path.join(outputDire,
                                                                                     "../../../library/percentile50_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_55, os.path.join(outputDire,
                                                                                     "../../../library/percentile55_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_60, os.path.join(outputDire,
                                                                                     "../../../library/percentile60_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_65, os.path.join(outputDire,
                                                                                     "../../../library/percentile65_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_70, os.path.join(outputDire,
                                                                                     "../../../library/percentile70_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_75, os.path.join(outputDire,
                                                                                     "../../../library/percentile75_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_80, os.path.join(outputDire,
                                                                                     "../../../library/percentile80_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)
                    Bioinformatics.generatePDB(model_file, res_list_85, os.path.join(outputDire,
                                                                                     "../../../library/percentile85_" + str(
                                                                                         number_cluster) + "_0.pdb"),
                                               trim_to_polyala=trim_to_polyala)

    # Build "final.sum": three copies of the last processed solution, one per
    # peaks/pklat/overt model.
    # NOTE(review): this section relies on rota and number_cluster as left by the
    # loops above; its nesting level should be confirmed against the original file.
    dipl = {}
    C = []
    hp = ADT.Heap()
    brncv = {}
    models_final = [os.path.join(outputDire, "../../../library/peaks_" + str(number_cluster) + "_0.pdb"),
                    os.path.join(outputDire, "../../../library/pklat_" + str(number_cluster) + "_0.pdb"),
                    os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")]
    for y in range(3):
        rol = copy.deepcopy(rota)
        rol["name"] = "ensemble" + str(y)
        brncv[rol["name"]] = models_final[y]
        hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol)
    dipl["heapSolutions"] = hp
    C.append(dipl)
    writeSumClusters(C, outputDire, "final", brncv)
    convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, os.path.join(outputDire, "final.sum"),
                                                              "ROTSOL")

    # The heaps stored in Dust are never filled in this function, so this loop
    # has nothing to iterate over as written.
    for du in Dust:
        for item in du["heapSolutions"].asList():
            pi, ri = item
            pdbf = cnv[ri["name"]]
            trim_small_chains(pdbf, 3)

    return convNames
toN != None and hwm < fromN: continue elif fromN >= 0 and toN != None and hwm >= toN: break if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "ellg.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "ellg.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") ensembles["ensemble" + str(counter + 1)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_ELLG" + "\n") f.write('HKLIN "ellg.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE eLLG computation" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("ELLG TARG " + str(ellg_target) + "\n") f.write("ENSEMBLE 
ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) elif frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile, list): # N fragment case print "Sorry, multiple fragments case is not yet implemented" sys.exit(0) def startMR_ELLGJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".rlist")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): # if frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile,list): # listaDirec.append((cm.get_remote_pwd(),numero)) # else: listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "MR_ELLG of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + 
str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startMR_ELLGJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." 
sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("ellg.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return (nq, ensembles) def startFRF(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, ensembles={}, tops=None, LIMIT_CLUSTER=None, justpdb=None, fromN=0, toN=None, VRMS=False, BFAC=True, BULK_FSOL=-1, BULK_BSOL=-1): if VRMS: VRMS = "ON" else: VRMS = "OFF" if BFAC: BFAC = "ON" else: BFAC = "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile hwm = -1 for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): if justpdb != None and pdbf != os.path.abspath(justpdb): continue # print "Preparing: "+str(pdbf)+" as model "+str(counter) hwm += 1 if fromN >= 0 and toN != None and hwm < fromN: continue elif fromN >= 0 and toN != None and hwm >= toN: break if counter % 
NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rot.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rot.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") ensembles["ensemble" + str(counter + 1)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_FRF" + "\n") f.write('HKLIN "rot.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test rotation for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") 
f.write("XYZOUT OFF" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(counter + 1) + "\n") f.write("SEARCH METHOD FAST" + "\n") f.write("SEARCH DEEP OFF" + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) elif frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile, list): # even if frag_fixed is 2 we still do not have computed anything for the second fragment search so we have to recollect data # as they would be at the first fragment, then when we evaluateFRF we will change the content of the dataase and the FTF could # be computed taking the data as 2 fragment. 
ensels = [] tovalu = frag_fixed if frag_fixed == 2: tovalu = 1 cou = 0 ClusAll = None if LIMIT_CLUSTER != None: ClusAll = dir_o_liFile[LIMIT_CLUSTER] else: ClusAll = {} nh = ADT.Heap() for clun in dir_o_liFile: for item in clun["heapSolutions"]: prio, rota = item nh.push(prio, rota) ClusAll["heapSolutions"] = nh for sol in ClusAll["heapSolutions"]: if tops != None and cou >= tops: break rota = sol[1] prio = sol[0] pdbf = "" list_pdbs = {} ct = 0 enselines = "" if frag_fixed > 1 and "ensemble" + str(frag_fixed) in ensembles: pdbf = ensembles["ensemble" + str(frag_fixed)] if pdbf not in list_pdbs.keys(): list_pdbs[pdbf] = ("ensemble" + str(frag_fixed), ct, True) ct += 1 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, False) ct += 1 elif list_pdbs[ensembles[rota["name"]]][0] != rota["name"]: dfr = list_pdbs[ensembles[rota["name"]]][1] snc = "ENSEMBLE " + rota["name"] + " PDBFILE " + str(dfr) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n" if snc not in ensels: enselines += snc ensels.append(snc) else: pdbf = ensembles[rota["name"]] # ensembles["ensemble"+str(frag_fixed)] = pdbf if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 # rota["name"] = "ensemble"+str(frag_fixed) if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 elif list_pdbs[ensembles[rotafi["name"]]][0] != rotafi["name"]: dfr = list_pdbs[ensembles[rotafi["name"]]][1] snc = "ENSEMBLE " + rotafi["name"] + " PDBFILE " + str(dfr) + "_" + str( counter) + ".pdb RMS " + str(RMSD) + "\n" if snc not in ensels: enselines += snc ensels.append(snc) numall = ct # if usePDB != None: # pdbf = usePDB # print "Preparing job for model: "+str(pdbf)+" in cluster "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = 
os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rot.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rot.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") headlines = "" taillines = "" headlines += "#!/bin/tcsh" + "\n" headlines += "MODE MR_FRF" + "\n" headlines += 'HKLIN "rot.mtz"' + "\n" if not Intensities: headlines += 'LABIN F=' + F + ' SIGF=' + SIGF + '\n' else: headlines += 'LABIN I=' + F + ' SIGI=' + SIGF + '\n' headlines += "TITLE Test rotation for Grid" + "\n" headlines += "JOBS 1" + "\n" headlines += "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n" headlines += "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n" headlines += "MACANO PROTOCOL OFF" + "\n" headlines += "MACTNCS PROTOCOL OFF" + "\n" headlines += "TNCS EPSFAC READ anis.tncs" + "\n" headlines += "NORM EPSFAC READ anis.norm" + "\n" headlines += "RESOLUTION " + str(lowR) + " " + str(highR) + "\n" headlines += "XYZOUT OFF" + "\n" for pdbf in 
list_pdbs.keys(): val = list_pdbs[pdbf] snc = "ENSEMBLE " + val[0] + " PDBFILE " + str(val[1]) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n" if snc not in ensels: enselines += snc ensels.append(snc) if val[2]: enselines += "SEARCH ENSEMBLE " + val[0] + "\n" if frag_fixed > 1: taillines += "SOLU SET" + "\n" taillines += "SOLU SPAC " + str(spaceGroup) + "\n" if frag_fixed > 2: for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) taillines += "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n" taillines += "SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(rota["llg"]) + " ZSCORE: " + str( rota["zscore"]) + "\n" taillines += "SEARCH METHOD FAST" + "\n" taillines += "SEARCH DEEP OFF" + "\n" taillines += "PEAKS ROT CLUSTER ON" + "\n" if BULK_BSOL >= 0 and BULK_FSOL >= 0: taillines += "SOLPARAMETERS BULK USE ON" + "\n" taillines += "SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n" else: taillines += "SOLPARAMETERS BULK USE OFF" + "\n" if sampl != -1: taillines += "SAMPLING ROT " + str(sampl) + "\n" taillines += "PEAKS ROT SELECT PERCENT" + "\n" taillines += "PEAKS ROT CUTOFF " + str(save_rot) + "\n" taillines += 'ROOT "' + str(counter) + '"\n' taillines += "END\n" taillines += "EOF-phaser" f = 
open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write(headlines) f.write(enselines) f.write(taillines) f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),"./"+str(counter)+".sh") counter += 1 # print "checking",counter,counter%NUMBER_OF_FILES_PER_DIRECTORY,ClusAll["heapSolutions"].len(),counter >= 1 and ClusAll["heapSolutions"].len() == 0 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (counter >= 1 and ClusAll["heapSolutions"].len() == 0): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) cou += 1 if counter <= 0: print "WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE ROTATION SEARCH. MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFUALT IS POSITIVE VALUES)...ENDING NOW..." # sys.exit(0) return counter def startFRFJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".rlist")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): # if frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile,list): # listaDirec.append((cm.get_remote_pwd(),numero)) # else: listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "FRF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) print "Picking all rotations with a peak >= " + str(final_rot) print "Saving all rotations with a peak >= " + str(save_rot) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startFRFJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rot.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return (nq, ensembles) def startPlaneElongation(inputDirectory, outputDirectory, n_res, n_trials, n_copies): import Data if n_copies > 3: print "Cannot use more than 3 copies to exapand the partial solution. Quit." 
sys.exit(1) if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) for root, subFolders, files in os.walk(inputDirectory): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): for q in range(n_trials): rmsd = None allAtoms = [] atoms_add = [] pdball = None f = open(pdbf, "r") allpdb = f.read() f.close() struc = Bioinformatics.getStructure(os.path.basename(pdbf), pdbf) list_res = [] for model in struc: for chain in model: list_res += chain.get_unpacked_list() for resi in list_res: for atm in resi: atoms_add.append(atm) charch = ["W", "Y", "Z"] sx = 0 for key in Data.frequent_folds.keys(): pdball = "" sx += 1 struttura = Data.frequent_folds[key] struc_c = Bioinformatics.getStructure(key, cStringIO.StringIO(struttura)) list_res_c = [] atoms_add_c = [] for model_c in struc_c: for chain_c in model_c: list_res_c += chain_c.get_unpacked_list() for resi_c in list_res_c: for atm_c in resi_c: atoms_add_c.append(atm_c) for nd in range(n_copies): indo = numpy.random.randint(len(list_res) - n_res) reference = [] for z in range(indo, indo + n_res): atomCA = list_res[z]["CA"] atomC = list_res[z]["C"] atomO = list_res[z]["O"] atomN = list_res[z]["N"] reference.append(atomCA.get_coord()) reference.append(atomC.get_coord()) reference.append(atomO.get_coord()) reference.append(atomN.get_coord()) best_rt = (None, None) best_rmsd = 100 rmsd = 100 times = 0 while times < 5000: times += 1 compare = [] indi = numpy.random.randint(len(list_res_c) - n_res) for z in range(indi, indi + n_res): atomCA = list_res_c[z]["CA"] atomC = list_res_c[z]["C"] atomO = list_res_c[z]["O"] atomN = list_res_c[z]["N"] compare.append(atomCA.get_coord()) compare.append(atomC.get_coord()) compare.append(atomO.get_coord()) compare.append(atomN.get_coord()) transf, rmsd_list, rmsd = Bioinformatics.fit_wellordered(numpy.array(reference), numpy.array(compare), n_iter=1, full_output=True) R, t = transf if rmsd < best_rmsd: best_rmsd = rmsd best_rt = (R, t) # print "Selected 
best rmsd for",key,"is",best_rmsd,times allAtoms = Bioinformatics.transform_atoms(atoms_add_c, best_rt[0], best_rt[1]) pdball += Bioinformatics.getPDBFromListOfAtom(allAtoms, renumber=True, uniqueChain=True, chainId=charch[nd])[0] + "\n" filename = os.path.join(outputDirectory, os.path.basename(pdbf)[:-4] + "_" + str(q) + "-" + str(sx) + ".pdb") print "File", os.path.basename(filename), "rmsd:", best_rmsd f = open(filename, "w") f.write(allpdb + "\n") f.write(pdball + "\n") f.close() def startRandomlyExpand(inputDirectory, outputDirectory, ray, n_ca_pick, n_trials): ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) for root, subFolders, files in os.walk(inputDirectory): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): for q in range(n_trials): atoms_add = [] f = open(pdbf, "r") allpdb = f.read() f.close() struc = Bioinformatics.getStructure(os.path.basename(pdbf), pdbf) for model in struc: for chain in model: list_res = chain.get_unpacked_list() indices = numpy.random.randint(len(list_res) - 1, size=n_ca_pick) print "Picking random indices", indices for index in indices: atom_ca = list_res[index]["CA"] nx, ny, nz = atom_ca.get_coord() cz = numpy.random.randint(-1 * ray, high=ray) phi = numpy.random.uniform(low=0.0, high=2.0) nx += numpy.sqrt(ray ** 2 - cz ** 2) * numpy.cos(phi) ny += numpy.sqrt(ray ** 2 - cz ** 2) * numpy.sin(phi) nz += cz atoms_add.append([nx, ny, nz, atom_ca.get_parent().get_segid(), atom_ca.get_name(), atom_ca.get_fullname(), atom_ca.get_altloc()]) filename = os.path.join(outputDirectory, os.path.basename(pdbf)[:-4] + "_" + str(q) + ".pdb") f = open(filename, "w") f.write(allpdb + "\n") atom_number = 1 resseq = 1 line = "" for atom in atoms_add: args = ( "ATOM ", atom_number, atom[5], atom[6], 'ALA', 'Z', resseq, ' ', atom[0], atom[1], atom[2], 1.0, 25.0, atom[3], 'C', ' ') line = ATOM_FORMAT_STRING % args 
def startPREPARE(cm, sym, nameJob, CC_Val, outputDirectory, cell_dim, spaceGroup, nTop, topNext=None):
    """
    Copy the models listed in CC_Val into outputDirectory with a new CRYST1 record.

    For every entry the file clu["corresp"] is read, every line whose first whitespace-separated token
    is "CRYST1" is dropped, and the remaining lines are written after a CRYST1 line built from
    cell_dim (a, b, c, alpha, beta, gamma) and spaceGroup. The output keeps the input base name; when
    the entry has a "suffix" key it is inserted before ".pdb" (the last four characters of the name
    are assumed to be the extension).

    Keyword arguments:
    CC_Val: list of dictionaries with at least the key "corresp" (path of the model)
    nTop: when not None the loop stops as soon as more than nTop models have been written, so
          nTop + 1 models are processed at most
    cm, sym, nameJob and topNext are accepted for interface compatibility and not used.

    Returns a dictionary mapping "ensemble<n>" to outputDirectory + "/" + <output name>.
    """
    if not os.path.exists(outputDirectory):
        os.makedirs(outputDirectory)

    CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}'
    convNames = {}
    counter = 0
    for clu in CC_Val:
        pdbf = clu["corresp"]
        if nTop is not None and counter > nTop:
            break

        print("Preparing: " + str(pdbf) + " as model " + str(counter))

        # Read the model once, before the target is opened, so that a target equal to the source
        # is not truncated before its content has been read.
        with open(pdbf, "r") as source:
            lines = source.readlines()

        if "suffix" in clu:
            nomino = os.path.basename(pdbf)[:-4] + clu["suffix"] + ".pdb"
        else:
            nomino = os.path.basename(pdbf)
        target = outputDirectory + "/" + nomino

        data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]),
                    alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup,
                    z=1)

        kept = []
        for lin in lines:
            ler = lin.split()
            # Blank lines have no tokens; they are copied through instead of raising IndexError.
            if ler and ler[0] == "CRYST1":
                continue
            kept.append(lin)

        with open(target, "w") as foc:
            foc.write(CRD.format(**data) + "\n")
            foc.write("".join(kept))

        convNames["ensemble" + str(counter)] = target
        counter += 1

    return convNames
dirout != "" and not os.path.exists(dirout): os.makedirs(dirout) bests = [] foms = {"llg": [numpy.inf, 0.0], "zscore": [numpy.inf, 0.0]} for ci in range(len(ClusAll)): clu = ClusAll[ci] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != ci: continue s = 0 cou = 0 hp = ADT.Heap() liBest = [] i = 0 for sol in clu["heapSolutions"]: if ntop != None and i >= ntop: break bestRota = sol[1] prio = (bestRota["llg"], bestRota["zscore"]) i += 1 # print bestRota["name"],convNames[bestRota["name"]] liBest.append(bestRota) llg = bestRota["llg"] zscore = bestRota["zscore"] cluster_fin = bestRota["original_rotcluster"] if llg < (foms["llg"])[0]: (foms["llg"])[0] = llg if llg > (foms["llg"])[1]: (foms["llg"])[1] = llg if zscore < (foms["zscore"])[0]: (foms["zscore"])[0] = zscore if zscore > (foms["zscore"])[1]: (foms["zscore"])[1] = zscore if writePDB: pdbSt = [[]] if "fixed_frags" in bestRota: for frifr in bestRota["fixed_frags"]: allpdbli = __getStringOfRototranslatedStructure(DicParameters, frag_fixed, dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, frifr, ci, i, modeTra=modeTra, LIMIT_CLUSTER=LIMIT_CLUSTER, renameWithConvNames=renameWithConvNames, sufixSolPos=sufixSolPos) for pdbname in allpdbli: f = open(pdbname, "r") allpdb = f.read() f.close() os.remove(pdbname) pdbSt[-1].append(allpdb) pdbSt.append([]) allpdbli = __getStringOfRototranslatedStructure(DicParameters, frag_fixed, dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, bestRota, ci, i, modeTra=modeTra, LIMIT_CLUSTER=LIMIT_CLUSTER, renameWithConvNames=renameWithConvNames, sufixSolPos=sufixSolPos) for pdbname in allpdbli: f = open(pdbname, "r") allpdb = f.read() f.close() os.remove(pdbname) pdbSt[-1].append(allpdb) wow = numpy.array(pdbSt) for fileC in wow.transpose(): path_c, new_name_cond = 
os.path.split(os.path.normpath(allpdbli.pop(0))) new_name_cond = new_name_cond.split("_")[0] + appendToName + "_" + new_name_cond.split("_")[ 1] + "_" + new_name_cond.split("_")[2] f = open(os.path.join(path_c, new_name_cond), "w") druppo = str(cluster_fin) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo) + "\n") f.write(Bioinformatics.sequentialRenumberListOfPdbs(fileC)) f.close() bests.append(liBest) # for bestRota in liBest: # priority = (-1*bestRota["llg"], -1*bestRota["zscore"]) # clu["heapSolutions"].push(priority,bestRota) return bests, foms def filterClustersAndSolutionByCores(CluAll, sym): numeroClus = 2 * sym.PROCESSES numeroRot = 64 * sym.PROCESSES CluAll = sorted(CluAll, __cmp_cluster, reverse=True) print "Original clusters", len(CluAll) n = 0 Clu = [] for clu in CluAll: if n >= numeroClus: print "Break clusters because", n, numeroClus break s = 0 clun = {"heapSolutions": ADT.Heap()} for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): rota = item[1] prio = item[0] if s >= numeroRot: print "Break rotations because", s, numeroRot break clun["heapSolutions"].push(prio, rota) s += 1 Clu.append(clun) n += 1 print "Final Clusters", len(Clu) return Clu def filterAndCountClusters(ClusAll, ensembles, mode, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg, unify=False): condition = True while condition: nea = '' neb = '' numc = 0 for inde in range(len(ClusAll)): clu = ClusAll[inde] nrpdbs = {} for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): rota = item[1] prio = item[0] numc = rota["n_prev_cluster"] pdbname = ensembles[rota['name']] if ensembles[rota['name']] not in nrpdbs: nrpdbs[ensembles[rota['name']]] = rota else: r = nrpdbs[ensembles[rota['name']]] if mode == 'llg' and rota['llg'] > r['llg']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'llg' and rota['llg'] == r['llg'] and 
rota['zscore'] > r['zscore']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'zscore' and rota['zscore'] > r['zscore']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'zscore' and rota['zscore'] == r['zscore'] and rota['llg'] > r['llg']: nrpdbs[ensembles[rota['name']]] = rota newDic = {"heapSolutions": ADT.Heap()} for key in nrpdbs.keys(): rota = nrpdbs[key] newDic["heapSolutions"].push((-1 * rota["llg"], -1 * ["zscore"]), rota) ClusAll[inde] = newDic if unify: condition, ClusAll = unifyClustersEquivalent(ClusAll, ensembles, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg) else: condition = False return ClusAll def applyFilterName(CluAll, ensembles): convNames = {} for clu in CluAll: rotazioni = map(lambda x: x[1], clu["heapSolutions"].asList()) pdb_done = [] hp = ADT.Heap() for rotaz in sorted(rotazioni, __cmp_rota, reverse=True): if (rotaz["name"], rotaz["n_prev_cluster"]) in pdb_done: continue pdb_done.append((rotaz["name"], rotaz["n_prev_cluster"])) priority = (-1 * rotaz['llg'], -1 * rotaz['zscore']) hp.push(priority, rotaz) convNames[rotaz["name"]] = ensembles[rotaz["name"]] clu["heapSolutions"] = hp return CluAll, convNames # NOTE: this method is supported just for ARCIMBOLDO-BORGES.py and NOT!!!! 
# NOTE: this method is supported just for ARCIMBOLDO-BORGES.py and NOT for ARCIMBOLDO
def unifyClustersEquivalent(ClusAll, ensembles, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg):
    """Merge rotation clusters whose top rotations are equivalent.

    The top rotation of every non-empty cluster is compared (through
    ``compareRotation``) with the top rotation of every later, not yet merged,
    non-empty cluster. When they are equivalent, every solution of the later
    cluster is relabelled with the cluster id of the earlier one and pushed
    into the earlier cluster's heap with priority ``(-llg, -zscore)``.

    The relabelling overwrites ``original_rotcluster`` with a single id, which
    implies exactly one fixed fragment: hence the method is only valid for
    ARCIMBOLDO-BORGES.

    :param ClusAll: list of cluster dictionaries with a "heapSolutions" heap
                    (modified in place).
    :param threshold_alg: threshold forwarded to ``compareRotation``; it is
                          chosen by the caller, no default is derived here.
    :return: tuple ``(performed, ClusAll)``; ``performed`` is True when at
             least one pair of clusters was merged.
    """
    performed = False
    visited = set()
    total = len(ClusAll)
    for a in range(total):
        if a in visited:
            continue
        visited.add(a)
        clu1 = ClusAll[a]
        if len(clu1["heapSolutions"].asList()) == 0:
            continue

        # Peek at the top solution: pop it and push it straight back.
        item1 = clu1["heapSolutions"].pop()
        clu1q = item1[1]
        clu1["heapSolutions"].push(item1[0], item1[1])
        numc = clu1q["n_prev_cluster"]

        for b in range(a + 1, total):
            if b in visited:
                continue
            clu2 = ClusAll[b]
            if len(clu2["heapSolutions"].asList()) == 0:
                continue

            item2 = clu2["heapSolutions"].pop()
            clu2q = item2[1]
            clu2["heapSolutions"].push(item2[0], item2[1])

            # "equivalent" replaces the old local name "re", which shadowed the re module.
            equivalent, _ = compareRotation(clu1q, clu2q, threshold_alg, clusteringAlg, quate, laue, listNCS,
                                            ensembles, cell_dim, True)
            if not equivalent:
                continue

            performed = True
            visited.add(b)
            # NOTE(review): iterating an ADT.Heap presumably consumes it - confirm against ADT.
            for ite in clu2["heapSolutions"]:
                rota2 = ite[1]
                rota2['n_prev_cluster'] = numc
                # NOTE: the following instruction implies we are always working with 1 fixed frag.
                # Stored as a string because the packing code calls .split("_") on this field.
                rota2['original_rotcluster'] = str(numc)
                priority = (-1 * rota2['llg'], -1 * rota2['zscore'])
                clu1["heapSolutions"].push(priority, rota2)
    return performed, ClusAll


def filterOutImprobableSols(ClusAll, minLLG):
    """Drop solutions below ``minLLG`` and, if any solution is promising, below Z-score 7.5.

    When the best Z-score over all clusters is at least 7.50 that value becomes
    the Z-score threshold, otherwise the threshold is 0. Every cluster is then
    rebuilt keeping only the solutions with ``zscore >= threshold`` and
    ``llg >= minLLG`` (original priorities are preserved).

    :param ClusAll: list of cluster dictionaries with a "heapSolutions" heap.
    :param minLLG: minimum LLG a solution must have to be kept.
    :return: new list with one ``{"heapSolutions": heap}`` dictionary per input
             cluster, in the same order. An empty input yields an empty list.
    """
    zscores = [entry[1]["zscore"] for clu in ClusAll for entry in clu["heapSolutions"].asList()]

    zscore_thresh = 0
    # Guard against "no solutions at all": the old code indexed [0] unconditionally.
    if zscores:
        best_zscore = max(zscores)
        if best_zscore >= 7.50:
            zscore_thresh = 7.50
            print("Found promising solutions with Zscore at: " + str(best_zscore))
            print("Pruning solutions with lower zscore.")

    ClurAll = []
    for clu in ClusAll:
        dic = {"heapSolutions": ADT.Heap()}
        # NOTE(review): iterating an ADT.Heap presumably consumes it - confirm against ADT.
        for prio, rota in clu["heapSolutions"]:
            if rota["zscore"] >= zscore_thresh and rota["llg"] >= minLLG:
                dic["heapSolutions"].push(prio, rota)
        ClurAll.append(dic)
    return ClurAll


def filterAllSolsByTop(ClusAll, frag_fixed, percentage):
    """Keep only the solutions whose LLG is at least the mean LLG of all solutions.

    :param ClusAll: list of cluster dictionaries with a "heapSolutions" heap.
    :param frag_fixed: unused, kept for interface compatibility.
    :param percentage: unused, kept for interface compatibility.
    :return: new list with one ``{"heapSolutions": heap}`` dictionary per input
             cluster, in the same order, holding the surviving solutions with
             their original priorities.
    """
    llg_all = []
    for clu in ClusAll:
        # The ordering is kept so the mean is accumulated in the same order as before.
        for prio, rota in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True):
            llg_all.append(rota["llg"])

    if llg_all:
        llg_avg = float(numpy.mean(numpy.array(llg_all)))
    else:
        # Same value numpy.mean would give for an empty array, without the RuntimeWarning.
        llg_avg = float("nan")

    print("Pruning solutions out of the mean. AVG LLG: " + str(llg_avg))

    ClurAll = []
    for clu in ClusAll:
        dic = {"heapSolutions": ADT.Heap()}
        # NOTE(review): iterating an ADT.Heap presumably consumes it - confirm against ADT.
        for prio, rota in clu["heapSolutions"]:
            if rota["llg"] >= llg_avg:
                dic["heapSolutions"].push(prio, rota)
        ClurAll.append(dic)
    return ClurAll
= False break if toContinue: continue # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 if insfile == None: # shutil.copyfile(pdbf, outputDirectory+"/"+str(counter)+".pda") foc = open(outputDirectory + "/" + str(counter) + ".pda", "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) foc.write(CRD.format(**data) + "\n") foc2 = open(pdbf, "r") alls = foc2.readlines() foc2.close() countt = 0 scrivi = True previous = -1 lineaDascri = None if fragdomain: for linea in alls: if linea.startswith("REMARK"): foc.write(linea) elif linea.startswith("ATOM") or linea.startswith("HETATM"): if scrivi: foc.write("REMARK DOMAIN " + str(countt + 1) + "\n") if lineaDascri != None: foc.write(lineaDascri) lineaDascri = None countt += 1 scrivi = False residuo = int((linea[22:26]).strip()) if previous > 0 and (residuo == (previous + 1)) or (residuo == previous): foc.write(linea) previous = residuo elif previous > 0: scrivi = True lineaDascri = linea previous = residuo else: foc.write(linea) previous = residuo else: for linea in alls: foc.write(linea) foc.close() else: try: # os.symlink(pdbf,os.path.join(outputDirectory,str(counter)+".phi")) shutil.copyfile(pdbf, os.path.join(outputDirectory, str(counter) + ".phi")) except: print "", os.path.join(outputDirectory, str(counter) + ".phi"), "already exists!" 
try: os.symlink(hkl, os.path.join(outputDirectory, str(counter) + ".hkl")) except: print "", os.path.join(outputDirectory, str(counter) + ".hkl"), "already exists!" if insfile != None: if hasattr(cm, "channel"): shutil.copyfile(insfile, os.path.join(outputDirectory, str(counter) + ".ins")) else: try: os.symlink(insfile, os.path.join(outputDirectory, str(counter) + ".ins")) except: print "", os.path.join(outputDirectory, str(counter) + ".ins"), "already exists!" try: if os.path.exists(ent): os.symlink(ent, os.path.join(outputDirectory, str(counter) + ".ent")) except: print "", os.path.join(outputDirectory, str(counter) + ".ent"), "already exists!" if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) if insfile == None: cm.copy_local_file(outputDirectory + "/" + str(counter) + ".pda", "./" + str(ndir - 1) + "/" + str(counter) + ".pda") else: cm.copy_local_file(outputDirectory + "/" + str(counter) + ".phi", "./" + str(ndir - 1) + "/" + str(counter) + ".phi") cm.copy_local_file(outputDirectory + "/" + str(counter) + ".ins", "./" + str(ndir - 1) + "/" + str(counter) + ".ins") cm.create_remote_link(cm.remote_hkl_path, str(counter) + ".hkl") if os.path.exists(ent): cm.create_remote_link(cm.remote_ent_path, str(counter) + ".ent") convNames[str(counter) + startfile] = pdbf # conv2[rota["name"]] = str(counter)+".pda" counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startExpJob(outputDirectory, op, lineargs): if os.path.exists(os.path.join(outputDirectory, str(op) + ".phs")): return print "Executing..." 
print "nice", "-n" + str(nice), PATH_NEW_SHELXE, os.path.join(outputDirectory, str(op) + startfile), " ".join( lineargs) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE, str(op) + startfile] + lineargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter lia = shlxLine.split() print "Expansion of " + str(nq) + " models with:" print "hkl: " + str(hkl) print "Arguments: " + str(shlxLine) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startExpJob, outputDirectory, op, lia) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_SHELXE + " " + os.path.join(outputDirectory, str(op) + startfile) + " " + " ".join( lia) + "/dev/null" SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST, single=single) break except KeyboardInterrupt: print "The user requires to exit from Borges." 
def startPACK(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso,
              normfactors, tncsfactors, nice, RMSD, lowR, highR, packSelect, cutoff, distance, spaceGroup, frag_fixed,
              tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False):
    """Write one Phaser MR_PAK script per solution and launch the packing jobs.

    For every solution of every selected cluster a ``<counter>.sh`` Phaser
    input is written under ``outputDire/<ndir>/`` (a new numbered directory is
    started every NUMBER_OF_FILES_PER_DIRECTORY scripts) together with links to
    the mtz, tncs, norm and model files. The scripts are then run locally
    (``cm is None``) or submitted through the grid manager ``cm``.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-collapsed source; in particular the final "change back to
    current_dir2" check is assumed to run after every script - confirm.

    :param cm: grid/connection manager, or None to run on the local machine.
               If it has a "channel" attribute, files are also mirrored remotely.
    :param sym: object providing PROCESSES, REALPROCESSES and couldIClose.
    :param nameJob: name given to the grid job.
    :param ClusAll: list of cluster dictionaries with a "heapSolutions" heap.
    :param ensembles: dictionary mapping a rotation name to its model path.
    :param outputDire: directory where the numbered job directories are created.
    :param mtz, normfactors, tncsfactors: files linked as pack.mtz / anis.norm / anis.tncs.
    :param F, SIGF: column labels; written as I/SIGI when ``Intensities`` is true.
    :param nice: niceness used for local Phaser processes.
    :param frag_fixed: number of the fragment being placed; values > 1 make the
                       script include the solution's "fixed_frags".
    :param tops: when ``frag_fixed == 1``, maximum number of scripts to write.
    :param LIMIT_CLUSTER: when not None, only the cluster with this index is used.
    :param VRMS, BFAC: booleans translated to the ON/OFF flags of MACMR.
    :param DicParameters, Aniso, packSelect, distance: unused, kept for
           interface compatibility.
    :return: number of jobs queued (``counter`` locally, or the value returned
             by ``cm.submitJobs``).
    """
    if not os.path.exists(outputDire):
        os.makedirs(outputDire)

    VRMS = "ON" if VRMS else "OFF"
    BFAC = "ON" if BFAC else "OFF"

    dirente = outputDire
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    remote = hasattr(cm, "channel")
    if remote:
        current_dir = cm.get_remote_pwd()
        print(cm.create_remote_dir(os.path.basename(dirente)))
        print(cm.change_remote_dir(os.path.basename(dirente)))

    def solu_6dim(r, cluster_label, llg, zscore):
        # One "SOLU 6DIM" line; the trailing "#CLUSTER ... ZSCORE" part is a Phaser comment.
        return ("SOLU 6DIM ENSE " + r["name"] + " EULER \t" + str((r["euler"])[0]) + " " + str((r["euler"])[1]) +
                " " + str((r["euler"])[2]) + "\t" + "FRAC " + str((r["frac"])[0]) + " " + str((r["frac"])[1]) +
                " " + str((r["frac"])[2]) + "\t" + "BFAC " + str(r["bfactor"]) + " #CLUSTER: " + cluster_label +
                " LLG: " + str(llg) + " ZSCORE: " + str(zscore) + "\n")

    # Keyword lines shared by every script (they do not depend on the solution).
    if not Intensities:
        labin = "LABIN F=" + F + " SIGF=" + SIGF + "\n"
    else:
        labin = "LABIN I=" + F + " SIGI=" + SIGF + "\n"
    header = [
        "#!/bin/tcsh" + "\n",
        "MODE MR_PAK" + "\n",
        'HKLIN "pack.mtz"' + "\n",
        'HKLOUT OFF' + "\n",
        labin,
        "TITLE Test packing" + "\n",
        "JOBS 1" + "\n",
        "SGALTERNATIVE SELECT NONE" + "\n",
        "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n",
        "RESOLUTION " + str(lowR) + " " + str(highR) + "\n",
        "XYZOUT OFF" + "\n",
        "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n",
        "MACANO PROTOCOL OFF" + "\n",
        "MACTNCS PROTOCOL OFF" + "\n",
        "TNCS EPSFAC READ anis.tncs" + "\n",
        "NORM EPSFAC READ anis.norm" + "\n",
        "PACK CUTOFF " + str(cutoff) + "\n",
        # NOTE: If PACK COMPACT ON, Pack ensembles into a compact association
        # (minimize distances between centres of mass for the addition of each component in a solution)
        "PACK COMPACT OFF" + "\n",  # OFF at the moment, check if it would be better ON
        "XYZOUT OFF" + "\n",  # repeated on purpose: the emitted script is kept byte-identical
    ]

    counter = 0
    ndir = 0
    dirente2 = ""
    current_dir2 = ""
    numall = -1
    last_index = len(ClusAll) - 1
    for inde, clu in enumerate(ClusAll):
        nrts = len(clu["heapSolutions"].asList())
        if nrts == 0 or (LIMIT_CLUSTER is not None and LIMIT_CLUSTER != inde):
            # Skipped cluster: still leave the remote job directory if it was the last one.
            if current_dir2 != "" and inde == last_index and remote:
                cm.change_remote_dir(current_dir2)
            continue

        # NOTE(review): iterating an ADT.Heap presumably consumes it - confirm against ADT.
        for sol in clu["heapSolutions"]:
            if frag_fixed == 1 and tops is not None and counter >= tops:
                break

            rota = sol[1]
            prio = (rota["llg"], rota["zscore"])

            # Model path -> (ensemble name, index used in the link name, is the moving fragment).
            list_pdbs = {ensembles[rota["name"]]: (rota["name"], 0, True)}
            ct = 1
            if "fixed_frags" in rota:
                for rotafi in rota["fixed_frags"]:
                    if ensembles[rotafi["name"]] not in list_pdbs:
                        list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False)
                        ct += 1
            numall = ct

            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                # Start a new numbered directory and link the shared input files into it.
                outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/")
                if not os.path.exists(outputDirectory):
                    os.makedirs(outputDirectory)
                try:
                    os.symlink(mtz, os.path.join(outputDirectory, "pack.mtz"))
                    os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm"))
                    os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs"))
                except Exception:
                    print("Advisory: There was some problem generating the links to the mtz, tncs and norm files")
                    # Traceback goes to stdout next to the advisory (no stray "None" line).
                    traceback.print_exc(file=sys.stdout)

                dirente2 = outputDirectory
                if dirente2[-1] == "/":
                    dirente2 = dirente2[:-1]

                if remote:
                    current_dir2 = cm.get_remote_pwd()
                    print(cm.create_remote_dir(os.path.basename(dirente2)))
                    print(cm.change_remote_dir(os.path.basename(dirente2)))
                    cm.create_remote_link(cm.remote_mtz_path, "pack.mtz")
                    cm.create_remote_link(cm.remote_tncs_path, "anis.tncs")
                    cm.create_remote_link(cm.remote_norm_path, "anis.norm")
                ndir += 1

            for pdbf in list_pdbs.keys():
                val = list_pdbs[pdbf]
                try:
                    os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb"))
                except Exception:
                    # Best effort (e.g. link already there); one failure must not skip the other models.
                    pass

            if remote:
                for pdbf in list_pdbs.keys():
                    val = list_pdbs[pdbf]
                    print(cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))))
                    cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)),
                                          str(val[1]) + "_" + str(counter) + ".pdb")

            # Assemble the whole script first so a bad solution never leaves a half-written file.
            script = list(header)
            script.append("ENSEMBLE " + rota["name"] + " PDBFILE " + str(list_pdbs[ensembles[rota["name"]]][1]) +
                          "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n")
            if frag_fixed > 1:
                for frifr in rota["fixed_frags"]:
                    if frifr["name"] != rota["name"]:
                        script.append("ENSEMBLE " + frifr["name"] + " PDBFILE " +
                                      str(list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) +
                                      ".pdb RMS " + str(RMSD) + "\n")

            script.append("SOLU SET" + "\n")
            script.append("SOLU SPAC " + str(spaceGroup) + "\n")
            if frag_fixed > 1:
                # Already placed fragments come first, then the fragment being tested.
                for rotafi in rota["fixed_frags"]:
                    script.append(solu_6dim(rotafi, str(rotafi["original_rotcluster"].split("_")[-1]),
                                            rotafi["llg"], rotafi["zscore"]))
            if "n_prev_cluster" in rota:
                cluster_label = str(rota["original_rotcluster"].split("_")[-1])
            else:
                cluster_label = "NONE"
            script.append(solu_6dim(rota, cluster_label, prio[0], prio[1]))
            script.append('ROOT "' + str(counter) + '"\n')
            script.append("END\n")
            script.append("EOF-phaser")

            with open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") as f:
                f.writelines(script)

            if remote:
                cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"),
                                   "./" + str(ndir - 1) + "/" + str(counter) + ".sh")

            counter += 1

            # Directory full (or last cluster): go back to the parent remote directory.
            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == last_index):
                if remote:
                    cm.change_remote_dir(current_dir2)

    def startPACKJob(outputDirectory, op):
        # Run Phaser locally for script <op>.sh, writing its log to <op>.out.
        sh_path = os.path.join(outputDirectory, str(op) + ".sh")
        out_path = os.path.join(outputDirectory, str(op) + ".out")
        print("Executing...")
        print(" ".join(["nice", "-n" + str(nice), str(PATH_NEW_PHASER), "<", sh_path, ">", out_path]))
        with open(sh_path, "r") as f:
            with open(out_path, "w") as f2:
                p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2,
                                     stderr=subprocess.PIPE, cwd=outputDirectory)
                out, err = p.communicate()
        print(err)

    # (directory, number of jobs in it) pairs; only the last directory may be partially filled.
    listaDirec = []
    for pr in range(ndir):
        if pr == ndir - 1:
            numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY)
        else:
            numero = NUMBER_OF_FILES_PER_DIRECTORY

        if remote:
            listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero))
        else:
            listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero))

    nq = counter

    print("PACK of " + str(nq) + " models submitted to the local machine with:")
    print("mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF))
    print("MW: " + str(MW) + " NC: " + str(NC))
    print("RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR))

    if cm is None:
        for op in range(counter):
            outputDirectory = os.path.join(outputDire, "./" + str(op // NUMBER_OF_FILES_PER_DIRECTORY) + "/")
            try:
                if len(SystemUtility.NODES) == 0:
                    if sym.PROCESSES > 0:
                        print("I found  " + str(sym.REALPROCESSES) + " CPUs.")
                        # Spin until a thread slot is free, then start the job.
                        while 1:
                            if len(threading.enumerate()) <= sym.PROCESSES:
                                p = SystemUtility.OutputThreading(startPACKJob, outputDirectory, op)
                                p.start()
                                break
                    else:
                        print("FATAL ERROR: I cannot load correctly information of CPUs.")
                        sym.couldIClose = True
                        sys.exit(0)
                else:
                    comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + \
                              os.path.join(outputDirectory, str(op) + ".out")
                    SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"),
                                                PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST)
            except KeyboardInterrupt:
                print("The user requires to exit from Borges.")
                sym.couldIClose = True
                sys.exit(0)
    else:
        job = Grid.gridJob(nameJob)
        job.setExecutable(PATH_NEW_PHASER)
        job.setInitialDir(listaDirec)
        lineStdIn = ".sh"
        lia = lineStdIn.split()
        job.setStdIn(lia)
        job.addInputFile("pack.mtz", False)
        job.addInputFile("anis.tncs", False)
        job.addInputFile("anis.norm", False)
        if numall > 0:
            for lo in range(numall):
                job.addInputFile(str(lo) + "_" + ".pdb", True)
        else:
            job.addInputFile(".pdb", True)
        job.addInputFile(".sh", True)
        job.addOutputFile(".out", True)
        cm.setRequirements(PHASER_REQUIREMENTS)
        cm.setRank("kflops")
        (nc, nq) = cm.submitJobs(job, counter)

    if remote:
        cm.change_remote_dir(current_dir)

    return nq
list_pdbs.keys(): list_pdbs[ensembles[key]] = (key, ct, False) ct += 1 # print "===================================TEST RAPIDO=================" # print rota["name"],ensembles[rota["name"]] # print rota["fixed_frags"][0]["name"],ensembles[rota["fixed_frags"][0]["name"]] # print "===================================TEST RAPIDO=================" if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct # if usePDB != None: # pdbf = usePDB """" nameRota = "" if rota["name"].split("xx") > 1: #if rota["name"] contains "xx" it means is an ARCIMBOLDO nameRota = "ensarci0" else: nameRota = rota["name"] """ if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "pack.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" print traceback.print_exc(file=sys.stdout) pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "pack.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, 
os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") # print "B remote cwd",cm.get_remote_pwd() if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_PAK" + "\n") f.write('HKLIN "pack.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test packing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT OFF" + "\n") f.write("PACK CUTOFF " + str(cutoff) + "\n") f.write("PACK COMPACT OFF" + "\n") art = list_pdbs[ensembles[rota["name"]]][1] if not usePDO: f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if list_pdbs[ensembles[frifr["name"]]][1] != art: f.write("ENSEMBLE ensarci" + str( list_pdbs[ensembles[frifr["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str( RMSD) + "\n") else: for valok in list_pdbs: valo = list_pdbs[valok] f.write("ENSEMBLE ensarci" + str(valo[1]) + " PDBFILE " + str(valo[1]) + "_" + 
str( counter) + ".pdb RMS " + str(RMSD) + "\n") s = 0 for sol in clu["heapSolutions"]: if tops != None and s >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) lip = [] ros = [] lip.append(int(rota["original_rotcluster"].split("_")[-1])) if "fixed_frags" not in rota: rota["fixed_frags"] = [] for r in rota["fixed_frags"]: lip.append(int(r["original_rotcluster"].split("_")[-1])) ros.append(r) rota["fixed_frags"] = [] ros.append(rota) lip = sorted(lip) # print "!!!!!!!!!!",lip fixd = [] for ep in range(len( lip) - 1): # the last one would be excluded because is not the fixed but actual rotation ced = str(lip[ep]) initial = None # print "cambio de guardia",ced while True: rep = ros.pop(0) # print "initial,initial==rep",initial,initial==rep # if initial != None: # print rep["name"],rep["original_rot_cluster"],initial["name"],initial["original_rot_cluster"] if initial != None and initial == rep: ros.append(rep) break if initial == None: initial = rep # print rep["original_rotcluster"].split("_")[-1], ced if rep["original_rotcluster"].split("_")[-1] == ced: if len(fixd) == 0: rep["original_rotcluster"] = ced rep["n_prev_cluster"] = int(ced) else: rep["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + ced rep["n_prev_cluster"] = __getIDClusterFromDescription(rep["original_rotcluster"]) # if ep == len(lip)-1: # rep["fixed_frags"] = fixd # else: rep["fixed_frags"] = [] fixd.append(copy.deepcopy(rep)) # print "He entrado aqui" break else: # print "No es la que quiero paso a la proxima" ros.append(rep) # print "////////////////",len(ros) rota = ros.pop() # print rota # print "/////////////////",len(fixd) # print fixd # print "//////////////////" if len(fixd) > 0: rota["fixed_frags"] = fixd rota["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] rota["n_prev_cluster"] = __getIDClusterFromDescription(rota["original_rotcluster"]) else: rota["fixed_frags"] = [] # print 
rota["original_rotcluster"] if randomize_trans_per_rot <= 0: if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) # for keee in ensembles: # print keee,";;;;;;",ensembles[keee] # print rotafi["name"] # print "=================================",ensembles[rotafi["name"]] # print list_pdbs[ensembles[rotafi["name"]]] f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " " + rota["name"] + "\n") # cou += 1 else: for rnds in numpy.random.uniform(low=0.000, high=0.999, 
size=(randomize_trans_per_rot, 3)): if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) # for keee in ensembles: # print keee,";;;;;;",ensembles[keee] # print rotafi["name"] # print "=================================",ensembles[rotafi["name"]] # print list_pdbs[ensembles[rotafi["name"]]] f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str(rnds[0]) + " " + str( rnds[1]) + " " + str(rnds[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str(rnds[0]) + " " + str( rnds[1]) + " " + str(rnds[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " " + rota[ "name"] + "\n") # cou += 1 s += 1 # nuovoPath = pdbf[:-4]+rota["name"].split("ensembleID")[1]+".pdb" # conv2[str(counter)+"."+str(s)+".pdb"] = (nuovoPath,rota["name"]) f.write('ROOT "' + 
str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),str(counter)+".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startPACKJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "PACK of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." 
def startOCC(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso,
             normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1,
             ellg=None, nres=None, rangeocc=None, merge=None, occfrac=None, occoffset=None, ncycles=None, tops=None,
             LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1):
    """Write one Phaser ``MODE MR_OCC`` script per ``.pdb`` file found under a directory and launch them.

    When ``frag_fixed == 1`` and ``dir_o_liFile`` is a string, the directory tree rooted there is
    walked; every file ending in ``.pdb`` is linked as ``<n>.pdb`` into a numbered sub-directory of
    ``outputDire`` (a new sub-directory every NUMBER_OF_FILES_PER_DIRECTORY models) next to a
    generated ``<n>.sh`` keyword script. The scripts are then run locally (``cm is None``) or
    handed to the grid manager ``cm``.

    :param cm: ``None`` for local execution, otherwise the job manager. If it has a ``channel``
        attribute, directories, links and files are mirrored through its remote-file methods.
    :param sym: object providing ``PROCESSES``, ``REALPROCESSES`` and ``couldIClose``.
    :param nameJob: name given to the ``Grid.gridJob`` in grid mode.
    :param dir_o_liFile: root directory that is searched for ``.pdb`` models.
    :param outputDire: directory under which the numbered job directories are created.
    :param mtz, normfactors, tncsfactors: files linked as ``occ.mtz``, ``anis.norm`` and ``anis.tncs``.
    :param F, SIGF: column labels; written as ``I=``/``SIGI=`` when ``Intensities`` is true.
    :param MW, NC, RMSD, lowR, highR, spaceGroup: values copied into the script keywords.
    :param nice: niceness passed to ``nice -n`` for local runs.
    :param ellg, nres, rangeocc, merge, occfrac, occoffset, ncycles: optional OCCUPANCY settings;
        each is written only when it is not ``None`` and has the type checked below.
    :param VRMS, BFAC: booleans translated to ``ON``/``OFF`` for the ``MACMR`` line.
    :param BULK_FSOL, BULK_BSOL: bulk-solvent terms, used only when both are >= 0.
    :param sampl: only echoed in the summary printout.
    :param DicParameters, Aniso, final_rot, save_rot, tops, LIMIT_CLUSTER: accepted but not used here.
    :return: ``(nq, ensembles)``. ``nq`` is the number of prepared models in local mode or the
        second value returned by ``cm.submitJobs`` in grid mode. ``ensembles`` maps
        ``"ensemble<n>"`` to the source PDB path and is empty when no model was prepared.
    """
    isRemote = hasattr(cm, "channel")

    dirente = outputDire
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    # Phaser expects ON/OFF keywords rather than booleans.
    VRMS = "ON" if VRMS else "OFF"
    BFAC = "ON" if BFAC else "OFF"

    if not os.path.exists(outputDire):
        os.makedirs(outputDire)

    if isRemote:
        current_dir = cm.get_remote_pwd()
        print(cm.create_remote_dir(os.path.basename(dirente)))
        print(cm.change_remote_dir(os.path.basename(dirente)))

    def buildOCCScript(index):
        """Return the text of the Phaser keyword script for model number ``index``."""
        lines = ["#!/bin/tcsh",
                 "MODE MR_OCC",
                 'HKLIN "occ.mtz"']
        if not Intensities:
            lines.append("LABIN F=" + F + " SIGF=" + SIGF)
        else:
            lines.append("LABIN I=" + F + " SIGI=" + SIGF)
        lines.extend([
            "TITLE Test OCC for Grid",
            "JOBS 1",
            "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC),
            "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS,
            "MACANO PROTOCOL OFF",
            "MACTNCS PROTOCOL OFF",
            "TNCS EPSFAC READ anis.tncs",
            "NORM EPSFAC READ anis.norm",
            "OCCUPANCY WINDOW NRES 1",
            "RESOLUTION " + str(lowR) + " " + str(highR),
            "XYZOUT ON",
            "ENSEMBLE ensemble" + str(index + 1) + " PDBFILE " + str(index) + ".pdb RMS " + str(RMSD),
            "SOLU SET",
            "SOLU SPAC " + str(spaceGroup),
            "SOLU 6DIM ENSE ensemble" + str(index + 1) + " EULER 0.0 0.0 0.0 FRAC 0.0 0.0 0.0",
        ])
        if ellg is not None and isinstance(ellg, float):
            lines.append("OCCUPANCY WINDOW ELLG " + str(ellg))
        if nres is not None and isinstance(nres, int):
            lines.append("OCCUPANCY WINDOW NRES " + str(nres))
        if rangeocc is not None and isinstance(rangeocc, list) and len(rangeocc) == 2:
            lines.append("OCCUPANCY MIN " + str(rangeocc[0]) + " MAX " + str(rangeocc[1]))
        if merge is not None and isinstance(merge, bool):
            lines.append("OCCUPANCY MERGE ON" if merge else "OCCUPANCY MERGE OFF")
        if occfrac is not None and isinstance(occfrac, float):
            # Fixed: this keyword used to be written with the value of ellg.
            lines.append("OCCUPANCY FRAC " + str(occfrac))
        if occoffset is not None and isinstance(occoffset, int):
            lines.append("OCCUPANCY OFFSET " + str(occoffset))
        if ncycles is not None and isinstance(ncycles, int):
            lines.append("OCCUPANCY NCYCLES " + str(ncycles))
        if BULK_BSOL >= 0 and BULK_FSOL >= 0:
            lines.append("SOLPARAMETERS BULK USE ON")
            lines.append("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL))
        else:
            lines.append("SOLPARAMETERS BULK USE OFF")
        lines.append('ROOT "' + str(index) + '"')
        lines.append("END")
        # The last line is written without a trailing newline, as before.
        lines.append("EOF-phaser")
        return "\n".join(lines)

    counter = 0
    ndir = 0
    dirente2 = ""
    current_dir2 = ""
    # Bound up front so the final return cannot raise NameError when nothing is prepared.
    ensembles = {}

    if frag_fixed == 1 and isinstance(dir_o_liFile, basestring):  # is a directory
        for root, subFolders, files in os.walk(dir_o_liFile):
            for fileu in files:
                pdbf = os.path.join(root, fileu)
                if not pdbf.endswith(".pdb"):
                    continue

                if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                    # Start a new numbered job directory and link the shared inputs into it.
                    outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/")
                    if not os.path.exists(outputDirectory):
                        os.makedirs(outputDirectory)
                    try:
                        os.symlink(mtz, os.path.join(outputDirectory, "occ.mtz"))
                        os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs"))
                        os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm"))
                    except Exception:
                        # Best effort (e.g. links already exist); user interrupts now propagate.
                        print("Advisory: There was some problem generating the links to the mtz, tncs and norm files")

                    dirente2 = outputDirectory
                    if dirente2[-1] == "/":
                        dirente2 = dirente2[:-1]

                    if isRemote:
                        current_dir2 = cm.get_remote_pwd()
                        print(cm.create_remote_dir(os.path.basename(dirente2)))
                        print(cm.change_remote_dir(os.path.basename(dirente2)))
                        cm.create_remote_link(cm.remote_mtz_path, "occ.mtz")
                        cm.create_remote_link(cm.remote_tncs_path, "anis.tncs")
                        cm.create_remote_link(cm.remote_norm_path, "anis.norm")
                    ndir += 1

                try:
                    os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb"))
                except Exception:
                    # Best effort, as for the shared input links above.
                    pass

                if isRemote:
                    print(cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))))
                    cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb")

                # NOTE(review): the key uses counter while the script names the ensemble
                # counter + 1; kept as in the original, confirm against the consumers.
                ensembles["ensemble" + str(counter)] = pdbf

                scriptPath = os.path.join(outputDirectory, str(counter) + ".sh")
                with open(scriptPath, "w") as f:
                    f.write(buildOCCScript(counter))

                if isRemote:
                    cm.copy_local_file(scriptPath, "./" + str(ndir - 1) + "/" + str(counter) + ".sh")

                counter += 1
                if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 and isRemote:
                    cm.change_remote_dir(current_dir2)

        if isRemote:
            cm.change_remote_dir(current_dir2)

    def startOCCJob(outputDirectory, op):
        """Run Phaser locally on script ``op`` in ``outputDirectory``, writing ``<op>.out``."""
        shPath = os.path.join(outputDirectory, str(op) + ".sh")
        outPath = os.path.join(outputDirectory, str(op) + ".out")
        with open(shPath, "r") as f, open(outPath, "w") as f2:
            print("Executing...")
            print(" ".join(["nice", "-n" + str(nice), str(PATH_NEW_PHASER), "<", shPath, ">", outPath]))
            p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2,
                                 stderr=subprocess.PIPE, cwd=outputDirectory)
            out, err = p.communicate()
        print(err)

    # One (directory, number of jobs) pair per job directory; the last one may be partially filled.
    listaDirec = []
    for pr in range(ndir):
        if pr == ndir - 1:
            numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY)
        else:
            numero = NUMBER_OF_FILES_PER_DIRECTORY

        if isRemote:
            listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero))
        else:
            listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero))

    nq = counter

    print("OCC of " + str(nq) + " models submitted to the local machine with:")
    print("mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF))
    print("MW: " + str(MW) + " NC: " + str(NC))
    print("RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR))
    if sampl != -1:
        print("at a sampling size: " + str(sampl))

    if cm is None:
        for op in range(counter):
            # Model op lives in job directory op // NUMBER_OF_FILES_PER_DIRECTORY.
            outputDirectory = os.path.join(outputDire, "./" + str(op // NUMBER_OF_FILES_PER_DIRECTORY) + "/")
            try:
                if len(SystemUtility.NODES) == 0:
                    if sym.PROCESSES > 0:
                        print("I found  " + str(sym.REALPROCESSES) + " CPUs.")
                        # Wait until the number of live threads allows starting another job.
                        while 1:
                            if len(threading.enumerate()) <= sym.PROCESSES:
                                p = SystemUtility.OutputThreading(startOCCJob, outputDirectory, op)
                                p.start()
                                break
                    else:
                        print("FATAL ERROR: I cannot load correctly information of CPUs.")
                        sym.couldIClose = True
                        sys.exit(0)
                else:
                    comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + \
                              os.path.join(outputDirectory, str(op) + ".out")
                    SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"),
                                                PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST)
            except KeyboardInterrupt:
                print("The user requires to exit from Borges.")
                sym.couldIClose = True
                sys.exit(0)
    else:
        job = Grid.gridJob(nameJob)
        job.setExecutable(PATH_NEW_PHASER)
        job.setInitialDir(listaDirec)
        job.setStdIn(".sh".split())
        job.addInputFile("occ.mtz", False)
        job.addInputFile("anis.norm", False)
        job.addInputFile("anis.tncs", False)
        job.addInputFile(".pdb", True)
        job.addInputFile(".sh", True)
        job.addOutputFile(".out", True)
        cm.setRequirements(PHASER_REQUIREMENTS)
        cm.setRank("kflops")
        (nc, nq) = cm.submitJobs(job, counter)

        if isRemote:
            cm.change_remote_dir(current_dir)

    return nq, ensembles
dirente2 = "" current_dir2 = "" numall = -1 if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "nma.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) if hasattr(cm, "channel"): cm.create_remote_link(cm.remote_mtz_path, "nma.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") ensembles["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NMA" + "\n") f.write('HKLIN "nma.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test NMA for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN 
MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES 2" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startNMAJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "NMA of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) if cm is None: nl = 0 for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startNMAJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("nma.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if numall > 0: for lo in range(numall): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, ensembles def startNMAFromClusters(DicParameters, cm, sym, ClusAll, ensembles, nameJob, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1): dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != 
inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 conv2 = {} for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and cou >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) pdbf = ensembles[rota["name"]] # if usePDB != None: # pdbf = usePDB if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "nma.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "nma.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") conv2["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NMA" + "\n") f.write('HKLIN "nma.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test NMA 
for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES 2" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startNMAP1Job(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "NMA of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startNMAP1Job, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
def shredFromEnt(ent, length):
    """Cut the structure in ``ent`` into overlapping fragments of ``length`` residues.

    The residue list is taken from ``Bioinformatics.getListCA`` and a window of ``length``
    residues is slid along it one residue at a time; every window is converted to PDB text.

    :param ent: structure source handed unchanged to ``Bioinformatics.getListCA``.
    :param length: number of residues in each fragment.
    :return: list with the first element returned by ``Bioinformatics.getPDBFromListOfAtom``
        for each window (presumably the PDB text; verify against that helper).
    """
    listAll = Bioinformatics.getListCA("ent", ent, "PDB", backbone=True, allInList=True)
    fullength = len(listAll[0][0])
    # NOTE(review): range(fullength - length) leaves out the window that ends on the last
    # residue; kept as in the original because the relation between listAll[0][0] and
    # listAll[1] is not visible here. Confirm whether "+ 1" is intended.
    trozos = fullength - length

    piecesPDBs = []
    for t in range(trozos):
        # Fixed: the slice used to be [t:length], which shrank and then emptied the window.
        piece = listAll[1][t:t + length]
        allAtm = [atm for resi in piece for atm in resi]
        # Fixed: the helper lives in the Bioinformatics module, as in the other callers.
        piecesPDBs.append(Bioinformatics.getPDBFromListOfAtom(allAtm)[0])
    return piecesPDBs
"channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or (LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde): # print "Checking2 is",inde,len(ClusAll)-1,len(ClusAll) if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "brf.mtz")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "brf.mtz") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] # print "Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: stru = Bioinformatics.getStructure("ref", pdbf) pdbsearch = "" for model in stru.get_list(): reference = [] for chain in model.get_list(): for residue in chain.get_list(): reference += residue.get_unpacked_list() pdbmod, cnv = Bioinformatics.getPDBFromListOfAtom(reference, renumber=isOMIT, uniqueChain=isOMIT, chainFragment=not isOMIT) pdbmod = "MODEL " + str(model.get_id()) 
+ "\n" + pdbmod + "\n\n" pdbsearch += pdbmod pdbsearch += "ENDMDL\n\n" fds = open(os.path.join(outputDirectory, str(counter) + ".pdb"), "w") fds.write(pdbsearch) fds.close() if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".pdb"), "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_ROT" + "\n") f.write("TARGET ROT BRUTE" + "\n") f.write('HKLIN "brf.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE refinement vs Rotation fucntion " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT OFF" + "\n") f.write("#SPACEGROUP " + spaceGroup + "\n") # f.write("XYZOUT ON"+"\n") # f.write("TOPFILES "+str(1)+"\n") # if not USE_TNCS: f.write("TNCS USE OFF\n") # else: # f.write("TNCS USE ON\n") f.write("ENSEMBLE ensemble" + str(1) + " PDBFILE " + str(counter) + ".pdb RMS " + str(RMSD) + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + 
" " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") """ else: f.write("SOLU SET"+"\n") f.write("SOLU SPAC "+spaceGroup+"\n") if "n_prev_cluster" in rota: f.write("SOLU TRIAL ENSEMBLE ensemble"+str(1)+" EULER \t"+str((rota["euler"])[0])+" "+str((rota["euler"])[1])+" "+str((rota["euler"])[2])+"\t"+"RFZ "+str(rota["zscore"])+" #CLUSTER: "+str(rota["original_rotcluster"].split("_")[-1])+"\n") else: f.write("SOLU TRIAL ENSEMBLE ensemble"+str(1)+" EULER \t"+str((rota["euler"])[0])+" "+str((rota["euler"])[1])+" "+str((rota["euler"])[2])+"\t"+"RFZ "+str(rota["zscore"])+"\n") """ if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(1) + "\n") f.write("SEARCH METHOD FULL" + "\n") f.write("SEARCH DEEP OFF" + "\n") f.write( "ROTATE VOLUME AROUND EULER " + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + " RANGE " + str(5) + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, 0, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRGRJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "BRF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRGRJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("brf.mtz", False) job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) # job.addOutputFile("re_.pdb",True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") # if sol == None: # print "ho raggiunto ultimo" # (nc,nq) = cm.submitJob(job,isthelast=True) # else: (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def startRGR(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, save_rot, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True, isOMIT=False, VRMS=False, BFAC=False, sigr=0.0, sigt=0.0, preserveChains=False, BULK_FSOL=-1, BULK_BSOL=-1): global listaEva # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or (LIMIT_CLUSTER 
!= None and LIMIT_CLUSTER != inde): # print "Checking2 is",inde,len(ClusAll)-1,len(ClusAll) if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rgr.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rgr.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] # print "Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: stru = Bioinformatics.getStructure("ref", pdbf) pdbsearch = "" for model in stru.get_list(): reference = [] for chain in model.get_list(): for residue in chain.get_list(): reference += residue.get_unpacked_list() pdbmod, cnv = Bioinformatics.getPDBFromListOfAtom(reference, renumber=isOMIT, uniqueChain=isOMIT, chainFragment=(not isOMIT and not preserveChains)) pdbmod = "MODEL " + str(model.get_id()) + "\n" + pdbmod + "\n\n" pdbsearch += pdbmod pdbsearch += "ENDMDL\n\n" 
fds = open(os.path.join(outputDirectory, str(counter) + ".pdb"), "w") fds.write(pdbsearch) fds.close() if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".pdb"), "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_GYRE" + "\n") f.write('HKLIN "rgr.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE refinement vs Rotation function " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("MACR ROT ON TRA ON SIGR " + str(sigr) + " SIGT " + str(sigt) + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES " + str(1) + "\n") f.write("TNCS USE OFF\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("ENSEMBLE ensemble" + str(1) + " PDBFILE " + str(counter) + ".pdb RMS " + str(RMSD) + "\n") f.write("SORT ON" + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + 
"\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") if "n_prev_cluster" in rota: f.write("SOLU TRIAL ENSEMBLE ensemble" + str(1) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RFZ " + str( rota["zscore"]) + " #CLUSTER: " + str(rota["original_rotcluster"].split("_")[-1]) + "\n") else: f.write("SOLU TRIAL ENSEMBLE ensemble" + str(1) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RFZ " + str( rota["zscore"]) + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(1) + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, 0, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRGRJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RGR of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRGRJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
def startRBRP1(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities,
               Aniso, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None,
               VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False):
    '''
    Prepare and launch one Phaser "MODE MR_RNP" job per rotation solution, using space group P1.

    For every solution in the heaps of ClusAll the function:
      - creates numbered sub-directories of outputDire (a new one every
        NUMBER_OF_FILES_PER_DIRECTORY models) and links the mtz there as "refP1.mtz"
      - links the model pdb as "<counter>.pdb" and splits its ATOM/HETATM records into
        "<k>_<counter>.pdb" files, starting a new file when the residue number stops being consecutive
      - writes the Phaser keyword script "<counter>.sh"
      - records the model in conv2 and in the module-level listaEva

    Jobs are then run locally (cm is None) or submitted through the grid object cm.

    Keyword input (only what the code below shows is described):
    - cm: None for local execution; otherwise an object used for job submission. When it has a
          "channel" attribute the remote directory/link/copy calls are also issued
    - sym: object providing PROCESSES, REALPROCESSES and couldIClose
    - ClusAll: list of dicts with a "heapSolutions" entry
    - ensembles: dict mapping rota["name"] to a pdb path
    - frag_fixed, tops, LIMIT_CLUSTER: control which solutions are processed
    - VRMS, BFAC, RNP_GYRE: booleans converted to Phaser keyword text
    - DicParameters, Aniso, spaceGroup: not used in this function

    Return:
    - (nq, conv2): number of queued models (as returned by cm.submitJobs in the grid case, else the
      number of prepared models) and the dict "<counter>.1.pdb" -> (pdb path, rotation name)
    '''
    global listaEva

    # TODO: This function should use the files with the norm and tncs factors, but those of the P1 data, that should be generated at the beginning

    # Set phaser keywords
    VRMS = "ON" if VRMS else "OFF"
    BFAC = "ON" if BFAC else "OFF"
    RNP_GYRE = "\n MACMR CHAINS ON" if RNP_GYRE else ""

    if not (os.path.exists(outputDire)):
        os.makedirs(outputDire)

    # Directory name without the trailing slash, so that basename() returns the last component
    dirente = outputDire
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    if hasattr(cm, "channel"):
        # Remote case: remember where we were and enter a directory with the same name remotely
        current_dir = cm.get_remote_pwd()
        print cm.create_remote_dir(os.path.basename(dirente))
        print cm.change_remote_dir(os.path.basename(dirente))

    counter = 0  # global index of the model being prepared; also the job file name
    conv2 = {}
    ndir = 0  # number of sub-directories created so far
    dirente2 = ""
    current_dir2 = ""
    for inde in range(len(ClusAll)):
        clu = ClusAll[inde]
        nrts = len(clu["heapSolutions"].asList())
        # "and" binds tighter than "or": skip empty clusters, or clusters other than LIMIT_CLUSTER when it is set
        if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde:
            if current_dir2 != "" and inde == len(ClusAll) - 1:
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)
            continue

        s = 0  # unused
        cou = 0  # number of solutions seen in this cluster
        for sol in clu["heapSolutions"]:
            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                # Start a new sub-directory and make the reflection file available inside it
                outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/")
                if not (os.path.exists(outputDirectory)):
                    os.makedirs(outputDirectory)
                try:
                    os.symlink(mtz, os.path.join(outputDirectory, "refP1.mtz"))
                except:
                    # NOTE(review): any failure is ignored here, presumably to tolerate an existing link - confirm
                    pass
                dirente2 = outputDirectory
                if dirente2[-1] == "/":
                    dirente2 = dirente2[:-1]
                if hasattr(cm, "channel"):
                    current_dir2 = cm.get_remote_pwd()
                    print cm.create_remote_dir(os.path.basename(dirente2))
                    print cm.change_remote_dir(os.path.basename(dirente2))
                    cm.create_remote_link(cm.remote_mtzP1_path, "refP1.mtz")
                ndir += 1

            rota = sol[1]
            prio = (rota["llg"], rota["zscore"])  # unused
            cou += 1
            if frag_fixed == 1 and tops != None and cou >= tops:
                break

            pdbf = ensembles[rota["name"]]
            # print "Preparing: "+str(pdbf)+" as model "+str(counter)
            flon = open(pdbf, "r")
            lineas = flon.readlines()
            flon.close()
            # Look for a "REMARK TITLE" record before the first atom; the value is not used afterwards
            title = "--"
            for li in lineas:
                lis = li.split()
                if lis[0] == "REMARK" and lis[1] == "TITLE":
                    title = lis[2]
                    break
                if lis[0] == "ATOM" or lis[0] == "HETATM":
                    break
            if True:
                try:
                    os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb"))
                except:
                    pass
                if hasattr(cm, "channel"):
                    print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2)))
                    cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)),
                                          str(counter) + ".pdb")
            # corresponding = (pdbf.split("/"))[-1]
            # listona = corresponding.split("_")
            # pdbid = listona[0]
            # model = listona[1]
            # idSolution = listona[2]
            # idSolution,ext = idSolution.split(".")

            # Split the model into "<countt>_<counter>.pdb" files
            lop = open(outputDirectory + "/" + str(counter) + ".pdb", "r")
            alls = lop.readlines()
            lop.close()
            countt = 0  # number of fragment files opened so far
            scrivi = True  # True when the next atom line must open a new fragment file
            lai = None  # fragment file currently being written
            previous = -1  # residue number of the previous atom line
            lineaDascri = None  # atom line that triggered the split, still to be written
            for linea in alls:
                if linea.startswith("ATOM") or linea.startswith("HETATM"):
                    if scrivi:
                        if lai != None:
                            lai.close()
                            if hasattr(cm, "channel"):
                                cm.copy_local_file(lai.name, "./" + str(ndir - 1) + "/" + os.path.basename(lai.name))
                        lai = open(outputDirectory + "/" + str(countt) + "_" + str(counter) + ".pdb", "w")
                        if lineaDascri != None:
                            lai.write(lineaDascri)
                            lineaDascri = None
                        countt += 1
                        scrivi = False
                    residuo = int((linea[22:26]).strip())
                    # Evaluated as (previous > 0 and residuo == previous + 1) or (residuo == previous)
                    if previous > 0 and (residuo == (previous + 1)) or (residuo == previous):
                        lai.write(linea)
                        previous = residuo
                    elif previous > 0:
                        # Residue numbering jumped: this line is written to the next fragment file
                        scrivi = True
                        lineaDascri = linea
                        previous = residuo
                    else:
                        lai.write(linea)
                        previous = residuo
            # NOTE(review): if the last atom line triggers a split, lineaDascri is never written - confirm intended
            if lai != None:
                lai.close()
                if hasattr(cm, "channel"):
                    cm.copy_local_file(lai.name, "./" + str(ndir - 1) + "/" + os.path.basename(lai.name))

            numFrag = countt

            # Write the Phaser keyword script for this model
            f = open(outputDirectory + "/" + str(counter) + ".sh", "w")
            f.write("#!/bin/tcsh" + "\n")
            f.write("MODE MR_RNP" + "\n")
            f.write('HKLIN "refP1.mtz"' + "\n")
            f.write('HKLOUT OFF' + "\n")
            if not Intensities:
                f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n")
            else:
                f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n")
            f.write("TITLE refinement in P1 " + "\n")
            f.write("JOBS 1" + "\n")
            f.write("SGALTERNATIVE SELECT NONE" + "\n")
            f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n")
            f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n")
            f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n")
            if BULK_BSOL >= 0 and BULK_FSOL >= 0:
                f.write("SOLPARAMETERS BULK USE ON" + "\n")
                f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n")
            else:
                f.write("SOLPARAMETERS BULK USE OFF" + "\n")
            f.write("XYZOUT ON" + "\n")
            f.write("TOPFILES " + str(1) + "\n")
            # One ENSEMBLE definition per fragment file written above
            for i in range(countt):
                f.write(
                    "ENSEMBLE ensemble" + str(i) + " PDBFILE " + str(i) + "_" + str(counter) + ".pdb RMS " + str(
                        RMSD) + "\n")
            # NOTE(review): the nesting of the solution lines relative to this if/else is reconstructed - confirm
            if frag_fixed > 1:
                f.write("SOLU SET" + "\n")
                f.write("SOLU SPAC P1\n")
                for frifr in rota["fixed_frags"]:
                    rotafi = frifr
                    prifi = (rotafi["llg"], rotafi["zscore"])
                    f.write(
                        "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str(
                            (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str(
                            (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str(
                            (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str(
                            rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str(
                            prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n")
            else:
                f.write("SOLU SET" + "\n")
                f.write("SOLU SPAC P1\n")
                # Every fragment gets the same euler/frac/bfactor, taken from the solution
                for i in range(countt):
                    if "n_prev_cluster" in rota:
                        f.write("SOLU 6DIM ENSE ensemble" + str(i) + " EULER \t" + str((rota["euler"])[0]) + " " + str(
                            (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str(
                            (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str(
                            (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str(
                            rota["original_rotcluster"].split("_")[-1]) + "\n")
                    else:
                        f.write("SOLU 6DIM ENSE ensemble" + str(i) + " EULER \t" + str((rota["euler"])[0]) + " " + str(
                            (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str(
                            (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str(
                            (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + "\n")
            if sampl != -1:
                f.write("SAMPLING ROT " + str(sampl) + "\n")
            f.write('ROOT "' + str(counter) + '"\n')
            f.write("END\n")
            f.write("EOF-phaser")
            f.close()

            if hasattr(cm, "channel"):
                cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"),
                                   "./" + str(ndir - 1) + "/" + str(counter) + ".sh")

            # Bookkeeping: conv2 is returned to the caller, listaEva is module-level state
            conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"])
            listaEva[counter] = (0, 0, numFrag, rota)
            counter += 1
            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1):
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)

    def startRNPP1Job(outputDirectory, op):
        # Run Phaser for job number op inside outputDirectory, feeding "<op>.sh" on stdin and
        # writing stdout to "<op>.out". Nothing is done when "<op>.sol" already exists.
        if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")):
            return

        f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r")
        f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w")
        print "Executing..."
        print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory,
                                                                             str(op) + ".sh"), ">", os.path.join(
            outputDirectory, str(op) + ".out")
        p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE,
                             cwd=outputDirectory)
        out, err = p.communicate()
        f.close()
        f2.close()
        print err

    # (directory, number of jobs in it) pairs; the last directory holds the remainder
    listaDirec = []
    numero = 0
    for pr in range(ndir):
        if pr == ndir - 1:
            numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY)
        else:
            numero = NUMBER_OF_FILES_PER_DIRECTORY

        if hasattr(cm, "channel"):
            listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero))
        else:
            listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero))

    nq = counter
    print "RNP in P1 of " + str(nq) + " models submitted to the local machine with:"
    print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF)
    print "MW: " + str(MW) + " NC: " + str(NC)
    print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR)
    if sampl != -1:
        print "at a sampling size: " + str(sampl)

    nl = 0  # index of the sub-directory holding job op
    if cm is None:
        # Local execution
        for op in range(counter):
            if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                nl += 1
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")
            else:
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")

            try:
                if len(SystemUtility.NODES) == 0:
                    if sym.PROCESSES > 0:
                        print "I found ", sym.REALPROCESSES, "CPUs."
                        # Spin until the number of live threads allows starting another job
                        while 1:
                            if len(threading.enumerate()) <= sym.PROCESSES:
                                p = SystemUtility.OutputThreading(startRNPP1Job, outputDirectory, op)
                                p.start()
                                break
                    else:
                        print "FATAL ERROR: I cannot load correctly information of CPUs."
                        sym.couldIClose = True
                        sys.exit(0)
                else:
                    while 1:
                        comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory,
                                                                       str(op) + ".sh") + ">" + os.path.join(
                            outputDirectory, str(op) + ".out")
                        SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"),
                                                    PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST)
                        break
            except KeyboardInterrupt:
                print "The user requires to exit from Borges."
                sym.couldIClose = True
                sys.exit(0)
    else:
        # Grid execution: describe the job once and submit counter instances of it
        job = Grid.gridJob(nameJob)
        job.setExecutable(PATH_NEW_PHASER)
        job.setInitialDir(listaDirec)
        lineStdIn = ".sh"
        lia = lineStdIn.split()
        job.setStdIn(lia)
        job.addInputFile("refP1.mtz", False)
        # NOTE(review): numFrag is the fragment count of the LAST prepared model and is unbound when no
        # model was prepared (counter == 0) - confirm this is the intended set of input files
        for i in range(numFrag):
            job.addInputFile(str(i) + "_" + ".pdb", True)
        job.addInputFile(".sh", True)
        job.addOutputFile(".out", True)
        # job.addOutputFile("re_.pdb",True)
        cm.setRequirements(PHASER_REQUIREMENTS)
        cm.setRank("kflops")
        # if sol == None:
        #       print "ho raggiunto ultimo"
        #       (nc,nq) = cm.submitJob(job,isthelast=True)
        # else:
        (nc, nq) = cm.submitJobs(job, counter)

        if hasattr(cm, "channel"):
            cm.change_remote_dir(current_dir)

    return nq, conv2
def readMR_ELLGout(path_out_file=None, out_text=None, pdb_filepath=None, nfrag=1):
    '''
    Extract the eLLG figures of a single ensemble from a Phaser MR_ELLG log.

    Keyword input:
    - path_out_file: path of the log file; only read when out_text is None
    - out_text: full text of the log; takes precedence over path_out_file
    - pdb_filepath: path of the model; its residue count is obtained from Bioinformatics.getListCA
    - nfrag: must be 1, any other value prints a message and exits through sys.exit(0)

    Return:
    - dict with keys 'number_of_residues', 'fraction_scattering', 'ellg_current_ensemble',
      'nres_for_target_ellg', 'resolution_for_target' and 'fullpath'. Apart from the residue count
      and the eLLG (float), the values are the raw strings found in the log tables.
      'nres_for_target_ellg' is None when only the "Resolution for eLLG target" table is present.

    Raises:
    - ValueError when neither path_out_file nor out_text is given
    - NameError when the log does not contain the tables searched below (the values are then never set)
    '''
    if nfrag != 1:
        print("Currrently only a single fragment is supported")
        sys.exit(0)

    if out_text is not None:
        out_lines = out_text.splitlines()
    elif path_out_file is not None:
        with open(path_out_file, 'r') as out_file:
            out_lines = out_file.readlines()
    else:
        raise ValueError("readMR_ELLGout needs either path_out_file or out_text")

    # The file is only needed while the CA list is built, so it is closed right afterwards
    with open(pdb_filepath, 'r') as pdb_file:
        list_ca = Bioinformatics.getListCA(name=pdb_filepath[:-4], pdbf=pdb_file, mode='PDB')
    nres = len(list_ca[0][0])

    # The table headers are plain text, so substring tests are enough to find them
    older_ver = True
    for i, line in enumerate(out_lines):
        if "eLLG Values Computed for All Data" in line:
            # The data row sits one line lower when a 'target-reso:' line follows the header
            if out_lines[i + 2].split()[0] == 'target-reso:':
                row = out_lines[i + 4].split()
            else:
                row = out_lines[i + 3].split()
            frac_scat = row[1]
            current_ellg = float(row[4])
        if "Residues/Resolution for eLLG target" in line:
            row = out_lines[i + 5].split()
            nres_for_target = row[2]
            res_for_target = row[3]
            older_ver = False
        # This header is also a substring of the previous one; older_ver keeps the two cases apart
        if "Resolution for eLLG target" in line and older_ver:
            nres_for_target = None
            res_for_target = out_lines[i + 3].split()[2]

    results = {'number_of_residues': nres, 'fraction_scattering': frac_scat, 'ellg_current_ensemble': current_ellg,
               'nres_for_target_ellg': nres_for_target, 'resolution_for_target': res_for_target,
               'fullpath': pdb_filepath}
    return results
def readMR_ELLGsum(path_sum_file, nfrag=1):
    '''
    Read back an eLLG sum file made of 5-line records.

    Each record is read as: a separator line, "ENSEMBLE: <name>", the NRESIDUES line, the
    CURRENT ELLG line and a closing separator. The ensemble name is passed through
    getNewPathFromMerging together with path_sum_file.

    Keyword input:
    - path_sum_file: path of the sum file
    - nfrag: not used

    Return:
    - dict keyed by ensemble path; each value holds 'number_of_residues' (int),
      'fraction_scattering' (float), 'nres_for_target_ellg' (int, or None when the file says 'None'),
      'ellg_current_ensemble' (float) and 'resolution_for_target' (float)
    '''
    with open(path_sum_file, 'r') as file_sum:
        lines = file_sum.readlines()

    dict_result = {}
    # Stop before a trailing incomplete record (e.g. a final blank line) instead of indexing past the end
    for i in range(0, len(lines) - 4, 5):
        ensemble = getNewPathFromMerging(path_sum_file, lines[i + 1].split()[1])
        size_fields = lines[i + 2].split()
        ellg_fields = lines[i + 3].split()
        dict_result[ensemble] = {
            'number_of_residues': int(size_fields[1]),
            'fraction_scattering': float(size_fields[3]),
            # 'None' is written when no residue count was computed
            'nres_for_target_ellg': None if size_fields[8] == 'None' else int(size_fields[8]),
            'ellg_current_ensemble': float(ellg_fields[2]),
            'resolution_for_target': float(ellg_fields[7]),
        }
    return dict_result
def evaluateMR_ELLG(DicParameters, cm, DicGridConn, nameJob, outputDicr, nqueue, ensembles, isArcimboldo=False,
                    nfrag=1):
    '''
    This function reads the output from the startMR_ELLG function and evaluates it, producing a sum file.

    For every job index 0..nqueue-1 it waits for "<index>.out" (fetched through cm in the remote case,
    looked up on disk otherwise), parses it with readMR_ELLGout and then tries to delete the job's
    .pdb, .sh and .out files from the local directory.

    Keyword input:
    - DicParameters, DicGridConn: passed to SystemUtility.close_connection
    - cm: grid object; the remote calls are only issued when it has a "channel" attribute
    - nameJob: not used
    - outputDicr: directory that contains the numbered job sub-directories
    - nqueue: number of jobs to evaluate
    - ensembles: dict, 'ensemble<index + 1>' gives the name under which a result is stored
    - isArcimboldo, nfrag: not used

    Return:
    - dict_result: name of the ensemble -> dict returned by readMR_ELLGout

    Writes 'ellg_computation.sum' and 'table_ellg.sum' in outputDicr
    Closes the connection
    '''
    # Prepare the directories
    dirente = outputDicr
    if dirente[-1] == '/':
        dirente = dirente[:-1]
    if hasattr(cm, 'channel'):  # Only in remote grid case
        current_dir = cm.get_remote_pwd()
        print(cm.change_remote_dir(os.path.basename(dirente)))

    # Prepare variables to read
    fromIn = 0
    toIn = nqueue - 1
    ndir = 0
    dict_result = {}
    current_dir2 = ""

    # Read
    while fromIn <= toIn:
        print('Evaluating ' + str(fromIn) + ' mr_ellg file')
        if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
            # First job of a sub-directory: move into it
            outputDic = os.path.join(outputDicr, './' + str(ndir) + '/')
            dirente2 = outputDic
            if dirente2[-1] == '/':
                dirente2 = dirente2[:-1]
            if hasattr(cm, 'channel'):
                SystemUtility.remote_reconnection(cm.get_remote_pwd())
                current_dir2 = cm.get_remote_pwd()
                print(cm.change_remote_dir(os.path.basename(dirente2)))
            ndir = ndir + 1

        # Wait until the log of this job is available and complete
        outname = str(fromIn) + ".out"
        local_path = os.path.join(outputDic, outname)
        while 1:
            if hasattr(cm, 'channel'):
                file_ended = cm.get_remote_file(remotefile=outname, localfile=local_path,
                                                conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST)
                if isinstance(file_ended, bool) and not file_ended:
                    print("File " + outname + " not ready sleeping 3 seconds")
                    time.sleep(3)
                    continue
                break
            elif os.path.exists(local_path):
                checkYOURoutput(myfile=local_path, conditioEND=PHASER_OUT_END_CONDITION_LOCAL,
                                testEND=PHASER_OUT_END_TEST)
                break
            else:
                time.sleep(3)
                continue

        # Read the information from the out
        with open(local_path, 'r') as out_file:
            out_content = out_file.read()
        print(out_content)
        pdb_filepath = outputDic + str(fromIn) + ".pdb"
        results = readMR_ELLGout(out_text=out_content, pdb_filepath=pdb_filepath)
        name_ensemble = ensembles['ensemble' + str(fromIn + 1)]
        dict_result[name_ensemble] = results

        # Best-effort cleanup: the first file that cannot be removed stops the cleanup of this job
        try:
            os.remove(outputDic + str(fromIn) + ".pdb")
            os.remove(outputDic + str(fromIn) + ".sh")
            os.remove(outputDic + str(fromIn) + ".out")
        except OSError:  # Then we are not in local
            pass

        fromIn = fromIn + 1
        if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
            # Sub-directory finished: go back to where we were before entering it
            if hasattr(cm, "channel"):
                cm.change_remote_dir(current_dir2)

        now = datetime.datetime.now()
        ti = now.strftime("%Y-%m-%d %H:%M")
        print("Not all models are yet performed..." + ti)

    print("Now all models are performed.")

    if hasattr(cm, "channel"):
        cm.change_remote_dir(current_dir)
        # print cm.remove_remote_dir(os.path.basename(dirente))

    SystemUtility.close_connection(DicGridConn, DicParameters, cm)

    # Write the SUM file and the table used to analyze the eLLG
    filepath_sum = os.path.join(dirente, 'ellg_computation.sum')
    filepath_table = os.path.join(dirente, 'table_ellg.sum')
    with open(filepath_sum, 'w') as file_sum, open(filepath_table, 'w') as file_table:
        file_table.write("Size\teLLG\tName\n")
        for ensemble in dict_result.keys():
            entry = dict_result[ensemble]
            file_sum.write("===========\n")
            file_sum.write("ENSEMBLE: " + ensemble + "\n")
            file_sum.write(
                "NRESIDUES: " + str(entry['number_of_residues']) + "\t FRACTION_SCATTERING: " + str(
                    entry['fraction_scattering']) + "\t NRESIDUES FOR TARGET ELLG: " + str(
                    entry['nres_for_target_ellg']) + "\n")
            file_sum.write("CURRENT ELLG: " + str(
                entry['ellg_current_ensemble']) + "\t RESOLUTION FOR TARGET ELLG: " + str(
                entry['resolution_for_target']) + "\n")
            file_sum.write("===========\n")
            file_table.write(str(entry['number_of_residues']) + '\t' + str(
                entry['ellg_current_ensemble']) + '\t' + ensemble + '\n')
    return dict_result
def evaluateOCC(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, ensembles):
    '''
    Collect the pdb files produced by the Phaser jobs 0..nqueue-1 found under outputDicr.

    For every job the function waits for "<index>.out", derives from it the names of the expected
    solution files ("<index>.<k>.pdb", one per 'Map coefficient calculated for top solution' line),
    and rewrites each of them under the basename of ensembles['ensemble<index>'], prefixed with a
    "REMARK CLUSTER" line whose value is read from the job's input "<index>.pdb" ("-1" when that
    file has no such remark). The job's working files are then removed.

    Keyword input:
    - DicParameters, DicGridConn: passed to SystemUtility.close_connection
    - cm: grid object; the remote calls are only issued when it has a "channel" attribute
    - sym, nameJob: not used
    - outputDicr: directory that contains the numbered job sub-directories
    - nqueue: number of jobs to evaluate
    - ensembles: dict mapping 'ensemble<index>' to the path of the original model

    Return: None. Closes the connection.
    '''
    marker = r'Map coefficient calculated for top solution'

    def expected_solutions(out_path, index):
        # The log has to be searched as ONE string: re.findall does not accept the list
        # returned by readlines()
        with open(out_path, 'r') as handle:
            log_text = handle.read()
        hits = re.findall(marker, log_text)
        return [str(index) + "." + str(k + 1) + ".pdb" for k in range(len(hits))]

    dirente = outputDicr
    if dirente[-1] == '/':
        dirente = dirente[:-1]
    if hasattr(cm, 'channel'):
        current_dir = cm.get_remote_pwd()
        print(cm.change_remote_dir(os.path.basename(dirente)))

    fromIn = 0
    toIn = nqueue - 1
    ndir = 0
    current_dir2 = ''
    rnp_sol = {}  # names of all solution files seen so far
    while fromIn <= toIn:
        print('Evaluating ' + str(fromIn) + ' translation file')
        if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
            # First job of a sub-directory: move into it
            outputDic = os.path.join(outputDicr, './' + str(ndir) + '/')
            dirente2 = outputDic
            if dirente2[-1] == '/':
                dirente2 = dirente2[:-1]
            if hasattr(cm, 'channel'):
                SystemUtility.remote_reconnection(cm.get_remote_pwd())
                current_dir2 = cm.get_remote_pwd()
                print(cm.change_remote_dir(os.path.basename(dirente2)))
            ndir += 1

        out_path = os.path.join(outputDic, str(fromIn) + '.out')
        while 1:
            if hasattr(cm, 'channel'):
                wse = cm.get_remote_file(str(fromIn) + '.out', out_path,
                                         conditioEND=PHASER_NMA_END_CONDITION, testEND=PHASER_NMA_END_TEST)
                if isinstance(wse, bool) and not wse:
                    print('File ' + str(fromIn) + '.out not ready sleeping 3 seconds...')
                    time.sleep(3)
                    continue
                for ler in expected_solutions(out_path, fromIn):
                    rnp_sol[ler] = len(ler)
                    wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler),
                                             conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION,
                                             testEND=PHASER_RLIST_SOL_PDB_END_TEST)
                    if isinstance(wse, bool) and not wse:
                        # NOTE(review): this only moves on to the next file, the missing one is not
                        # fetched again - confirm against the original layout of the loop
                        print('File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...')
                        time.sleep(3)
                        continue
                break
            elif os.path.exists(out_path):
                checkYOURoutput(out_path, PHASER_NMA_END_CONDITION_LOCAL, PHASER_NMA_END_TEST)
                # Loop until every solution file announced in the log exists on disk
                while True:
                    btest = False
                    for ler in expected_solutions(out_path, fromIn):
                        rnp_sol[ler] = len(ler)
                        btest = os.path.exists(os.path.join(outputDic, ler))
                        if not btest:
                            break
                    if btest:
                        break
                break
            else:
                time.sleep(3)
                continue

        # Cluster id stored in the input model of this job
        with open(outputDic + str(fromIn) + ".pdb", "r") as fe:
            fer = fe.readlines()
        clus = None
        for luc in fer:
            if luc.startswith("REMARK CLUSTER"):
                clus = luc.split()[2]
                break
        if clus is None:
            clus = "-1"

        for ler in rnp_sol.keys():
            tud = int((ler.split("."))[0])  # 8.1.pdb here it takes 8
            if tud == fromIn:
                # fut_name = outputDic+os.path.basename(ensembles["ensemble"+str(tud)][:-4]+"_"+str(cud)+".pdb")
                fut_name = outputDic + os.path.basename(ensembles["ensemble" + str(tud)][:-4] + ".pdb")
                with open(os.path.join(outputDic, ler), "r") as fg:
                    fgall = fg.read()
                os.remove(os.path.join(outputDic, ler))
                with open(fut_name, "w") as fg:
                    fg.write("REMARK CLUSTER " + str(clus) + "\n")
                    fg.write(fgall)

        # Cleanup of the working files; the first required file that is missing stops it
        try:
            os.remove(outputDic + str(fromIn) + '.pdb')
            os.remove(outputDic + str(fromIn) + '.sol')
            os.remove(outputDic + str(fromIn) + '.out')
            os.remove(outputDic + str(fromIn) + '.sh')
            try:
                os.remove(outputDic + str(fromIn) + ".1.mtz")
            except OSError:
                pass
            for optional in ('.mtz', '.sum', '.mat', ''):
                if os.path.exists(outputDic + str(fromIn) + optional):
                    os.remove(outputDic + str(fromIn) + optional)
        except OSError:
            print('Cannot find the file to delete.')
            # print sys.exc_info()
            # traceback.print_exc(file=sys.stdout)

        fromIn += 1
        if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
            # Sub-directory finished: go back to where we were before entering it
            if hasattr(cm, 'channel'):
                cm.change_remote_dir(current_dir2)

        now = datetime.datetime.now()
        ti = now.strftime('%Y-%m-%d %H:%M')
        print('Not all models are yet performed...' + ti)

    if hasattr(cm, 'channel'):
        cm.change_remote_dir(current_dir)
        print(cm.remove_remote_dir(os.path.basename(dirente)))

    print('Now all models are performed.')
    SystemUtility.close_connection(DicGridConn, DicParameters, cm)
def evaluateNMA(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, rotclusid, nqueue, ensembles):
    '''
    Collect the pdb files produced by the Phaser jobs 0..nqueue-1 found under outputDicr.

    For every job the function waits for "<index>.out", derives from it the names of the expected
    solution files ("<index>.<k>.pdb", one per 'Map coefficient calculated for top solution' line),
    and rewrites each of them as "<basename of ensembles['ensemble<index>'] without extension>_<k>.pdb"
    with a leading "REMARK CLUSTER <rotclusid>" line. The job's working files are then removed.

    Keyword input:
    - DicParameters, DicGridConn: passed to SystemUtility.close_connection
    - cm: grid object; the remote calls are only issued when it has a "channel" attribute
    - sym, nameJob: not used
    - outputDicr: directory that contains the numbered job sub-directories
    - rotclusid: value written in the REMARK CLUSTER line of every output pdb
    - nqueue: number of jobs to evaluate
    - ensembles: dict mapping 'ensemble<index>' to the path of the original model

    Return: None. Closes the connection.
    '''
    # Directory name without the trailing slash, so that basename() returns the last component
    dirente = outputDicr
    if dirente[-1] == '/':
        dirente = dirente[:-1]
    if hasattr(cm, 'channel'):
        current_dir = cm.get_remote_pwd()
        print cm.change_remote_dir(os.path.basename(dirente))

    status = '--'  # unused
    fromIn = 0  # index of the job being evaluated
    toIn = nqueue - 1
    poolrota = []  # unused
    foms = {'llg': [numpy.inf, 0.0], 'zscore': [numpy.inf, 0.0]}  # unused
    candelete = False  # unused
    ndir = 0  # number of sub-directories entered so far
    dirente2 = ''
    current_dir2 = ''
    rnp_sol = {}  # names of all solution files seen so far
    while fromIn <= toIn:
        yetEvaluated = True  # always True, so the 60 s sleep at the end of the loop never runs
        if yetEvaluated:
            print 'Evaluating ' + str(fromIn) + ' translation file'
            if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                # First job of a sub-directory: move into it
                outputDic = os.path.join(outputDicr, './' + str(ndir) + '/')
                dirente2 = outputDic
                if dirente2[-1] == '/':
                    dirente2 = dirente2[:-1]
                if hasattr(cm, 'channel'):
                    SystemUtility.remote_reconnection(cm.get_remote_pwd())
                    current_dir2 = cm.get_remote_pwd()
                    print cm.change_remote_dir(os.path.basename(dirente2))
                ndir += 1

            # Wait for the log and for the solution files it announces
            while 1:
                if hasattr(cm, 'channel'):
                    wse = cm.get_remote_file(str(fromIn) + '.out', os.path.join(outputDic, str(fromIn) + '.out'),
                                             conditioEND=PHASER_NMA_END_CONDITION, testEND=PHASER_NMA_END_TEST)
                    if isinstance(wse, bool) and not wse:
                        print 'File ' + str(fromIn) + '.out not ready sleeping 3 seconds...'
                        time.sleep(3)
                        continue

                    f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r')
                    checkrnp = f.read()
                    f.close()
                    # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp)))
                    # One "<index>.<k>.pdb" name per marker line found in the log
                    listas = re.findall(r'Map coefficient calculated for top solution', checkrnp)
                    listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))]
                    for ler in listas:
                        rnp_sol[ler] = len(ler)
                        wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler),
                                                 conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION,
                                                 testEND=PHASER_RLIST_SOL_PDB_END_TEST)
                        if isinstance(wse, bool) and not wse:
                            # NOTE(review): as laid out here "continue" moves to the next file instead of
                            # retrying - confirm against the original layout of the loop
                            print 'File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...'
                            time.sleep(3)
                            continue
                    break
                elif os.path.exists(os.path.join(outputDic, str(fromIn) + '.out')):
                    checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_NMA_END_CONDITION_LOCAL,
                                    PHASER_NMA_END_TEST)
                    # Loop (without sleeping) until every announced solution file exists on disk
                    while True:
                        btest = False
                        f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r')
                        checkrnp = f.read()
                        f.close()
                        # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp)))
                        listas = re.findall(r'Map coefficient calculated for top solution', checkrnp)
                        listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))]
                        for ler in listas:
                            rnp_sol[ler] = len(ler)
                            btest = os.path.exists(os.path.join(outputDic, ler))
                            if not btest:
                                break
                        if btest:
                            break
                    break
                else:
                    time.sleep(3)
                    continue

            name = str(fromIn)  # unused
            # Rewrite the solution files of this job under their final name
            for ler in rnp_sol.keys():
                cud = int((ler.split("."))[1])  # 8.1.pdb here it takes 1
                tud = int((ler.split("."))[0])  # 8.1.pdb here it takes 8
                if tud == fromIn:
                    fut_name = outputDic + os.path.basename(
                        ensembles["ensemble" + str(tud)][:-4] + "_" + str(cud) + ".pdb")
                    fg = open(os.path.join(outputDic, ler), "r")
                    fgall = fg.read()
                    fg.close()
                    os.remove(os.path.join(outputDic, ler))
                    fg = open(fut_name, "w")
                    fg.write("REMARK CLUSTER " + str(rotclusid) + "\n")
                    fg.write(fgall)
                    fg.close()

            candelete = False
            # Cleanup of the working files; the first failing removal stops it and prints the message
            try:
                nb = fromIn
                os.remove(outputDic + str(nb) + '.pdb')
                os.remove(outputDic + str(nb) + '.sol')
                os.remove(outputDic + str(nb) + '.out')
                os.remove(outputDic + str(nb) + '.sh')
                if os.path.exists(outputDic + str(nb) + '.mtz'):
                    os.remove(outputDic + str(nb) + '.mtz')
                if os.path.exists(outputDic + str(nb) + '.sum'):
                    os.remove(outputDic + str(nb) + '.sum')
                if os.path.exists(outputDic + str(nb) + '.mat'):
                    os.remove(outputDic + str(nb) + '.mat')
                if os.path.exists(outputDic + str(nb)):
                    os.remove(outputDic + str(nb))
            except:
                print 'Cannot find the file to delete.'
                # print sys.exc_info()
                # traceback.print_exc(file=sys.stdout)

            fromIn += 1
            if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                # Sub-directory finished: go back to where we were before entering it
                if hasattr(cm, 'channel'):
                    cm.change_remote_dir(current_dir2)

        now = datetime.datetime.now()
        ti = now.strftime('%Y-%m-%d %H:%M')
        print 'Not all models are yet performed...' + ti
        if not yetEvaluated:
            print 'Long sleep, queue not ready'
            time.sleep(60)

    if hasattr(cm, 'channel'):
        cm.change_remote_dir(current_dir)
        print cm.remove_remote_dir(os.path.basename(dirente))

    print 'Now all models are performed.'
    SystemUtility.close_connection(DicGridConn, DicParameters, cm)
str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue # wse = cm.get_remote_file(str(i)+".1.pdb",os.path.join(outputDic,str(i)+".1.pdb"),lenght_ext=6,conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION,testEND=PHASER_RLIST_SOL_PDB_END_TEST) # if isinstance(wse,bool) and not wse: # print "File "+str(i)+".1.pdb not ready sleeping 3 seconds..." # time.sleep(3) # continue wse = cm.get_remote_file(str(i) + ".rlist", os.path.join(outputDic, str(i) + ".rlist"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".rlist not ready sleeping 3 seconds..." time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: # atest = os.path.exists(os.path.join(outputDic,str(i)+".1.pdb")) atest = True btest = os.path.exists(os.path.join(outputDic, str(i) + ".rlist")) if atest and btest: break break else: time.sleep(3) continue riprova = True PDBCREATED = "" PDBORIGINAL = "" new_euler = [] old_euler = [] llg = 0.0 zscore = 0.0 while riprova: try: try: fer = open(outputDic + str(i) + ".rlist", "r") ferli = fer.readlines() fer.close() llg = 0 zscore = 0 data_chain = {} for ferlinea in ferli: if ferlinea.startswith("SOLU TRIAL"): listy = ferlinea.split() llg = 0.0 zscore = 0.0 try: zscore = float(listy[9]) except: zscore = 0.0 new_euler = [float(listy[5]), float(listy[6]), 
float(listy[7])] try: p = subprocess.Popen('grep -A1 loggraph ' + outputDic + str(i) + ".out", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() outp = outp.strip() outlist = outp.splitlines() llg = float(outlist[1].split()[1]) except: llg = 0.0 break # NOTE: TEMPORARY for test gyre structure = Bioinformatics.getStructure("full", os.path.join(outputDic, str(i) + ".pdb")) new_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0, 0, chain_atoms, [ matrixFromEulerAngles(new_euler[0], new_euler[1], new_euler[2])], cell_dim, return_atoms=True) new_list_atoms += chain_atoms new_list_atoms = sorted(new_list_atoms, __cmp_atom) fer = open(outputDic + str(i) + ".sh", "r") ferli = fer.readlines() fer.close() for ferlinea in ferli: if ferlinea.startswith("#SPACEGROUP"): spaceGroup = ferlinea.split("#SPACEGROUP")[1] spaceGroup = spaceGroup.strip() if ferlinea.startswith("ROTATE"): lisr = ferlinea.split() old_euler = [float(lisr[4]), float(lisr[5]), float(lisr[6])] structure = Bioinformatics.getStructure("full", os.path.abspath( outputDic + os.path.basename(str(i) + ".pdb"))) old_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0, 0, chain_atoms, [ matrixFromEulerAngles(old_euler[0], old_euler[1], old_euler[2])], cell_dim, return_atoms=True) old_list_atoms += chain_atoms old_list_atoms = sorted(old_list_atoms, __cmp_atom) PDBORIGINAL = 
Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] f = open(os.path.abspath(outputDic + os.path.basename(str(i) + ".1_original.pdb")), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data) + "\n") f.write(PDBORIGINAL) f.close() PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] f = open(os.path.abspath(outputDic + os.path.basename(str(i) + ".1_created.pdb")), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data) + "\n") f.write(PDBCREATED) f.close() # NOTE: TEST END cancel this block after test if os.path.exists(models_directory): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0])) if model_file != None and os.path.exists(model_file): ori_path = model_file if not os.path.exists(ori_path): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0]).split("-")[ 0] + ".pdb") structure = Bioinformatics.getStructure("full", ori_path) new_list_resi = [] structureCOMP = Bioinformatics.getStructure("full2", cStringIO.StringIO(PDBCREATED)) for model in structureCOMP.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): new_list_resi += [resi] old_list_resi = [] structureREF = Bioinformatics.getStructure("full3", cStringIO.StringIO(PDBORIGINAL)) for model in structureREF.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): old_list_resi += [resi] # print 
"+++++++",len(old_list_resi),len(new_list_resi) # CALCOLO DEL RMSD FRA IL MODELLO VECCHIO E IL MODELLO NUOVO (rmsd, nref, ncom, listCoord, structureFINE, pdball) = Bioinformatics.getSuperimp( [old_list_resi], [new_list_resi], "PRECOMPUTED", algorithm="biopython", allAtomsModel=PDBCREATED, backbone=True) # ,listmodel=listcomp) lineas = PDBCREATED.splitlines() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break rotateStructureByListCoord(i, 1, structureFINE, listCoord, outputDic, title) else: rmsd = -100 nref = 0 ncom = 0 except: print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsd = -100 nref = 0 ncom = 0 ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) if not isOMIT: f = open(outputDic + "../models.sum", "a") f.write("===========\n") f.write("MODEL: " + str(i) + "\tCORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + "\t RMSD: " + str( rmsd) + "\t" + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF: " + str( nref) + "\t" + "NCOM: " + str(ncom) + "\n") f.write("===========\n") f.close() else: if len(Clud) <= LIMIT_CLUSTER: for q in range(len(Clud), LIMIT_CLUSTER + 1): Clud.append({"heapSolutions": ADT.Heap()}) ro["llg"] = llg ro["zscore"] = zscore ro["euler"] = new_euler ro["frac"] = [0.0, 0.0, 0.0] Clud[LIMIT_CLUSTER]["heapSolutions"].push((-1 * ro["llg"], -1 * ro["zscore"]), ro) # PULIZIA FILES INUTILI if os.path.exists(models_directory): if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass f = open(outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0]), "w") f.write(PDBCREATED) f.close() 
ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) else: ensembles[convNames[str(i) + ".1.pdb"][1]] = convNames[str(i) + ".1.pdb"][0] # print "=======",str(i)+".1.pdb","......",convNames[str(i)+".1.pdb"],"oooooooooo",convNames[str(i)+".1.pdb"][1],"////",ensembles[convNames[str(i)+".1.pdb"][1]],"------",convNames[str(i)+".1.pdb"][0] # NOTE: TEMPORARY FOR TESTING GYRE os.remove(outputDic + str(i) + ".pdb") os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sum") except: #print "Trying to remove *sum files" #print sys.exc_info() pass # NOTE: ENDING test (decomment when finished) try: os.remove(outputDic + str(i) + ".1.mtz") except: #print "Trying to remove *1.mtz files" #print sys.exc_info() pass # for t in range(numFrag): # os.remove(outputDic+str(t)+"_"+str(i)+".pdb") del listaEva[i] i += 1 if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) riprova = False now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) except: print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) # os.remove(outputDic+"rgr.mtz") if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file("rgr.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print "Now all models are performed." 
listaEva = {} SystemUtility.close_connection(DicGridConn, DicParameters, cm) if not isOMIT: return ensembles, CluWork else: return ensembles, Clud def evaluateRGR(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, cell_dim, quate, convNames, models_directory, ensembles, LIMIT_CLUSTER=None, model_file=None, isOMIT=False, ent=None): global listaEva if models_directory == None: models_directory = "" Clud = [] dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) if os.path.exists(outputDicr + "clustersTmp.sum"): Clusters, cloplo = readClustersFromSUM(outputDic + "clustersTmp.sum") recover = True fromIn = cluster status = "--" toIn = 1 # rotas = (CLUDATA["heapSolutions"]).asList() i = 0 ndir = 0 dirente2 = "" current_dir2 = "" CluWork = [] while len(listaEva.keys()) > 0: i = (sorted(listaEva.keys()))[0] nc, nq, numFrag, ro = listaEva[i] # nameJobi = nameJob+"_"+str(i) yetEvaluated = True if yetEvaluated: print "Evaluating " + str(i) + " model" if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." 
time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".1.pdb", os.path.join(outputDic, str(i) + ".1.pdb"), lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".1.pdb not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".sol", os.path.join(outputDic, str(i) + ".sol"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".sol not ready sleeping 3 seconds..." time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: atest = os.path.exists(os.path.join(outputDic, str(i) + ".1.pdb")) btest = os.path.exists(os.path.join(outputDic, str(i) + ".sol")) if atest and btest: break break else: time.sleep(3) continue riprova = True PDBCREATED = "" PDBORIGINAL = "" while riprova: try: try: fer = open(outputDic + str(i) + ".sol", "r") ferli = fer.readlines() fer.close() llg = 0.0 zscore = 0.0 data_chain = {} first = False for ferlinea in ferli: if ferlinea.startswith("SOLU SET"): if first: break else: listy = ferlinea.split() llg = float(listy[2].split("=")[-1]) first = True if ferlinea.startswith("SOLU GYRE"): topl = ferlinea.split() chainid = topl[4][-2] data_chain[chainid] = [float(topl[6]), float(topl[7]), float(topl[8]), float(topl[10]), float(topl[11]), float(topl[12])] # NOTE: TEMPORARY for test gyre """ structure = 
Bioinformatics.getStructure("full",os.path.join(outputDic,str(i)+".pdb")) new_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] #resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0,0,chain_atoms,[matrixFromEulerAngles(data_chain[chain_name][0],data_chain[chain_name][1],data_chain[chain_name][2])],cell_dim,return_atoms=True) chain_atoms = translateListByFrac(0,0,chain_atoms,data_chain[chain_name][3:],cell_dim,return_atoms=True) new_list_atoms += chain_atoms fer = open(outputDic+str(i)+".sh","r") ferli = fer.readlines() fer.close() euler = [] for ferlinea in ferli: if ferlinea.startswith("SOLU TRIAL"): topl = ferlinea.split() euler = [float(topl[5]),float(topl[6]),float(topl[7])] if ferlinea.startswith("SOLU SPAC"): spaceGroup = ferlinea.split("SOLU SPAC")[1] spaceGroup = spaceGroup.strip() structure = Bioinformatics.getStructure("full",os.path.abspath(outputDic+os.path.basename(str(i)+".pdb"))) old_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] #resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0,0,chain_atoms,[matrixFromEulerAngles(euler[0],euler[1],euler[2])],cell_dim,return_atoms=True) chain_atoms = translateListByFrac(0,0,chain_atoms,[0.0,0.0,0.0],cell_dim,return_atoms=True) old_list_atoms += chain_atoms PDBORIGINAL = Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] f = open(os.path.abspath(outputDic+os.path.basename(str(i)+".1_original.pdb")),"w") CRD='{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' 
data=dict(r='CRYST1',a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data)+"\n") f.write(PDBORIGINAL) f.close() PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] f = open(os.path.abspath(outputDic+os.path.basename(str(i)+".1_created.pdb")),"w") CRD='{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data=dict(r='CRYST1',a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data)+"\n") f.write(PDBCREATED) f.close() """ # NOTE: TEST END cancel this block after test if os.path.exists(models_directory): """ structure = Bioinformatics.getStructure("full",os.path.join(outputDic,str(i)+".pdb")) new_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] #resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0,0,chain_atoms,[matrixFromEulerAngles(data_chain[chain_name][0],data_chain[chain_name][1],data_chain[chain_name][2])],cell_dim,return_atoms=True) chain_atoms = translateListByFrac(0,0,chain_atoms,data_chain[chain_name][3:],cell_dim,return_atoms=True) new_list_atoms += chain_atoms """ structure = Bioinformatics.getStructure("full", os.path.join(outputDic, str(i) + ".1.pdb")) new_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() new_list_atoms += chain_atoms # print "------",len(new_list_atoms),len(new_list_atoms) new_list_atoms = 
sorted(new_list_atoms, __cmp_atom) ori_path = SystemUtility.findInSubdirectory( os.path.basename(convNames[str(i) + ".1.pdb"][0]), subdirectory=models_directory) # if models_directory in convNames[str(i)+".1.pdb"][0]: # ori_path = convNames[str(i)+".1.pdb"][0] # else: # ori_path = os.path.join(models_directory,os.path.basename(convNames[str(i)+".1.pdb"][0])) if model_file != None and os.path.exists(model_file): ori_path = model_file # new_list_atoms = sorted(new_list_atoms,__cmp_atom) if not os.path.exists(ori_path): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0]).split("-")[ 0] + ".pdb") structure = Bioinformatics.getStructure("full", ori_path) old_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() old_list_atoms += chain_atoms # print "------",len(old_list_atoms),len(new_list_atoms) old_list_atoms = sorted(old_list_atoms, __cmp_atom) PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] PDBORIGINAL = Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] new_list_resi = [] structureCOMP = Bioinformatics.getStructure("full2", cStringIO.StringIO(PDBCREATED)) for model in structureCOMP.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): new_list_resi += [resi] old_list_resi = [] structureREF = Bioinformatics.getStructure("full3", cStringIO.StringIO(PDBORIGINAL)) for model in structureREF.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): old_list_resi += [resi] # CALCOLO DEL RMSD FRA IL MODELLO VECCHIO E IL MODELLO NUOVO (rmsd, nref, ncom, listCoord, structureFINE, pdball) = Bioinformatics.getSuperimp( [old_list_resi], [new_list_resi], "PRECOMPUTED", algorithm="biopython", allAtomsModel=PDBCREATED, backbone=True) # 
,listmodel=listcomp) lineas = PDBCREATED.splitlines() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break rotateStructureByListCoord(i, 1, structureFINE, listCoord, outputDic, title) else: rmsd = -100 nref = 0 ncom = 0 except: print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsd = -100 nref = 0 ncom = 0 ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) nresc = -1 rms0 = -1 diffrms = -1 p = subprocess.Popen('grep -c " CA " ' + os.path.join(outputDic, str(i) + ".1.pdb"), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() outp = outp.strip() nresc = int(outp) nmin = nresc - 10 ndiff = -1 if os.path.exists(ent): # TODO: FIND the BEST Rt to superpose the template to the final structure # 1: start BORGES_MATRIX using the template as model reference # (rmsent,nrefent,ncoment,allent,cment,pdent) = Bioinformatics.getSuperimp(os.path.join(outputDic,str(i)+".1.pdb"),ent,"PDB",algorithm="minrms",backbone=True,minmaxrms=(core_init,core_init)) # (rms0,nref0,ncom0,all0,cm0,pd0) = Bioinformatics.getSuperimp(ori_path,ent,"PDB",algorithm="minrms",backbone=True,minmaxrms=(core_fin,core_fin)) # ndiff = ncom0-ncoment # rmsd_init_core_fin = rms0 # rmsd_fin_core_init = rmsent (rmsent, nrefent, ncoment, allent, cment, dicent) = Bioinformatics.getSuperimp( os.path.join(outputDic, str(i) + ".1.pdb"), ent, "PDB", algorithm="superpose", backbone=True) (rms0, nref0, ncom0, all0, cm0, dic0) = Bioinformatics.getSuperimp(ori_path, ent, "PDB", algorithm="superpose", backbone=True) rmsd_init = rms0 core_init = ncom0 dicc_init = dic0 rmsd_fin = rmsent core_fin = ncoment dicc_fin = dicent (rmsd_core_common_init, rmsd_core_common_fin, core_common) = Bioinformatics.getRMSDfromCommonCore(dicc_init, dicc_fin) diffrmsd = rmsd_core_common_init - 
rmsd_core_common_fin # print "Model:",os.path.join(outputDic,str(i)+".1.pdb"),"RMSD",rmsent,"NREF",nrefent,"NCOM",ncoment f = open(outputDic + "../pm_gyre.sum", "a") f.write("MODEL: " + convNames[str(i) + ".1.pdb"][0] + " SIZE: " + str( nresc) + " RMSD_GYRE: " + str("%.2f" % rmsd) + " RMSD_INIT: " + str( "%.2f" % rmsd_init) + " CORE_INIT: " + str(core_init) + " RMSD_FIN: " + str( "%.2f" % rmsd_fin) + " CORE_FIN: " + str(core_fin) + " RMSD_INIT_COMMON: " + str( "%.2f" % rmsd_core_common_init) + " RMSD_FIN_COMMON: " + str( "%.2f" % rmsd_core_common_fin) + " RMSD_DIFF: " + str( "%.2f" % diffrmsd) + " CORE_COMMON: " + str(len(core_common.keys())) + " LLG: " + str( llg) + "\n") f.close() """ dbdir = os.path.join(outputDicr,"pmdb") stored = os.path.join(outputDicr,"stored_sup") if os.path.exists(dbdir): shutil.rmtree(dbdir) if not os.path.exists(stored): os.makedirs(stored) os.makedirs(dbdir) nameofent = os.path.basename(ent)[:5] shutil.copyfile(ent, os.path.join(dbdir,nameofent+".pdb")) print "Starting BORGES to find",os.path.join(outputDic,str(i)+".1.pdb"),"into",ent,"..." 
Parameters = {} Parameters["model"] = ori_path #os.path.join(outputDic,str(i)+".1.pdb") Parameters["dir"] = dbdir Parameters["wdir"] = outputDicr Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 0 Parameters["nilges"] = 10 Parameters["enhance_fold"] = True Parameters["remove_coil"] = True pars,opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars,opt,doCluster=False,superpose=False,sym=sym,process_join=True) if os.path.exists(os.path.join(outputDicr,"./library/")): # 2: Find the best superposition between the template and all the extracted models f = open(os.path.join(outputDicr,"./library/"+"list_rmsd.txt"),"r") allines - f.readlines() rmsd_gyre_vs_ent = float(allines[0].split()[0]) best_model = allines[0].split()[1] shutil.copyfile(os.path.join(outputDicr,"./library/"+best_model),os.path.join(stored,str(i)+".1.sup.pdb")) shutil.rmtree(dbdir) os.remove(os.path.join(outputDicr,"input_search.pdb")) if os.path.exists(os.path.join(outputDicr,"./library/")): shutil.rmtree(os.path.join(outputDicr,"./library/")) """ if not isOMIT: f = open(outputDic + "../models.sum", "a") f.write("===========\n") # f.write("MODEL: "+str(i)+" CORRESP.: "+os.path.abspath(outputDic+os.path.basename(convNames[str(i)+".1.pdb"][0]))+" RMSD_GYRE: "+str("%.2f" % rmsd)+" RMSD_INIT: "+str("%.2f" % rms0)+" RMSD_ENT: "+str("%.2f" % rmsd_gyre_vs_ent)+" DIFF: "+str("%.2f" % diffrms)+" GYRE_ALIGNED_RES: "+str(ncoment)+" RESDIFF: "+str(ndiff)+" OVER_RES: "+str(nresc)+" LLG: "+str(llg)+"\n") f.write("MODEL: " + str(i) + " CORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + " RMSD_GYRE: " + str( "%.2f" % rmsd) + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF_ATM: " + str( nref) + 
"\t" + "NCOM_ATM: " + str(ncom) + "\n") f.write("===========\n") f.close() else: if len(Clud) <= LIMIT_CLUSTER: for q in range(len(Clud), LIMIT_CLUSTER + 1): Clud.append({"heapSolutions": ADT.Heap()}) ro["llg"] = llg ro["zscore"] = zscore ro["euler"] = data_chain["A"][:3] ro["frac"] = data_chain["A"][3:] Clud[LIMIT_CLUSTER]["heapSolutions"].push((-1 * ro["llg"], -1 * ro["zscore"]), ro) # PULIZIA FILES INUTILI if os.path.exists(models_directory): """ if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass f = open(outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0]), "w") f.write(PDBCREATED) f.close() """ if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass shutil.move(os.path.join(outputDic, str(i) + ".1.pdb"),outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) else: ensembles[convNames[str(i) + ".1.pdb"][1]] = convNames[str(i) + ".1.pdb"][0] # print "=======",str(i)+".1.pdb","......",convNames[str(i)+".1.pdb"],"oooooooooo",convNames[str(i)+".1.pdb"][1],"////",ensembles[convNames[str(i)+".1.pdb"][1]],"------",convNames[str(i)+".1.pdb"][0] # NOTE: TEMPORARY FOR TESTING GYRE try: os.remove(outputDic + str(i) + ".pdb") except: pass os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sol") except: pass try: os.remove(outputDic + str(i) + ".sum") except: pass try: os.remove(outputDic + str(i) + ".rlist") except: pass # NOTE: ENDING test (decomment when finished) try: os.remove(outputDic + str(i) + ".1.pdb") 
def evaluateRefP1(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, quate, convNames,
                  ensembles, LIMIT_CLUSTER=None):
    """Collect the Phaser P1-refinement results of every job queued in ``listaEva``.

    The module-level ``listaEva`` dictionary is consumed in ascending key order.
    For each job index ``i`` the function waits for ``<i>.out``, ``<i>.1.pdb``
    and ``<i>.sol``, tries to superpose the refined model on the original one,
    reads LLG and Z-score from the first ``SOLU SET`` line of the ``.sol`` file,
    stores both values in the rotation dictionary, registers the rotation with
    ``saveRotations``, appends a record to ``models.sum`` and removes the
    intermediate files of the job.

    Parameters:
        DicParameters: forwarded to ``saveRotations`` and
            ``SystemUtility.close_connection``.
        cm: connection manager. When it has a ``channel`` attribute the output
            files are fetched with ``cm.get_remote_file``; otherwise they are
            looked up on the local file system.
        sym: not used in this function.
        DicGridConn: forwarded to ``SystemUtility.close_connection``.
        nameJob: job name; ``<nameJob>.cmd`` is removed remotely at the end.
        outputDicr: directory holding the numbered sub-directories
            (``0/``, ``1/``, ...) with the job files.
        maintainOrigCoord: when true and the superposition succeeded, the
            ``<i>_1_rot.pdb`` file replaces the Phaser ``<i>.1.pdb`` output.
        quate: only referenced from commented-out code.
        convNames: dictionary indexed by ``"<i>.1.pdb"``; item ``[0]`` is used
            as the path of the original model and item ``[1]`` as the key
            updated in ``ensembles``.
        ensembles: dictionary updated in place with the path of each final
            model.
        LIMIT_CLUSTER: not used in this function.

    Returns:
        The tuple ``(CluWork, ensembles)``, where ``CluWork`` is the value
        returned by the last ``saveRotations`` call (``[]`` if no job was
        processed).
    """
    global listaEva

    dirente = outputDicr
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    if hasattr(cm, "channel"):
        # Remote run: remember the current remote directory and enter the job directory.
        current_dir = cm.get_remote_pwd()
        print cm.change_remote_dir(os.path.basename(dirente))

    status = "--"  # never read below
    toIn = 1  # never read below
    # rotas = (CLUDATA["heapSolutions"]).asList()
    i = 0
    ndir = 0
    dirente2 = ""
    current_dir2 = ""
    CluWork = []
    while len(listaEva.keys()) > 0:
        # Always handle the smallest pending job index.
        i = (sorted(listaEva.keys()))[0]
        nc, nq, numFrag, ro = listaEva[i]
        # nameJobi = nameJob+"_"+str(i)
        yetEvaluated = True  # constant, so the branch below is always taken
        if yetEvaluated:
            print "Evaluating " + str(i) + " model"
            if i % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                # First job of a numbered sub-directory: switch to it.
                outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/")
                dirente2 = outputDic
                if dirente2[-1] == "/":
                    dirente2 = dirente2[:-1]
                if hasattr(cm, "channel"):
                    SystemUtility.remote_reconnection(cm.get_remote_pwd())
                    current_dir2 = cm.get_remote_pwd()
                    print cm.change_remote_dir(os.path.basename(dirente2))
                ndir += 1

            # Wait until the three output files of job i are available.
            while 1:
                if hasattr(cm, "channel"):
                    wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"),
                                             conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST)
                    if isinstance(wse, bool) and not wse:
                        print "File " + str(i) + ".out not ready sleeping 3 seconds..."
                        time.sleep(3)
                        continue

                    wse = cm.get_remote_file(str(i) + ".1.pdb", os.path.join(outputDic, str(i) + ".1.pdb"),
                                             lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION,
                                             testEND=PHASER_RLIST_SOL_PDB_END_TEST)
                    if isinstance(wse, bool) and not wse:
                        print "File " + str(i) + ".1.pdb not ready sleeping 3 seconds..."
                        time.sleep(3)
                        continue

                    wse = cm.get_remote_file(str(i) + ".sol", os.path.join(outputDic, str(i) + ".sol"),
                                             conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION,
                                             testEND=PHASER_RLIST_SOL_PDB_END_TEST)
                    if isinstance(wse, bool) and not wse:
                        print "File " + str(i) + ".sol not ready sleeping 3 seconds..."
                        time.sleep(3)
                        continue
                    # print cm.remove_remote_file(str(i)+".1.mtz")
                    # print cm.remove_remote_file(str(i)+".1.pdb")
                    # print cm.remove_remote_file(str(i)+".sh")
                    # print cm.remove_remote_file(str(i)+".sol")
                    # print cm.remove_remote_file(str(i)+".sum")
                    # for t in range(numFrag):
                    #    print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb")
                    break
                elif os.path.exists(os.path.join(outputDic, str(i) + ".out")):
                    checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL,
                                    PHASER_OUT_END_TEST)
                    # NOTE(review): this inner loop polls the file system without sleeping.
                    while True:
                        atest = os.path.exists(os.path.join(outputDic, str(i) + ".1.pdb"))
                        btest = os.path.exists(os.path.join(outputDic, str(i) + ".sol"))
                        if atest and btest:
                            break
                    break
                else:
                    time.sleep(3)
                    continue

            # Parse the results; the whole step is repeated until it completes without raising.
            riprova = True
            while riprova:
                try:
                    try:
                        tupla = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(convNames[str(i) + ".1.pdb"][0],
                                                                                   False)
                        listcomp = tupla[1]
                        # Compute the RMSD between the old model and the new model
                        (rmsd, nref, ncom, listCoord, structure, pdball) = Bioinformatics.getSuperimp(
                            convNames[str(i) + ".1.pdb"][0], outputDic + str(i) + ".1.pdb", "PDB",
                            algorithm="biopython", listmodel=listcomp)

                        flon = open(outputDic + str(i) + ".1.pdb", "r")
                        lineas = flon.readlines()
                        flon.close()
                        # Take the title from the "REMARK TITLE" record that precedes the coordinates.
                        title = "--"
                        for li in lineas:
                            lis = li.split()
                            if lis[0] == "REMARK" and lis[1] == "TITLE":
                                title = lis[2]
                                break
                            if lis[0] == "ATOM" or lis[0] == "HETATM":
                                break
                        rotateStructureByListCoord(i, 1, structure, listCoord, outputDic, title)
                    except:
                        # Any failure above is recorded through the sentinel rmsd value -100.
                        rmsd = -100
                        nref = 0
                        ncom = 0
                    # if rmsd == -100:
                    #    print "ATTENTION: Models not comparable!!!!"

                    # tra,fixe,dis = readTranslationsFTF(outputDic,str(i),quate,"RNP_P1")
                    fer = open(outputDic + str(i) + ".sol", "r")
                    ferli = fer.readlines()
                    fer.close()
                    llg = 0
                    zscore = 0
                    for ferlinea in ferli:
                        if ferlinea.startswith("SOLU SET"):
                            listy = ferlinea.split()
                            # The third token has its first four characters stripped before conversion.
                            llg = float(listy[2][4:])
                            zscore = 0.0
                            try:
                                # The fourth token has its first five characters stripped before conversion.
                                zscore = float(listy[3][5:])
                            except:
                                zscore = 0.0
                            break

                    ro["llg"] = llg
                    ro["zscore"] = zscore

                    CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork])

                    f = open(outputDic + "../models.sum", "a")
                    f.write("===========\n")
                    f.write("MODEL: " + str(i) + "\tCORRESP.: " + os.path.abspath(
                        outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + "\t RMSD: " + str(
                        rmsd) + "\t" + "\n")
                    f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF: " + str(
                        nref) + "\t" + "NCOM: " + str(ncom) + "\n")
                    f.write("===========\n")
                    f.close()

                    # Clean up files that are no longer needed
                    if maintainOrigCoord and rmsd != -100:
                        # Keep the superposed copy in place of the Phaser output.
                        os.remove(outputDic + str(i) + ".1.pdb")
                        shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb",
                                    outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0]))
                    else:
                        try:
                            os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb")
                        except:
                            pass
                        shutil.move(outputDic + str(i) + ".1.pdb",
                                    outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0]))

                    ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename(
                        convNames[str(i) + ".1.pdb"][0])

                    os.remove(outputDic + str(i) + ".pdb")
                    os.remove(outputDic + str(i) + ".sh")
                    os.remove(outputDic + str(i) + ".out")
                    try:
                        os.remove(outputDic + str(i) + ".sum")
                    except:
                        pass
                    os.remove(outputDic + str(i) + ".sol")
                    try:
                        os.remove(outputDic + str(i) + ".1.mtz")
                    except:
                        pass
                    for t in range(numFrag):
                        os.remove(outputDic + str(t) + "_" + str(i) + ".pdb")

                    del listaEva[i]
                    i += 1  # only used by the check below; i is reassigned at the top of the outer loop
                    if i % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                        # The sub-directory is complete: go back to its parent on the remote side.
                        if hasattr(cm, "channel"):
                            cm.change_remote_dir(current_dir2)
                    riprova = False
                    now = datetime.datetime.now()
                    ti = now.strftime("%Y-%m-%d %H:%M")
                    print "Not all models are yet performed..." + ti
                    if not yetEvaluated:
                        time.sleep(60)
                except:
                    # NOTE(review): every exception is swallowed without any message and the step is
                    # retried immediately, so a persistent error (e.g. a file already removed by a
                    # previous partial pass) would loop forever - confirm this is intended.
                    # print "Error...Trying to read again output files..."
                    # print sys.exc_info()
                    # traceback.print_exc(file=sys.stdout)
                    riprova = True
                    # time.sleep(30)

    # NOTE(review): outputDic is only bound inside the loop above; with an empty listaEva this
    # line would raise NameError - verify against callers.
    os.remove(outputDic + "refP1.mtz")

    if hasattr(cm, "channel"):
        print cm.remove_remote_file(nameJob + ".cmd")
        print cm.remove_remote_file("refP1.mtz")
        cm.change_remote_dir(current_dir)
        print cm.remove_remote_dir(os.path.basename(dirente))

    print "Now all models are performed."
    listaEva = {}
    SystemUtility.close_connection(DicGridConn, DicParameters, cm)

    return CluWork, ensembles
def startRNP(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso,
             nice, normfactors, tncsfactors, RMSD, lowR, highR, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None,
             sampl=-1, USE_TNCS=True, USE_RGR=False, BFAC=False, VRMS=False, BULK_FSOL=-1, BULK_BSOL=-1,
             RNP_GYRE=False):
    """Write and launch one Phaser ``MODE MR_RNP`` job per solution.

    For every solution of every cluster in ``ClusAll`` (optionally restricted
    by ``LIMIT_CLUSTER`` and ``tops``) a ``<counter>.sh`` keyword file is
    written into a numbered sub-directory of ``outputDire``; each
    sub-directory receives at most ``NUMBER_OF_FILES_PER_DIRECTORY`` jobs.
    The jobs are then started locally (``cm is None``) or submitted through
    ``cm.submitJobs``.

    Parameters:
        DicParameters, Aniso: not used in this function.
        cm: ``None`` for local execution; otherwise the grid/connection
            manager. When it has a ``channel`` attribute the directory layout
            is mirrored on the remote host.
        sym: object providing ``PROCESSES``, ``REALPROCESSES`` and
            ``couldIClose`` for local execution.
        nameJob: name given to the grid job.
        ClusAll: list of dictionaries whose ``"heapSolutions"`` entry provides
            ``asList()`` and iterates over ``(priority, rotation)`` pairs.
        ensembles: maps ensemble names to model file paths.
        outputDire: directory that receives the numbered sub-directories.
        mtz, tncsfactors, normfactors: files linked as ``rnp.mtz``,
            ``anis.tncs`` and ``anis.norm`` in every sub-directory.
        MW, NC, F, SIGF, Intensities, RMSD, lowR, highR, spaceGroup, sampl,
        USE_TNCS, BFAC, VRMS, BULK_FSOL, BULK_BSOL: values written into the
            keyword file.
        nice: niceness passed to ``nice -n`` for local jobs.
        frag_fixed: when greater than 1 the fixed fragments of each solution
            are written as additional ensembles and ``SOLU 6DIM`` lines.
        tops: optional maximum number of solutions taken per cluster.
        LIMIT_CLUSTER: optional index of the only cluster to process.
        USE_RGR: on a remote host, copy the model files instead of linking
            them from ``cm.remote_library_path``.
        RNP_GYRE: when true, adds ``MACMR CHAINS ON`` and resets the Euler
            angles and fractional coordinates of every solution to zero.

    Returns:
        ``(nq, conv2)``: ``nq`` is the number of jobs (``counter`` locally, or
        the second item returned by ``cm.submitJobs``); ``conv2`` maps
        ``"<counter>.1.pdb"`` to ``(pdb path, ensemble name)``.
    """
    # Translate the boolean switches into the text written in the keyword file.
    if VRMS:
        VRMS = "ON"
    else:
        VRMS = "OFF"

    if BFAC:
        BFAC = "ON"
    else:
        BFAC = "OFF"

    if RNP_GYRE:
        RNP_GYRE = "\n MACMR CHAINS ON"
    else:
        RNP_GYRE = ""

    if not (os.path.exists(outputDire)):
        os.makedirs(outputDire)

    dirente = outputDire
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    # NOTE(review): same check as a few lines above; it cannot create anything new.
    if not (os.path.exists(outputDire)):
        os.makedirs(outputDire)

    if hasattr(cm, "channel"):
        # Mirror the output directory on the remote host and enter it.
        current_dir = cm.get_remote_pwd()
        print cm.create_remote_dir(os.path.basename(dirente))
        print cm.change_remote_dir(os.path.basename(dirente))

    counter = 0  # number of jobs written so far; also the job identifier
    ndir = 0  # number of numbered sub-directories created so far
    dirente2 = ""
    current_dir2 = ""
    numall = -1
    conv2 = {}
    for inde in range(len(ClusAll)):
        clu = ClusAll[inde]
        nrts = len(clu["heapSolutions"].asList())
        if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde:
            # Skipped cluster: if it is the last one, still return to the parent remote directory.
            if current_dir2 != "" and inde == len(ClusAll) - 1:
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)
            continue
        s = 0  # never read below
        cou = 0  # number of solutions of this cluster already written
        for sol in clu["heapSolutions"]:
            if frag_fixed == 1 and tops != None and cou >= tops:
                break
            rota = sol[1]
            if RNP_GYRE:
                # RNP_GYRE is a non-empty string here when the option was requested.
                rota['euler']=[0.0,0.0,0.0]
                rota['frac'] = [0.0, 0.0, 0.0]
            prio = (rota["llg"], rota["zscore"])
            if tops != None and cou >= tops:
                break

            # list_pdbs: model path -> (ensemble name, running index, is-the-moving-fragment flag)
            list_pdbs = {}
            ct = 0
            if ensembles[rota["name"]] not in list_pdbs.keys():
                list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True)
                ct += 1
            if "fixed_frags" in rota:
                for rotafi in rota["fixed_frags"]:
                    if ensembles[rotafi["name"]] not in list_pdbs.keys():
                        list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False)
                        ct += 1
            numall = ct

            # if usePDB != None:
            #    pdbf = usePDB
            if True:
                # print "Preparing: "+str(pdbf)+" as model "+str(counter)
                if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                    # Start a new numbered sub-directory and link the shared input files into it.
                    outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/")
                    if not (os.path.exists(outputDirectory)):
                        os.makedirs(outputDirectory)
                    try:
                        os.symlink(mtz, os.path.join(outputDirectory, "rnp.mtz"))
                        os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs"))
                        os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm"))
                    except:
                        pass
                    dirente2 = outputDirectory
                    if dirente2[-1] == "/":
                        dirente2 = dirente2[:-1]
                    if hasattr(cm, "channel"):
                        current_dir2 = cm.get_remote_pwd()
                        print cm.create_remote_dir(os.path.basename(dirente2))
                        print cm.change_remote_dir(os.path.basename(dirente2))
                        cm.create_remote_link(cm.remote_mtz_path, "rnp.mtz")
                        cm.create_remote_link(cm.remote_tncs_path, "anis.tncs")
                        cm.create_remote_link(cm.remote_norm_path, "anis.norm")
                    ndir += 1

                # Each model is linked as "<index>_<counter>.pdb"; failures are ignored.
                try:
                    for pdbf in list_pdbs.keys():
                        val = list_pdbs[pdbf]
                        os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb"))
                except:
                    pass

                if hasattr(cm, "channel"):
                    for pdbf in list_pdbs.keys():
                        val = list_pdbs[pdbf]
                        if not USE_RGR:  # Then we can use the original models
                            print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2)))
                            cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)),
                                                  str(val[1]) + "_" + str(counter) + ".pdb")
                        elif USE_RGR:  # Entered condition of gyre, we need to copy the files
                            print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2)))
                            good_remote_path = os.path.join(current_dir2, os.path.basename(dirente2))
                            cm.copy_local_file(localfile=pdbf,
                                               remotefile=os.path.join(good_remote_path,
                                                                       str(val[1]) + "_" + str(counter) + ".pdb"),
                                               remote_path_asitis=True, send_now=True)

                if True:
                    # Write the Phaser keyword file of this job.
                    f = open(outputDirectory + "/" + str(counter) + ".sh", "w")
                    f.write("#!/bin/tcsh" + "\n")
                    f.write("MODE MR_RNP" + "\n")
                    f.write('HKLIN "rnp.mtz"' + "\n")
                    f.write('HKLOUT OFF' + "\n")
                    if not Intensities:
                        f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n")
                    else:
                        # With intensities the same two labels are written as I/SIGI.
                        f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n")
                    f.write("TITLE Test refinement and phasing" + "\n")
                    f.write("JOBS 1" + "\n")
                    f.write("SGALTERNATIVE SELECT NONE" + "\n")
                    f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n")
                    f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n")
                    f.write("MACTNCS PROTOCOL OFF" + "\n")
                    f.write("MACANO PROTOCOL OFF" + "\n")
                    f.write("TNCS EPSFAC READ anis.tncs" + "\n")
                    f.write("NORM EPSFAC READ anis.norm" + "\n")
                    f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n")
                    f.write("XYZOUT ON" + "\n")
                    if not USE_TNCS:
                        f.write("TNCS USE OFF\n")
                    if BULK_BSOL >= 0 and BULK_FSOL >= 0:
                        f.write("SOLPARAMETERS BULK USE ON" + "\n")
                        f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n")
                    else:
                        f.write("SOLPARAMETERS BULK USE OFF" + "\n")
                    f.write("TOPFILES " + str(nrts + 30) + "\n")
                    f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str(
                        list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n")
                    if frag_fixed > 1:
                        # Declare the ensembles of the fixed fragments that differ from the moving one.
                        for frifr in rota["fixed_frags"]:
                            if frifr["name"] != rota["name"]:
                                f.write("ENSEMBLE " + frifr["name"] + " PDBFILE " + str(
                                    list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(
                                    RMSD) + "\n")
                    if frag_fixed > 1:
                        f.write("SOLU SET" + "\n")
                        f.write("SOLU SPAC " + str(spaceGroup) + "\n")
                        for frifr in rota["fixed_frags"]:
                            rotafi = frifr
                            prifi = (rotafi["llg"], rotafi["zscore"])
                            f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str(
                                (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str(
                                (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str(
                                (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str(
                                rotafi["bfactor"]) + " #CLUSTER: " + str(
                                rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str(
                                prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n")
                    else:
                        f.write("SOLU SET" + "\n")
                        f.write("SOLU SPAC " + str(spaceGroup) + "\n")
                    # The solution itself is always written, after any fixed fragments.
                    if "n_prev_cluster" in rota:
                        f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str(
                            (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str(
                            (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str(
                            (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str(
                            rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prio[0]) + " ZSCORE: " + str(
                            prio[1]) + "\n")
                    else:
                        f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str(
                            (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str(
                            (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str(
                            (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: NONE LLG: " + str(
                            prio[0]) + " ZSCORE: " + str(prio[1]) + "\n")
                    cou += 1
                    f.write('ROOT "' + str(counter) + '"\n')
                    if sampl != -1:
                        f.write("SAMPLING ROT " + str(sampl) + "\n")
                        f.write("SAMPLING TRA " + str(sampl) + "\n")
                    f.write("END\n")
                    f.write("EOF-phaser")
                    f.close()

                    if hasattr(cm, "channel"):
                        cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"),
                                           "./" + str(ndir - 1) + "/" + str(counter) + ".sh")

                    # NOTE(review): pdbf is the variable left over from the list_pdbs loops above,
                    # i.e. the last key iterated, not necessarily ensembles[rota["name"]] - confirm.
                    conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"])
                    counter += 1

                # NOTE(review): nesting level reconstructed; assumed to run after every written job.
                if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1):
                    if hasattr(cm, "channel"):
                        cm.change_remote_dir(current_dir2)

    def startRNPJob(outputDirectory, op):
        """Run Phaser on ``<op>.sh`` inside ``outputDirectory`` unless ``<op>.sol`` already exists."""
        if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")):
            return

        f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r")
        f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w")
        print "Executing..."
        print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory,
                                                                           str(op) + ".sh"), ">", os.path.join(
            outputDirectory, str(op) + ".out")
        # The keyword file is fed on stdin and the log captured in <op>.out.
        p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE,
                             cwd=outputDirectory)
        out, err = p.communicate()
        f.close()
        f2.close()
        print err

    # Describe every sub-directory together with the number of jobs it holds.
    listaDirec = []
    numero = 0
    for pr in range(ndir):
        if pr == ndir - 1:
            # The last sub-directory holds whatever is left.
            numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY)
        else:
            numero = NUMBER_OF_FILES_PER_DIRECTORY

        if hasattr(cm, "channel"):
            listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero))
        else:
            listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero))

    nq = counter
    print "RNP of " + str(nq) + " models submitted to the local machine with:"
    print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF)
    print "MW: " + str(MW) + " NC: " + str(NC)
    print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR)
    if sampl != -1:
        print "at a sampling size: " + str(sampl)

    nl = 0
    if cm is None:
        # Local execution: one thread per job, or a direct command when nodes are configured.
        for op in range(counter):
            if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                nl += 1
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")
            else:
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")
            try:
                if len(SystemUtility.NODES) == 0:
                    if sym.PROCESSES > 0:
                        print "I found ", sym.REALPROCESSES, "CPUs."
                        # Wait until the number of live threads allows starting a new one.
                        # NOTE(review): this loop polls without sleeping.
                        while 1:
                            if len(threading.enumerate()) <= sym.PROCESSES:
                                p = SystemUtility.OutputThreading(startRNPJob, outputDirectory, op)
                                p.start()
                                break
                    else:
                        print "FATAL ERROR: I cannot load correctly information of CPUs."
                        sym.couldIClose = True
                        sys.exit(0)
                else:
                    while 1:
                        comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory,
                                                                       str(op) + ".sh") + ">" + os.path.join(
                            outputDirectory, str(op) + ".out")
                        SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"),
                                                    PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST)
                        break
            except KeyboardInterrupt:
                print "The user requires to exit from Borges."
                sym.couldIClose = True
                sys.exit(0)
    else:
        # Grid execution: describe the job once and submit `counter` instances.
        job = Grid.gridJob(nameJob)
        job.setExecutable(PATH_NEW_PHASER)
        job.setInitialDir(listaDirec)
        lineStdIn = ".sh"
        lia = lineStdIn.split()
        job.setStdIn(lia)
        job.addInputFile("rnp.mtz", False)
        job.addInputFile("anis.norm", False)
        job.addInputFile("anis.tncs", False)
        if numall > 0:
            # numall is the number of model files of the last solution processed.
            for lo in range(numall):
                job.addInputFile(str(lo) + "_" + ".pdb", True)
        else:
            job.addInputFile(".pdb", True)
        job.addInputFile(".sh", True)
        job.addOutputFile(".out", True)
        cm.setRequirements(PHASER_REQUIREMENTS)
        cm.setRank("kflops")
        (nc, nq) = cm.submitJobs(job, counter)

        if hasattr(cm, "channel"):
            cm.change_remote_dir(current_dir)

    return nq, conv2
def startRNPOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF,
                          Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, spaceGroup, frag_fixed,
                          tops=None, LIMIT_CLUSTER=None, usePDO=False, sampl=-1, USE_TNCS=True, USE_RGR=False,
                          BFAC=False, VRMS=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False):
    """Write and launch one Phaser ``MODE MR_RNP`` job per non-empty cluster.

    Unlike ``startRNP``, a single ``<counter>.sh`` keyword file is written for
    each cluster; it contains one ``SOLU SET`` per solution of that cluster
    (at most ``tops`` when given). Ensembles are named ``ensarci<index>`` in
    the keyword file. The jobs are started locally when ``cm is None`` and
    submitted through ``cm.submitJobs`` otherwise.

    Parameters:
        DicParameters, Aniso, LIMIT_CLUSTER, USE_RGR: not used in this function.
        cm: ``None`` for local execution; otherwise the grid/connection
            manager. When it has a ``channel`` attribute the directory layout
            is mirrored on the remote host.
        sym: object providing ``PROCESSES``, ``REALPROCESSES`` and
            ``couldIClose`` for local execution.
        nameJob: name given to the grid job.
        ClusAll: list of dictionaries whose ``"heapSolutions"`` entry provides
            ``asList()`` and iterates over ``(priority, rotation)`` pairs.
            The rotation dictionaries are modified in place
            (``fixed_frags``, ``original_rotcluster``, ``n_prev_cluster``).
        ensembles: maps ensemble names to model file paths.
        outputDire: directory that receives the numbered sub-directories.
        mtz, tncsfactors, normfactors: files linked as ``rnp.mtz``,
            ``anis.tncs`` and ``anis.norm`` in every sub-directory.
        usePDO: when true, every ensemble whose name carries the fragment
            number ``frag_fixed - 2`` after ``"FR"`` is added to the job and
            all collected models are declared as ensembles.
        tops: optional maximum number of solutions written per cluster.
        Remaining parameters: values written into the keyword file, or the
            niceness (``nice``) used for local jobs.

    Returns:
        ``(nq, conv2)``: ``nq`` is the number of jobs (``counter`` locally, or
        the second item returned by ``cm.submitJobs``); ``conv2`` maps
        ``"<counter>.<s>.pdb"`` to ``(pdb path, ensemble name)``.
    """
    # Translate the boolean switches into the text written in the keyword file.
    if VRMS:
        VRMS = "ON"
    else:
        VRMS = "OFF"

    if BFAC:
        BFAC = "ON"
    else:
        BFAC = "OFF"

    if RNP_GYRE:
        RNP_GYRE = "\n MACMR CHAINS ON"
    else:
        RNP_GYRE = ""

    if not (os.path.exists(outputDire)):
        os.makedirs(outputDire)

    dirente = outputDire
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    if hasattr(cm, "channel"):
        # Mirror the output directory on the remote host and enter it.
        current_dir = cm.get_remote_pwd()
        print cm.create_remote_dir(os.path.basename(dirente))
        print cm.change_remote_dir(os.path.basename(dirente))

    counter = 0  # number of jobs written so far; also the job identifier
    conv2 = {}
    ndir = 0  # number of numbered sub-directories created so far
    dirente2 = ""
    current_dir2 = ""
    numall = -1
    for cds in range(len(ClusAll)):
        clu = ClusAll[cds]
        nrts = len(clu["heapSolutions"].asList())
        if nrts == 0:
            # Empty cluster: if it is the last one, still return to the parent remote directory.
            if current_dir2 != "" and cds == len(ClusAll) - 1:
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)
            continue

        # The first solution of the cluster decides which models are linked for the job.
        sol = clu["heapSolutions"].asList()[0]
        rota = sol[1]
        prio = (rota["llg"], rota["zscore"])
        cou = 0  # never read below
        # while sol != None:
        # if tops != None and cou >= tops:
        #    break

        # list_pdbs: model path -> (ensemble name, running index, is-the-moving-fragment flag)
        list_pdbs = {}
        ct = 0
        if usePDO:
            for key in ensembles:
                try:
                    # Fragment number encoded in the ensemble name between "FR" and "_".
                    franumero = int(key.split("FR")[-1].split("_")[0])
                except:
                    continue
                if franumero == frag_fixed - 2:
                    if ensembles[key] not in list_pdbs.keys():
                        list_pdbs[ensembles[key]] = (key, ct, False)
                        ct += 1

        if ensembles[rota["name"]] not in list_pdbs.keys():
            list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True)
            ct += 1
        if "fixed_frags" in rota:
            for rotafi in rota["fixed_frags"]:
                if ensembles[rotafi["name"]] not in list_pdbs.keys():
                    list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False)
                    ct += 1
        numall = ct

        # if usePDB != None:
        #    pdbf = usePDB
        # The string literal below is disabled code kept as a bare expression; it has no effect.
        """" nameRota = "" if rota["name"].split("xx") > 1: #if rota["name"] contains "xx" it means is an ARCIMBOLDO nameRota = "ensarci0" else: nameRota = rota["name"] """
        if True:
            # print "Preparing: "+str(pdbf)+" as model "+str(counter)
            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                # Start a new numbered sub-directory and link the shared input files into it.
                outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/")
                if not (os.path.exists(outputDirectory)):
                    os.makedirs(outputDirectory)
                try:
                    os.symlink(mtz, os.path.join(outputDirectory, "rnp.mtz"))
                    os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs"))
                    os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm"))
                except:
                    pass
                dirente2 = outputDirectory
                if dirente2[-1] == "/":
                    dirente2 = dirente2[:-1]
                if hasattr(cm, "channel"):
                    current_dir2 = cm.get_remote_pwd()
                    print cm.create_remote_dir(os.path.basename(dirente2))
                    print cm.change_remote_dir(os.path.basename(dirente2))
                    cm.create_remote_link(cm.remote_mtz_path, "rnp.mtz")
                    cm.create_remote_link(cm.remote_tncs_path, "anis.tncs")
                    cm.create_remote_link(cm.remote_norm_path, "anis.norm")
                ndir += 1

            # Each model is linked as "<index>_<counter>.pdb"; failures are ignored.
            try:
                for pdbf in list_pdbs.keys():
                    val = list_pdbs[pdbf]
                    os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb"))
            except:
                pass

            if hasattr(cm, "channel"):
                for pdbf in list_pdbs.keys():
                    val = list_pdbs[pdbf]
                    print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2)))
                    cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)),
                                          str(val[1]) + "_" + str(counter) + ".pdb")

            if True:
                # Write the Phaser keyword file of this cluster.
                f = open(outputDirectory + "/" + str(counter) + ".sh", "w")
                f.write("#!/bin/tcsh" + "\n")
                f.write("MODE MR_RNP" + "\n")
                f.write('HKLIN "rnp.mtz"' + "\n")
                f.write('HKLOUT OFF' + "\n")
                if not Intensities:
                    f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n")
                else:
                    # With intensities the same two labels are written as I/SIGI.
                    f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n")
                f.write("TITLE Test refinement and phasing" + "\n")
                f.write("JOBS 1" + "\n")
                f.write("SGALTERNATIVE SELECT NONE" + "\n")
                f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n")
                f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n")
                # if Aniso:
                #    f.write("MACANO PROTOCOL OFF"+"\n")
                #    f.write("MACTNCS PROTOCOL OFF"+"\n")
                f.write("MACANO PROTOCOL OFF" + "\n")
                f.write("MACTNCS PROTOCOL OFF" + "\n")
                f.write("TNCS EPSFAC READ anis.tncs" + "\n")
                f.write("NORM EPSFAC READ anis.norm" + "\n")
                f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n")
                if BULK_BSOL >= 0 and BULK_FSOL >= 0:
                    f.write("SOLPARAMETERS BULK USE ON" + "\n")
                    f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n")
                else:
                    f.write("SOLPARAMETERS BULK USE OFF" + "\n")
                f.write("XYZOUT ON" + "\n")
                f.write("TOPFILES " + str(nrts + 30) + "\n")
                if not USE_TNCS:
                    f.write("TNCS USE OFF\n")

                # Index of the model belonging to the first solution of the cluster.
                art = list_pdbs[ensembles[rota["name"]]][1]
                if not usePDO:
                    f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " PDBFILE " + str(
                        list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n")
                    if frag_fixed > 1:
                        for frifr in rota["fixed_frags"]:
                            if list_pdbs[ensembles[frifr["name"]]][1] != art:
                                f.write("ENSEMBLE ensarci" + str(
                                    list_pdbs[ensembles[frifr["name"]]][1]) + " PDBFILE " + str(
                                    list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(
                                    RMSD) + "\n")
                else:
                    # With usePDO every collected model is declared as an ensemble.
                    for valok in list_pdbs:
                        valo = list_pdbs[valok]
                        f.write("ENSEMBLE ensarci" + str(valo[1]) + " PDBFILE " + str(valo[1]) + "_" + str(
                            counter) + ".pdb RMS " + str(RMSD) + "\n")

                s = 0  # number of solutions of this cluster already written
                for sol in clu["heapSolutions"]:
                    if tops != None and s >= tops:
                        break
                    rota = sol[1]
                    prio = (rota["llg"], rota["zscore"])

                    # Reorder the fixed fragments by the last component of their
                    # "original_rotcluster" label and rebuild the cumulative labels.
                    lip = []  # last label component of every fixed fragment
                    ros = []  # fixed fragments followed by the solution itself
                    # lip.append(rota["original_rotcluster"].split("_")[-1])
                    if "fixed_frags" not in rota:
                        rota["fixed_frags"] = []
                    for r in rota["fixed_frags"]:
                        lip.append(r["original_rotcluster"].split("_")[-1])
                        ros.append(r)
                    rota["fixed_frags"] = []
                    ros.append(rota)
                    lip = sorted(lip)
                    # print "!!!!!!!!!!",lip
                    fixd = []  # fixed fragments in their new order
                    for ep in range(len(lip)):
                        ced = lip[ep]
                        initial = None
                        # print "cambio de guardia",ced
                        # Rotate through ros until an entry with label component `ced` is found,
                        # or until the first inspected entry comes round again.
                        while True:
                            rep = ros.pop(0)
                            if initial != None and initial == rep:
                                ros.append(rep)
                                break
                            if initial == None:
                                initial = rep
                            # print rep["original_rotcluster"].split("_")[-1], ced
                            if rep["original_rotcluster"].split("_")[-1] == ced:
                                if len(fixd) == 0:
                                    rep["original_rotcluster"] = ced
                                    rep["n_prev_cluster"] = int(ced)
                                else:
                                    # Chain the label onto the previously placed fragment.
                                    rep["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + ced
                                    rep["n_prev_cluster"] = __getIDClusterFromDescription(rep["original_rotcluster"])
                                # if ep == len(lip)-1:
                                #    rep["fixed_frags"] = fixd
                                # else:
                                rep["fixed_frags"] = []
                                fixd.append(copy.deepcopy(rep))
                                # print "He entrado aqui"
                                break
                            else:
                                # print "No es la que quiero paso a la proxima"
                                ros.append(rep)
                    # NOTE(review): presumably the entry left at the end of ros is the solution
                    # itself - verify, since the rotation above changes the order of ros.
                    rota = ros.pop()
                    if len(fixd) > 0:
                        rota["fixed_frags"] = fixd
                        rota["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + \
                                                      rota["original_rotcluster"].split("_")[-1]
                        rota["n_prev_cluster"] = __getIDClusterFromDescription(rota["original_rotcluster"])
                    else:
                        rota["fixed_frags"] = []
                    # print rota["original_rotcluster"]

                    if frag_fixed > 1:
                        f.write("SOLU SET" + "\n")
                        f.write("SOLU SPAC " + str(spaceGroup) + "\n")
                        for frifr in rota["fixed_frags"]:
                            rotafi = frifr
                            prifi = (rotafi["llg"], rotafi["zscore"])
                            f.write("SOLU 6DIM ENSE ensarci" + str(
                                list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str(
                                (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str(
                                (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str(
                                (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str(
                                rotafi["bfactor"]) + " #CLUSTER: " + str(
                                rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str(
                                prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n")
                    else:
                        f.write("SOLU SET" + "\n")
                        f.write("SOLU SPAC " + str(spaceGroup) + "\n")
                    # The solution itself is always written, after any fixed fragments.
                    if "n_prev_cluster" in rota:
                        f.write(
                            "SOLU 6DIM ENSE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str(
                                (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str(
                                (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str(
                                (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str(
                                rota["bfactor"]) + " #CLUSTER: " + str(
                                rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(
                                prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n")
                    else:
                        f.write(
                            "SOLU 6DIM ENSE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str(
                                (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str(
                                (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str(
                                (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str(
                                rota["bfactor"]) + " " + rota["name"] + "\n")
                    # cou += 1
                    s += 1
                    # nuovoPath = pdbf[:-4]+rota["name"].split("ensembleID")[1]+".pdb"
                    # conv2[str(counter)+"."+str(s)+".pdb"] = (nuovoPath,rota["name"])
                    # NOTE(review): pdbf is the variable left over from the list_pdbs loops above.
                    conv2[str(counter) + "." + str(s) + ".pdb"] = (pdbf, rota["name"])

                f.write('ROOT "' + str(counter) + '"\n')
                if sampl != -1:
                    f.write("SAMPLING ROT " + str(sampl) + "\n")
                    f.write("SAMPLING TRA " + str(sampl) + "\n")
                f.write("END\n")
                f.write("EOF-phaser")
                f.close()

                if hasattr(cm, "channel"):
                    cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"),
                                       "./" + str(ndir - 1) + "/" + str(counter) + ".sh")

                counter += 1

            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1):
                # Sub-directory full, or last cluster: return to the parent remote directory.
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)

    def startRNPJob(outputDirectory, op):
        """Run Phaser on ``<op>.sh`` inside ``outputDirectory`` unless ``<op>.sol`` already exists."""
        if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")):
            return

        f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r")
        f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w")
        print "Executing..."
        print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory,
                                                                           str(op) + ".sh"), ">", os.path.join(
            outputDirectory, str(op) + ".out")
        # The keyword file is fed on stdin and the log captured in <op>.out.
        p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE,
                             cwd=outputDirectory)
        out, err = p.communicate()
        f.close()
        f2.close()
        print err

    # Describe every sub-directory together with the number of jobs it holds.
    listaDirec = []
    numero = 0
    for pr in range(ndir):
        if pr == ndir - 1:
            # The last sub-directory holds whatever is left.
            numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY)
        else:
            numero = NUMBER_OF_FILES_PER_DIRECTORY

        if hasattr(cm, "channel"):
            listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero))
        else:
            listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero))

    nq = counter
    print "RNP of " + str(nq) + " models submitted to the local machine with:"
    print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF)
    print "MW: " + str(MW) + " NC: " + str(NC)
    print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR)
    if sampl != -1:
        print "at a sampling size: " + str(sampl)

    nl = 0
    if cm is None:
        # Local execution: one thread per job, or a direct command when nodes are configured.
        for op in range(counter):
            if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                nl += 1
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")
            else:
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")
            try:
                if len(SystemUtility.NODES) == 0:
                    if sym.PROCESSES > 0:
                        print "I found ", sym.REALPROCESSES, "CPUs."
                        # Wait until the number of live threads allows starting a new one.
                        # NOTE(review): this loop polls without sleeping.
                        while 1:
                            if len(threading.enumerate()) <= sym.PROCESSES:
                                p = SystemUtility.OutputThreading(startRNPJob, outputDirectory, op)
                                p.start()
                                break
                    else:
                        print "FATAL ERROR: I cannot load correctly information of CPUs."
                        sym.couldIClose = True
                        sys.exit(0)
                else:
                    while 1:
                        comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory,
                                                                       str(op) + ".sh") + ">" + os.path.join(
                            outputDirectory, str(op) + ".out")
                        SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"),
                                                    PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST)
                        break
            except KeyboardInterrupt:
                print "The user requires to exit from Borges."
                sym.couldIClose = True
                sys.exit(0)
    else:
        # Grid execution: describe the job once and submit `counter` instances.
        job = Grid.gridJob(nameJob)
        job.setExecutable(PATH_NEW_PHASER)
        job.setInitialDir(listaDirec)
        lineStdIn = ".sh"
        lia = lineStdIn.split()
        job.setStdIn(lia)
        job.addInputFile("rnp.mtz", False)
        job.addInputFile("anis.norm", False)
        job.addInputFile("anis.tncs", False)
        if numall > 0:
            # numall is the number of model files of the last cluster processed.
            for lo in range(numall):
                job.addInputFile(str(lo) + "_" + ".pdb", True)
        else:
            job.addInputFile(".pdb", True)
        job.addInputFile(".sh", True)
        job.addOutputFile(".out", True)
        cm.setRequirements(PHASER_REQUIREMENTS)
        cm.setRank("kflops")
        (nc, nq) = cm.submitJobs(job, counter)

        if hasattr(cm, "channel"):
            cm.change_remote_dir(current_dir)

    return nq, conv2
def __organizeClustersByFixedFragAndIDComb(ClusAll):
    """Split every cluster into sub-clusters sharing the same fixed-fragment Euler angles.

    The solutions of each input cluster are grouped by the JSON text of the
    list of ``"euler"`` values of their ``"fixed_frags"`` (an empty list when
    the key is absent). Each group becomes a new ``{"heapSolutions": heap}``
    dictionary whose heap is filled with priority ``(-llg, -zscore)``.

    :param ClusAll: list of dictionaries whose ``"heapSolutions"`` entry
                    iterates over ``(priority, rotation)`` pairs.
    :return: flat list with the regrouped cluster dictionaries.
    """
    regrouped = []
    for cluster in ClusAll:
        # Group the rotations of this cluster by their fixed-fragment signature.
        by_signature = {}
        for entry in cluster["heapSolutions"]:
            _priority, rotation = entry
            if "fixed_frags" in rotation:
                eulers = [fragment["euler"] for fragment in rotation["fixed_frags"]]
            else:
                eulers = []
            by_signature.setdefault(json.dumps(eulers), []).append(rotation)

        # One new heap per signature, ordered by decreasing LLG and then Z-score.
        for signature in by_signature:
            heap = ADT.Heap()
            for rotation in by_signature[signature]:
                heap.push((-1 * rotation["llg"], -1 * rotation["zscore"]), rotation)
            regrouped.append({"heapSolutions": heap})

    return regrouped
def _formatTriplet(values):
    """Return the first three items of *values* as text separated by single spaces."""
    return str(values[0]) + " " + str(values[1]) + " " + str(values[2])


def startFTF(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso,
             nice, normfactors, tncsfactors, RMSD, lowR, highR, final_tra, save_tra, frag_fixed, spaceGroup,
             cutoff_pack, sampl=-1, tops=None, USE_TNCS=True, LIMIT_CLUSTER=None, VRMS=False, BFAC=False,
             BULK_FSOL=-1, BULK_BSOL=-1, PACK_TRA=False):
    """Write and launch one Phaser fast-translation (``MODE MR_FTF``) job per solution.

    For every solution of every cluster in ``ClusAll`` whose model path ends
    with ``.pdb`` a ``<counter>.sh`` keyword file is written into a numbered
    sub-directory of ``outputDire`` (at most ``NUMBER_OF_FILES_PER_DIRECTORY``
    jobs per sub-directory). The jobs are then started locally when
    ``cm is None`` or submitted through ``cm.submitJobs`` otherwise.

    Parameters:
        DicParameters, Aniso, final_tra: not used in this function.
        cm: ``None`` for local execution; otherwise the grid/connection
            manager. When it has a ``channel`` attribute the directory layout
            is mirrored on the remote host.
        sym: object providing ``PROCESSES``, ``REALPROCESSES`` and
            ``couldIClose`` for local execution.
        nameJob: name given to the grid job.
        ClusAll: list of dictionaries whose ``"heapSolutions"`` entry provides
            ``asList()`` and iterates over ``(priority, rotation)`` pairs.
        ensembles: maps ensemble names to model file paths.
        outputDire: directory that receives the numbered sub-directories; it
            is created when missing.
        mtz, tncsfactors, normfactors: files linked as ``tran.mtz``,
            ``anis.tncs`` and ``anis.norm`` in every sub-directory.
        MW, NC, F, SIGF, Intensities, RMSD, lowR, highR, save_tra, spaceGroup,
        cutoff_pack, sampl, USE_TNCS, VRMS, BFAC, BULK_FSOL, BULK_BSOL,
        PACK_TRA: values written into the keyword file.
        nice: niceness passed to ``nice -n`` for local jobs.
        frag_fixed: when greater than 1 the fixed fragments of each solution
            are written as ``SOLU 6DIM`` lines before the trial rotation.
        tops: optional maximum number of solutions per cluster; only applied
            when ``frag_fixed == 1``.
        LIMIT_CLUSTER: optional index of the only cluster to process.

    Returns:
        The number of jobs: ``counter`` for local execution (``0`` when no
        keyword file could be written), or the second item returned by
        ``cm.submitJobs``.
    """
    if not os.path.exists(outputDire):
        os.makedirs(outputDire)

    # Set phaser keywords
    VRMS = "ON" if VRMS else "OFF"
    BFAC = "ON" if BFAC else "OFF"

    dirente = outputDire
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    if hasattr(cm, "channel"):
        # Mirror the output directory on the remote host and enter it.
        current_dir = cm.get_remote_pwd()
        print(cm.create_remote_dir(os.path.basename(dirente)))
        print(cm.change_remote_dir(os.path.basename(dirente)))

    counter = 0  # number of jobs written so far; also the job identifier
    ndir = 0  # number of numbered sub-directories created so far
    dirente2 = ""
    current_dir2 = ""
    last_index = len(ClusAll) - 1
    for inde, clu in enumerate(ClusAll):
        nrts = len(clu["heapSolutions"].asList())
        if nrts == 0 or LIMIT_CLUSTER is not None and LIMIT_CLUSTER != inde:
            # Skipped cluster: if it is the last one, still return to the parent remote directory.
            if current_dir2 != "" and inde == last_index:
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)
            continue

        cou = 0  # number of solutions of this cluster already written
        for sol in clu["heapSolutions"]:
            if frag_fixed == 1 and tops is not None and cou >= tops:
                break
            rota = sol[1]
            pdbf = ensembles[rota["name"]]
            # if usePDB != None:
            #    pdbf = usePDB
            if not pdbf.endswith(".pdb"):
                continue

            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                # Start a new numbered sub-directory and link the shared input files into it.
                outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/")
                if not os.path.exists(outputDirectory):
                    os.makedirs(outputDirectory)
                try:
                    os.symlink(mtz, os.path.join(outputDirectory, "tran.mtz"))
                    os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs"))
                    os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm"))
                except Exception:
                    # Best effort, as before; interrupts are no longer swallowed.
                    pass
                dirente2 = outputDirectory
                if dirente2[-1] == "/":
                    dirente2 = dirente2[:-1]
                if hasattr(cm, "channel"):
                    current_dir2 = cm.get_remote_pwd()
                    print(cm.create_remote_dir(os.path.basename(dirente2)))
                    print(cm.change_remote_dir(os.path.basename(dirente2)))
                    cm.create_remote_link(cm.remote_mtz_path, "tran.mtz")
                    cm.create_remote_link(cm.remote_tncs_path, "anis.tncs")
                    cm.create_remote_link(cm.remote_norm_path, "anis.norm")
                ndir += 1

            try:
                os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb"))
            except Exception:
                # Best effort, as before.
                pass

            if hasattr(cm, "channel"):
                print(cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))))
                cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)),
                                      str(counter) + ".pdb")

            # Write the Phaser keyword file; the context manager closes it even if a key is missing.
            with open(outputDirectory + "/" + str(counter) + ".sh", "w") as f:
                f.write("#!/bin/tcsh" + "\n")
                f.write("MODE MR_FTF" + "\n")
                f.write('HKLIN "tran.mtz"' + "\n")
                f.write('HKLOUT OFF' + "\n")
                if not Intensities:
                    f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n")
                else:
                    # With intensities the same two labels are written as I/SIGI.
                    f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n")
                f.write("TITLE Test fast translation for models" + "\n")
                f.write("JOBS 1" + "\n")
                f.write("SGALTERNATIVE SELECT NONE" + "\n")
                f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n")
                f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n")
                if PACK_TRA:
                    f.write("TRANS PACK USE ON" + "\n")
                    f.write("TRANSLATE PACKING CUTOFF " + str(cutoff_pack) + "\n")
                else:
                    f.write("TRANS PACK USE OFF" + "\n")
                f.write("MACANO PROTOCOL OFF" + "\n")
                f.write("MACTNCS PROTOCOL OFF" + "\n")
                f.write("TNCS EPSFAC READ anis.tncs" + "\n")
                f.write("NORM EPSFAC READ anis.norm" + "\n")
                f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n")
                if BULK_BSOL >= 0 and BULK_FSOL >= 0:
                    f.write("SOLPARAMETERS BULK USE ON" + "\n")
                    f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n")
                else:
                    f.write("SOLPARAMETERS BULK USE OFF" + "\n")
                f.write("XYZOUT OFF" + "\n")
                if not USE_TNCS:
                    f.write("TNCS USE OFF\n")
                else:
                    f.write("TNCS USE ON\n")
                f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str(counter) + ".pdb RMS " + str(RMSD) + "\n")
                f.write("SEARCH METHOD FAST" + "\n")
                f.write("PEAKS ROT CLUSTER ON" + "\n")

                f.write("SOLU SET" + "\n")
                f.write("SOLU SPAC " + str(spaceGroup) + "\n")
                if frag_fixed > 1:
                    # Already placed fragments are given as known 6-dimensional solutions.
                    for rotafi in rota["fixed_frags"]:
                        f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + _formatTriplet(rotafi["euler"]) +
                                "\t" + "FRAC " + _formatTriplet(rotafi["frac"]) + "\t" + "BFAC " +
                                str(rotafi["bfactor"]) + " # " + rotafi["name"] + " CLUSTER: " +
                                str(rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str(rotafi["llg"]) +
                                " ZSCORE: " + str(rotafi["zscore"]) + "\n")

                # The rotation to be translated is always written as the trial orientation.
                trial = ("SOLU TRIAL ENSEMBLE " + rota["name"] + " EULER \t" + _formatTriplet(rota["euler"]) +
                         "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota["name"])
                if "n_prev_cluster" in rota:
                    trial += " CLUSTER: " + str(rota["original_rotcluster"].split("_")[-1])
                # The trailing annotation uses the rotation's own name. The previous code read
                # rotafi["name"] here, which is only bound inside the fixed-fragment loop and
                # therefore raised NameError (or named the wrong fragment) in this branch.
                f.write(trial + "\n")
                cou += 1

                if sampl != -1:
                    f.write("SAMPLING TRA " + str(sampl) + "\n")
                f.write("TRANSLATE VOLUME FULL" + "\n")
                f.write("PEAKS TRA SELECT PERCENT" + "\n")
                f.write("PEAKS TRA CUTOFF " + str(save_tra) + "\n")
                f.write('ROOT "' + str(counter) + '"\n')
                f.write("END\n")
                f.write("EOF-phaser")

            if hasattr(cm, "channel"):
                cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"),
                                   "./" + str(ndir - 1) + "/" + str(counter) + ".sh")

            counter += 1

            if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or inde == last_index:
                # Sub-directory full, or last cluster: return to the parent remote directory.
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)

    if counter <= 0:
        print("WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE TRANSLATION SEARCH. MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFUALT IS POSITIVE VALUES)...ENDING NOW...")
        # sys.exit(0)
        if hasattr(cm, "channel"):
            # Leave the remote side where it was on entry, exactly as the normal exit path does.
            cm.change_remote_dir(current_dir)
        return counter

    def startFTFJob(outputDirectory, op):
        """Run Phaser on ``<op>.sh`` inside ``outputDirectory`` unless ``<op>.sol`` already exists."""
        if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")):
            return

        script = os.path.join(outputDirectory, str(op) + ".sh")
        log = os.path.join(outputDirectory, str(op) + ".out")
        # Both handles are closed even when Phaser cannot be started.
        with open(script, "r") as f, open(log, "w") as f2:
            print("Executing...")
            print(" ".join(["nice", "-n" + str(nice), PATH_NEW_PHASER, "<", script, ">", log]))
            p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2,
                                 stderr=subprocess.PIPE, cwd=outputDirectory)
            out, err = p.communicate()
        print(err)

    # Describe every sub-directory together with the number of jobs it holds.
    listaDirec = []
    for pr in range(ndir):
        if pr == ndir - 1:
            # The last sub-directory holds whatever is left.
            numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY)
        else:
            numero = NUMBER_OF_FILES_PER_DIRECTORY

        if hasattr(cm, "channel"):
            listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero))
        else:
            listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero))

    nq = counter
    print("FTF of " + str(nq) + " models submitted to the local machine with:")
    print("mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF))
    print("MW: " + str(MW) + " NC: " + str(NC))
    print("RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR))
    if sampl != -1:
        print("at a sampling size: " + str(sampl))

    nl = 0
    if cm is None:
        # Local execution: one thread per job, or a direct command when nodes are configured.
        for op in range(counter):
            if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                nl += 1
            outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")
            try:
                if len(SystemUtility.NODES) == 0:
                    if sym.PROCESSES > 0:
                        print("I found  " + str(sym.REALPROCESSES) + " CPUs.")
                        # Wait until the number of live threads allows starting a new one.
                        # NOTE(review): this loop polls without sleeping.
                        while 1:
                            if len(threading.enumerate()) <= sym.PROCESSES:
                                p = SystemUtility.OutputThreading(startFTFJob, outputDirectory, op)
                                p.start()
                                break
                    else:
                        print("FATAL ERROR: I cannot load correctly information of CPUs.")
                        sym.couldIClose = True
                        sys.exit(0)
                else:
                    comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + \
                              os.path.join(outputDirectory, str(op) + ".out")
                    SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"),
                                                PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST)
            except KeyboardInterrupt:
                print("The user requires to exit from Borges.")
                sym.couldIClose = True
                sys.exit(0)
    else:
        # Grid execution: describe the job once and submit `counter` instances.
        job = Grid.gridJob(nameJob)
        job.setExecutable(PATH_NEW_PHASER)
        job.setInitialDir(listaDirec)
        lineStdIn = ".sh"
        lia = lineStdIn.split()
        job.setStdIn(lia)
        job.addInputFile("tran.mtz", False)
        job.addInputFile(".pdb", True)
        job.addInputFile(".sh", True)
        job.addInputFile("anis.norm", False)
        job.addInputFile("anis.tncs", False)
        job.addOutputFile(".out", True)
        cm.setRequirements(PHASER_REQUIREMENTS)
        cm.setRank("kflops")
        (nc, nq) = cm.submitJobs(job, counter)

        if hasattr(cm, "channel"):
            cm.change_remote_dir(current_dir)

    return nq
def startFTFOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF,
                          Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_tra, save_tra,
                          frag_fixed, spaceGroup, cutoff_pack, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True,
                          usePDO=False, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, PACK_TRA=False):
    """
    Write one Phaser "MODE MR_FTF" input script per non-empty rotation cluster and launch them.

    For every cluster in ClusAll that holds at least one solution, a script "<counter>.sh" is written in a
    numbered sub-directory of outputDire (a new sub-directory is opened every NUMBER_OF_FILES_PER_DIRECTORY
    scripts). The script is made of three parts: the fixed keyword header, one ENSEMBLE line per distinct
    model file, and the SOLU lines (fixed fragments as "SOLU 6DIM", rotations of the cluster as "SOLU TRIAL").
    The scripts are then executed locally (cm is None) or submitted through cm as a Grid job.

    Parameters as used by the body:
      cm            -- None for local execution; otherwise the object used for submission. When it has a
                       "channel" attribute the remote directory tree and links are mirrored through it.
      sym           -- provides PROCESSES / REALPROCESSES and receives couldIClose on abort.
      nameJob       -- name given to Grid.gridJob.
      ClusAll       -- sequence of dicts with a "heapSolutions" entry; it is first passed through
                       __organizeClustersByFixedFragAndIDComb.
      ensembles     -- dict: ensemble name -> model file path (used as symlink source).
      outputDire    -- root output directory (created when missing).
      mtz, normfactors, tncsfactors -- files linked as tran.mtz, anis.norm and anis.tncs.
      F, SIGF, Intensities -- column labels for LABIN; Intensities selects "I=/SIGI=" instead of "F=/SIGF=".
      MW, NC, RMSD, lowR, highR, save_tra, spaceGroup, cutoff_pack, sampl, USE_TNCS, VRMS, BFAC,
      BULK_FSOL, BULK_BSOL, PACK_TRA -- values written into the corresponding Phaser keywords.
      nice          -- niceness passed to the "nice" command for local runs.
      frag_fixed    -- when > 1 the fixed fragments of the cluster's first solution are written as SOLU 6DIM.
      tops          -- if not None, maximum number of rotations written per cluster.
      usePDO        -- when true, also links the models whose name encodes fragment number frag_fixed - 2.
      DicParameters, Aniso, final_tra, LIMIT_CLUSTER -- not referenced in this function body.

    Returns the number of scripts written (0, returned early, when there is nothing to run) for local
    execution, or the second element of the tuple returned by cm.submitJobs for grid execution.
    """
    # Set phaser keywords
    VRMS = "ON" if VRMS else "OFF"
    BFAC = "ON" if BFAC else "OFF"

    if not (os.path.exists(outputDire)):
        os.makedirs(outputDire)

    # Strip a trailing slash so that os.path.basename() yields the directory name.
    dirente = outputDire
    if dirente[-1] == "/":
        dirente = dirente[:-1]

    # Mirror the output directory on the remote side and remember where we started.
    if hasattr(cm, "channel"):
        current_dir = cm.get_remote_pwd()
        print cm.create_remote_dir(os.path.basename(dirente))
        print cm.change_remote_dir(os.path.basename(dirente))

    counter = 0  # number of scripts written so far; also the script / ROOT name
    ndir = 0  # number of sub-directories opened so far
    dirente2 = ""
    current_dir2 = ""
    numall = -1  # number of distinct model files of the last processed cluster
    ClusAll = __organizeClustersByFixedFragAndIDComb(ClusAll)
    used_ensembles = {}  # "<index>_<counter>.pdb" -> list of ensemble names sharing that file
    for cds in range(len(ClusAll)):
        clu = ClusAll[cds]
        s = 0
        nrts = len(clu["heapSolutions"].asList())
        if nrts == 0:
            # Empty cluster: nothing to write. If it is the last one, still leave the remote sub-directory.
            if current_dir2 != "" and cds == len(ClusAll) - 1:
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)
            continue

        # list_pdbs: model file path -> (ensemble name, running index, is-the-searched-model flag)
        list_pdbs = {}
        ct = 0
        if usePDO:
            for key in ensembles:
                try:
                    # The fragment number is read from the text between "FR" and the next "_" in the name.
                    franumero = int(key.split("FR")[-1].split("_")[0])
                except:
                    continue
                if franumero == frag_fixed - 2:
                    if ensembles[key] not in list_pdbs.keys():
                        list_pdbs[ensembles[key]] = (key, ct, False)
                        ct += 1

        # The first solution of the cluster provides the searched model and the fixed fragments.
        sol = clu["heapSolutions"].asList()[0]
        rota = sol[1]
        prio = (rota["llg"], rota["zscore"])
        if ensembles[rota["name"]] not in list_pdbs.keys():
            list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True)
            ct += 1
        if "fixed_frags" in rota:
            for rotafi in rota["fixed_frags"]:
                if ensembles[rotafi["name"]] not in list_pdbs.keys():
                    list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False)
                    ct += 1
        numall = ct

        # Open a new numbered sub-directory every NUMBER_OF_FILES_PER_DIRECTORY scripts.
        if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0:
            outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/")
            if not (os.path.exists(outputDirectory)):
                os.makedirs(outputDirectory)
            try:
                os.symlink(mtz, os.path.join(outputDirectory, "tran.mtz"))
                os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs"))
                os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm"))
            except:
                # Best effort: any failure while linking (e.g. link already present) is ignored.
                pass

            dirente2 = outputDirectory
            if dirente2[-1] == "/":
                dirente2 = dirente2[:-1]

            if hasattr(cm, "channel"):
                current_dir2 = cm.get_remote_pwd()
                print cm.create_remote_dir(os.path.basename(dirente2))
                print cm.change_remote_dir(os.path.basename(dirente2))
                cm.create_remote_link(cm.remote_mtz_path, "tran.mtz")
                cm.create_remote_link(cm.remote_tncs_path, "anis.tncs")
                cm.create_remote_link(cm.remote_norm_path, "anis.norm")
            ndir += 1

        # Link every model of this cluster as "<index>_<counter>.pdb" next to the script.
        for pdbf in list_pdbs.keys():
            try:
                val = list_pdbs[pdbf]
                os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb"))
            except:
                pass

        if hasattr(cm, "channel"):
            for pdbf in list_pdbs.keys():
                val = list_pdbs[pdbf]
                print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2)))
                cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)),
                                      str(val[1]) + "_" + str(counter) + ".pdb")

        # The script is assembled in three pieces so that the ENSEMBLE lines, which are only known while the
        # SOLU lines are generated, can be written between the header and the solutions.
        headlines = ""
        enselines = ""
        taillines = ""
        headlines += "#!/bin/tcsh" + "\n"
        headlines += "MODE MR_FTF" + "\n"
        headlines += 'HKLIN "tran.mtz"' + "\n"
        headlines += 'HKLOUT OFF' + "\n"
        if not Intensities:
            headlines += 'LABIN F=' + F + ' SIGF=' + SIGF + '\n'
        else:
            headlines += 'LABIN I=' + F + ' SIGI=' + SIGF + '\n'
        headlines += "TITLE Test fast translation for models" + "\n"
        headlines += "JOBS 1" + "\n"
        headlines += "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n"
        headlines += "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n"
        if PACK_TRA:
            headlines += "TRANS PACK USE ON" + "\n"
            headlines += "TRANSLATE PACKING CUTOFF " + str(cutoff_pack) + "\n"
        else:
            headlines += "TRANS PACK USE OFF" + "\n"
        headlines += "MACTNCS PROTOCOL OFF" + "\n"
        headlines += "MACANO PROTOCOL OFF" + "\n"
        headlines += "TNCS EPSFAC READ anis.tncs" + "\n"
        headlines += "NORM EPSFAC READ anis.norm" + "\n"
        headlines += "RESOLUTION " + str(lowR) + " " + str(highR) + "\n"
        headlines += "XYZOUT OFF" + "\n"
        if not USE_TNCS:
            headlines += "TNCS USE OFF\n"
        else:
            headlines += "TNCS USE ON\n"
        headlines += "SEARCH METHOD FAST" + "\n"
        headlines += "PEAKS ROT CLUSTER ON" + "\n"
        # Bulk-solvent parameters are only written when both values are non-negative.
        if BULK_BSOL >= 0 and BULK_FSOL >= 0:
            headlines += "SOLPARAMETERS BULK USE ON" + "\n"
            headlines += "SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n"
        else:
            headlines += "SOLPARAMETERS BULK USE OFF" + "\n"

        if frag_fixed > 1:
            taillines += "SOLU SET" + "\n"
            taillines += "SOLU SPAC " + str(spaceGroup) + "\n"
            # Already placed fragments: one SOLU 6DIM line each. Fragments that share a model file reuse
            # the first ensemble name declared for that file, so each file gets a single ENSEMBLE line.
            for frifr in rota["fixed_frags"]:
                rotafi = frifr
                prifi = (rotafi["llg"], rotafi["zscore"])
                if not str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(
                        counter) + ".pdb" in used_ensembles.keys():
                    enselines += "ENSEMBLE " + rotafi["name"] + " PDBFILE " + str(
                        list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n"
                    used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"] = [
                        rotafi["name"]]
                else:
                    used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"].append(
                        rotafi["name"])
                taillines += "SOLU 6DIM ENSE " + \
                             used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"][
                                 0] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str(
                    (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str(
                    (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str(
                    (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " # " + rotafi[
                                 "name"] + " CLUSTER: " + str(
                    rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prifi[0]) + " ZSCORE: " + str(
                    prifi[1]) + "\n"
        else:
            taillines += "SOLU SET" + "\n"
            taillines += "SOLU SPAC " + str(spaceGroup) + "\n"

        # Rotations of the cluster: one SOLU TRIAL line each, limited to the first `tops` when given.
        s = 0
        for sol in clu["heapSolutions"]:
            if tops != None and s >= tops:
                break
            rota = sol[1]
            prio = (rota["llg"], rota["zscore"])
            if "n_prev_cluster" in rota:
                if not str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(
                        counter) + ".pdb" in used_ensembles.keys():
                    enselines += "ENSEMBLE " + rota["name"] + " PDBFILE " + str(
                        list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n"
                    used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"] = [
                        rota["name"]]
                else:
                    used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"].append(
                        rota["name"])
                # Same as the branch below, plus the originating rotation cluster in the trailing comment.
                taillines += "SOLU TRIAL ENSEMBLE " + \
                             used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"][
                                 0] + " EULER \t" + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str(
                    (rota["euler"])[2]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota[
                                 "name"] + " CLUSTER: " + str(rota["original_rotcluster"].split("_")[-1]) + "\n"
            else:
                if not str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(
                        counter) + ".pdb" in used_ensembles.keys():
                    enselines += "ENSEMBLE " + rota["name"] + " PDBFILE " + str(
                        list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(RMSD) + "\n"
                    used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"] = [
                        rota["name"]]
                else:
                    used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"].append(
                        rota["name"])
                taillines += "SOLU TRIAL ENSEMBLE " + \
                             used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"][
                                 0] + " EULER \t" + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str(
                    (rota["euler"])[2]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota["name"] + "\n"
            s += 1

        if sampl != -1:
            taillines += "SAMPLING TRA " + str(sampl) + "\n"
        taillines += "TRANSLATE VOLUME FULL" + "\n"
        taillines += "PEAKS TRA SELECT PERCENT" + "\n"
        taillines += "PEAKS TRA CUTOFF " + str(save_tra) + "\n"
        taillines += 'ROOT "' + str(counter) + '"\n'
        taillines += "END\n"
        taillines += "EOF-phaser"
        f = open(outputDirectory + "/" + str(counter) + ".sh", "w")
        f.write(headlines)
        f.write(enselines)
        f.write(taillines)
        f.close()

        if hasattr(cm, "channel"):
            cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"),
                               "./" + str(ndir - 1) + "/" + str(counter) + ".sh")

        counter += 1
        # Leave the remote sub-directory when it is full or when the last cluster has been processed.
        if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1):
            if hasattr(cm, "channel"):
                cm.change_remote_dir(current_dir2)

    if counter <= 0:
        print "WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE TRANSLATION SEARCH. MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFUALT IS POSITIVE VALUES)...ENDING NOW..."
        # sys.exit(0)
        return counter

    def startFTFJob(outputDirectory, op):
        # Run Phaser on script <op>.sh inside outputDirectory, writing its stdout to <op>.out.
        # Nothing is done when <op>.sol already exists in that directory.
        if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")):
            return

        f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r")
        f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w")
        print "Executing..."
        print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join(
            outputDirectory, str(op) + ".out")
        p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE,
                             cwd=outputDirectory)
        out, err = p.communicate()
        f.close()
        f2.close()
        print err

    # (directory, number of scripts in it) pairs; only the last directory can be partially filled.
    listaDirec = []
    numero = 0
    for pr in range(ndir):
        if pr == ndir - 1:
            numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY)
        else:
            numero = NUMBER_OF_FILES_PER_DIRECTORY
        if hasattr(cm, "channel"):
            listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero))
        else:
            listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero))

    nq = counter
    print "FTF of " + str(nq) + " models submitted to the local machine with:"
    print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF)
    print "MW: " + str(MW) + " NC: " + str(NC)
    print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR)
    if sampl != -1:
        print "at a sampling size: " + str(sampl)
    print "Picking all translations with a peak >= " + str(save_tra)
    print "Saving all translations with a peak >= " + str(save_tra)

    nl = 0  # index of the sub-directory holding script `op`
    if cm is None:
        # Local execution.
        for op in range(counter):
            if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                nl += 1
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")
            else:
                outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/")

            try:
                if len(SystemUtility.NODES) == 0:
                    if sym.PROCESSES > 0:
                        print "I found ", sym.REALPROCESSES, "CPUs."
                        # Spin until the number of live threads allows starting a new worker.
                        while 1:
                            if len(threading.enumerate()) <= sym.PROCESSES:
                                p = SystemUtility.OutputThreading(startFTFJob, outputDirectory, op)
                                p.start()
                                break
                    else:
                        print "FATAL ERROR: I cannot load correctly information of CPUs."
                        sym.couldIClose = True
                        sys.exit(0)
                else:
                    # SystemUtility.NODES is not empty: hand the command line to SystemUtility.launchCommand.
                    while 1:
                        comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory,
                                                                       str(op) + ".sh") + ">" + os.path.join(
                            outputDirectory, str(op) + ".out")
                        SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"),
                                                    PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST)
                        break
            except KeyboardInterrupt:
                print "The user requires to exit from Borges."
                sym.couldIClose = True
                sys.exit(0)
    else:
        # Grid execution: describe the job once and let cm submit `counter` instances of it.
        job = Grid.gridJob(nameJob)
        job.setExecutable(PATH_NEW_PHASER)
        job.setInitialDir(listaDirec)
        lineStdIn = ".sh"
        lia = lineStdIn.split()
        job.setStdIn(lia)
        job.addInputFile("tran.mtz", False)
        if numall > 0:
            # NOTE(review): numall is the model count of the last processed cluster only -- confirm that
            # every cluster has the same number of model files.
            for lo in range(numall):
                job.addInputFile(str(lo) + "_" + ".pdb", True)
        else:
            job.addInputFile(".pdb", True)
        job.addInputFile(".sh", True)
        job.addInputFile("anis.norm", False)
        job.addInputFile("anis.tncs", False)
        job.addOutputFile(".out", True)
        cm.setRequirements(PHASER_REQUIREMENTS)
        cm.setRank("kflops")
        (nc, nq) = cm.submitJobs(job, counter)

    # Restore the remote working directory that was current when the function was entered.
    if hasattr(cm, "channel"):
        cm.change_remote_dir(current_dir)

    return nq
ref_point[2] print "Center of mass of ent:", cx, cy, cz for frag in list_frags: print "Processing new fragment..." frag_orig = [] frag_move = [] no_convert = False # no_trial_zero = False all_first_point = [] p = [] wx = None wy = None wz = None ux = None uy = None uz = None d1 = None d2 = None for atom in frag: if isinstance(atom, list): ux = atom[0] uy = atom[1] uz = atom[2] else: atm = atom.get_coord() ux = atm[0] uy = atm[1] uz = atm[2] nx, ny, nz, parameters = convertFromOrthToFrac(ux, uy, uz, cell_dim, parameters) # print print "Original fractional", nx, ny, nz frag_orig.append(numpy.array([ux, uy, uz])) if no_convert: continue if wx is not None and wy is not None and wz is not None: d1 = numpy.sqrt(((wx - ux) * (wx - ux)) + ((wy - uy) * (wy - uy)) + ((wz - uz) * (wz - uz))) # print "distance1",d1,"--",wx,wy,wz,"-",ux,uy,uz # wx = ux # wy = uy # wz = uz if nx < 0: nx = -1 * nx if ny < 0: ny = -1 * ny if nz < 0: nz = -1 * nz nx = nx - numpy.modf(nx)[1] ny = ny - numpy.modf(ny)[1] nz = nz - numpy.modf(nz)[1] if len(p) > 0: hp = ADT.Heap() for g in p: px = g[0] py = g[1] pz = g[2] # print "Using",px,py,pz try: for ada in lista_trials: for adb in lista_trials: for adc in lista_trials: # no_convert = True if ada[0] == "+": sx = nx + ada[1] elif ada[0] == "*": sx = nx * ada[1] if adb[0] == "+": sy = ny + adb[1] elif adb[0] == "*": sy = ny * adb[1] if adc[0] == "+": sz = nz + adc[1] elif adc[0] == "*": sz = nz * adc[1] # print "Application of trial",sx,sy,sz sx, sy, sz, parameters = convertFromFracToOrth(sx, sy, sz, cell_dim, parameters) d2 = numpy.sqrt( ((px - sx) * (px - sx)) + ((py - sy) * (py - sy)) + ((pz - sz) * (pz - sz))) # print "distance2",d2,"--",px,py,pz,"-",sx,sy,sz # print "LOOK AT IT:",d1-d2,ada,adb,adc df = abs(d1 - d2) ang = ( 57.2957795 * angle_between([wx - ux, wy - uy, wz - uz], [px - sx, py - sy, pz - sz], [0.0, 0.0, 1.0], signed=False)) # if abs(d1-d2) <= 0.05 and 
(57.2957795*angle_between([wx-ux,wy-uy,wz-uz],[px-sx,py-sy,pz-sz],[0.0,0.0,1.0],signed=False)) <= 0.05: if df <= 9.0 and ang <= 9.0: # no_convert = False if len(p) > 1 and cx is not None and cy is not None and cz is not None: d3 = numpy.sqrt(((cx - sx) * (cx - sx)) + ((cy - sy) * (cy - sy)) + ( (cz - sz) * (cz - sz))) hp.push((df, ang, d3), (sx, sy, sz, px, py, pz)) # it is a MinHeap # print "distance found",d3,sy,sy,sz else: hp.push((df, ang, 1), (sx, sy, sz, px, py, pz)) # raise Exception # print "It is compatible:",d1-d2,ada,adb,adc except: pass if hp.len() > 0: no_convert = False item = hp.pop() nx = item[1][0] ny = item[1][1] nz = item[1][2] px = item[1][3] py = item[1][4] pz = item[1][5] print "minim values", item[0], nx, ny, nz # df,ang,dis = item[0] # if df > 1.5 or ang > 1.0: # print "Values are too far from correct, cannot be accepted" # no_convert = True else: no_convert = True if not no_convert: frag_move[-1] = numpy.array([px, py, pz]) # break p = [] else: for ada in lista_trials: for adb in lista_trials: for adc in lista_trials: if ada[0] == "+": qx = nx + ada[1] elif ada[0] == "*": qx = nx * ada[1] if adb[0] == "+": qy = ny + adb[1] elif adb[0] == "*": qy = ny * adb[1] if adc[0] == "+": qz = nz + adc[1] elif adc[0] == "*": qz = nz * adc[1] qx, qy, qz, parameters = convertFromFracToOrth(qx, qy, qz, cell_dim, parameters) p.append([qx, qy, qz]) if no_convert: print "ATTENTION CAN'T FIND A FRAC COORDS FOR ATOM. USING ORIGINAL POSITIONS..." 
def rotateListByMatrix(num, num2, struct, matrices, cell_dim, return_atoms=False):
    """
    Apply a chain of 3x3 matrices to every point of struct.

    Each element of struct is either a list [x, y, z] or an object exposing get_coord() / set_coord().
    The first matrix is applied directly to the coordinates as given. Every following matrix is applied
    between a call to convertFromOrthToFrac and a call to convertFromFracToOrth (both receive cell_dim
    and a dictionary that is threaded through all the calls).

    With return_atoms false a new list of numpy arrays is returned and struct is left untouched; with
    return_atoms true the new coordinates are stored through set_coord() and struct itself is returned.
    num and num2 are accepted but not used.
    """
    conv_cache = {}
    rotated = []
    for item in struct:
        if isinstance(item, list):
            px, py, pz = item[0], item[1], item[2]
        else:
            coords = item.get_coord()
            px, py, pz = coords[0], coords[1], coords[2]

        for position, rot in enumerate(matrices):
            # Only the matrices after the first one go through the frame conversions.
            through_conversion = position > 0
            if through_conversion:
                px, py, pz, conv_cache = convertFromOrthToFrac(px, py, pz, cell_dim, conv_cache)

            qx = (rot[0][0] * px) + (rot[0][1] * py) + (rot[0][2] * pz)
            qy = (rot[1][0] * px) + (rot[1][1] * py) + (rot[1][2] * pz)
            qz = (rot[2][0] * px) + (rot[2][1] * py) + (rot[2][2] * pz)

            if through_conversion:
                qx, qy, qz, conv_cache = convertFromFracToOrth(qx, qy, qz, cell_dim, conv_cache)

            px, py, pz = qx, qy, qz

        final_point = numpy.array([px, py, pz])
        if return_atoms:
            item.set_coord(final_point)
        else:
            rotated.append(final_point)

    if return_atoms:
        return struct
    return rotated
def angleRadBetweenVectors(vec1, vec2):
    """
    Return the angle, in radians, between two 3D vectors.

    vec1 and vec2 are indexable objects whose first three items are the x, y and z components.
    The result lies in [0, pi].

    The cosine is clamped to [-1, 1] before calling arccos: for (anti)parallel vectors floating point
    round-off can yield values such as 1.0000000000000002, for which arccos would return NaN.
    A zero-length vector still produces NaN, because the cosine itself is then undefined (0/0).
    """
    X1, Y1, Z1 = vec1[0], vec1[1], vec1[2]
    X2, Y2, Z2 = vec2[0], vec2[1], vec2[2]

    scaP2 = (X1 * X2) + (Y1 * Y2) + (Z1 * Z2)
    parallequiv = (numpy.sqrt((X1 * X1) + (Y1 * Y1) + (Z1 * Z1))) * (numpy.sqrt((X2 * X2) + (Y2 * Y2) + (Z2 * Z2)))
    cosTetaReal = scaP2 / parallequiv

    # Take care of roundoff errors
    cosTetaReal = numpy.clip(cosTetaReal, -1.0, 1.0)

    return numpy.arccos(cosTetaReal)
def matrixFromEulerAngles(th1, th2, th3):  # PHASER CONVENTION: Z,Y,Z
    """
    Build the 3x3 rotation matrix for three Euler angles given in degrees.

    The entries are those of the Z-Y-Z product Rz(th1) * Ry(th2) * Rz(th3), see
    http://en.wikipedia.org/wiki/Euler_angles (Relationship to other representations,
    Rotation matrix: ZYZ).

    The values are stored in, and returned as, the object created by ADT.get_matrix(3, 3).
    """
    # Degrees to radians.
    alpha = (th1 * 2 * numpy.pi) / 360
    beta = (th2 * 2 * numpy.pi) / 360
    gamma = (th3 * 2 * numpy.pi) / 360

    matrice = ADT.get_matrix(3, 3)

    cos_a, sin_a = numpy.cos(alpha), numpy.sin(alpha)
    cos_b, sin_b = numpy.cos(beta), numpy.sin(beta)
    cos_g, sin_g = numpy.cos(gamma), numpy.sin(gamma)

    rows = (
        ((cos_a * cos_b * cos_g) - (sin_a * sin_g),
         ((-1) * cos_a * cos_b * sin_g) - (sin_a * cos_g),
         cos_a * sin_b),
        ((sin_a * cos_b * cos_g) + (cos_a * sin_g),
         ((-1) * sin_a * cos_b * sin_g) + (cos_a * cos_g),
         sin_a * sin_b),
        ((-1) * sin_b * cos_g,
         sin_b * sin_g,
         cos_b),
    )
    for i, row in enumerate(rows):
        for j, entry in enumerate(row):
            matrice[i][j] = entry

    return matrice
print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) num = 0 nclu = None listaFileDel = [] listaKeyDel = [] listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} dictioNameClus = {} clus = -1 brat = {} nfixfrags = 0 fullcluname = "" for rotaz in traslazioni: num += 1 # print "Validating "+str(num)+"\\"+str(len(traslazioni))+" traslations..." rotaz["numInSol"] = num if len(convertnames.keys()) > 0: ensembles[rotaz["name"]] = ensembles[convertnames[rotaz["name"]]] if "fixed_frags" in rotaz and (len(convertnames.keys()) > 0 or not isArcimboldo): for fi in rotaz["fixed_frags"]: ensembles[fi["name"]] = ensembles[rotaz["name"]] nfixfrags = len(rotaz["fixed_frags"]) + 1 if giveids: nameFi = ensembles[rotaz["name"]] if rotaz["name"] not in listaKeyDel: listaKeyDel.append(rotaz["name"]) nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "-" + str(rotaz["numInSol"]) + ".pdb") # os.link(nameFi,os.path.join(os.path.dirname(nameFi),nuovoPath)) # print "nome:",rotaz["name"]+"-"+str(rotaz["numInSol"]),"file:",nameFi ensembles[rotaz["name"] + "-" + str(rotaz["numInSol"])] = nameFi # nuovoPath rotaz["name"] = rotaz["name"] + "-" + str(rotaz["numInSol"]) # if nameFi not in listaFileDel: # listaFileDel.append(nameFi) # print "--!!!!--",rotaz["name"] if (len(rotaz["fixed_frags"]) == 0 or LIMIT_CLUSTER != None) and len(dizioClu.values()) > 0: nclu = dizioClu.values()[0] rotaz["original_rotcluster"] = str(nclu) rotaz["n_prev_cluster"] = nclu elif len(rotaz["fixed_frags"]) > 0 and len(dizioClu.values()) > 0: # print "--------dizioClu--------" # print dizioClu # print rotaz["name"] # print "------------------------" nclu = dizioClu.values()[0] rotaz["original_rotcluster"] = rotaz["fixed_frags"][-1]["original_rotcluster"] + "_" + str(nclu) rotaz["n_prev_cluster"] = __getIDClusterFromDescription(rotaz["original_rotcluster"]) clus = rotaz["n_prev_cluster"] if rotaz["zscore"] < excludeZSCORE: continue 
rotaz["elong"] = 0 # rotaz["fixed_frags"] = fixed listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) listaRotNumInRlist.append((rotaz["name"], rotaz["numInSol"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInSol"])] = rotaz dictioNameClus[rotaz["name"]] = rotaz["original_rotcluster"] fullcluname = rotaz["original_rotcluster"] if mode == "RNP": for ler in rnp_sol.keys(): cud = int((ler.split("."))[1]) # 8.1.pdb here it takes 1 tud = int((ler.split("."))[0]) # 8.1.pdb here it takes 8 if tops != None and cud > tops: try: os.remove(baseDir + ler) except Exception: pass continue for bla in listaRotNumInRlist: # print "====",bla[1],"======",tud,cud,"....",int(name) if bla[1] == cud and tud == int(name): rnp_sol[ler] = bla[0] brat[ler] = bla[0] break if mode == "RNP" and renamePDBs: if isArcimboldo: for ler in brat.keys(): cud = int((ler.split("."))[0]) # print ler, brat[ler], convNames[ler][0], cud, int(name) if cud == int(name): f = open(baseDir + ler, "r") allf = f.read() f.close() # print dictioNameClus nede = baseDir + os.path.basename(convNames[ler][0])[:-4] + brat[ler].split("ensembleID")[1] # print ".....--..------.....",nede,laue,os.path.exists(os.path.join(baseDir,"../../3_FTF_LIBRARY/")),os.path.join(baseDir,"../3_FTF_LIBRARY/") if str(laue) == "1" and not os.path.exists(os.path.join(baseDir, "../../3_FTF_LIBRARY/")): nede += "-1.pdb" else: nede += ".pdb" f = open(nede, "w") druppo = str(dictioNameClus[brat[ler]]) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo + "\n")) f.write(allf) f.close() os.remove(baseDir + ler) # shutil.move(baseDir+ler,baseDir+os.path.basename(convNames[ler][0])[:-4]+brat[ler].split("ensembleID")[1]+".pdb") # Associare a .n.pdb l'ensembleID del primo nel .n.pdb ensembles[brat[ler]] = nede else: f 
= open(baseDir + name + ".1.pdb", "r") allf = f.read() f.close() # print "+++++++++++",convNames[name+".1.pdb"] nomed = baseDir + os.path.basename(convNames[name + ".1.pdb"][0]) if len(convNames[name + ".1.pdb"][1].split("-")) > 1: nomed = nomed[:-4] + "-" + convNames[name + ".1.pdb"][1].split("-")[1] + ".pdb" f = open(nomed, "w") druppo = str(fullcluname) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo) + "\n") f.write(allf) f.close() os.remove(baseDir + name + ".1.pdb") # shutil.move(baseDir+name+".1.pdb",baseDir+os.path.basename(convNames[name+".1.pdb"][0])) ensembles[convNames[name + ".1.pdb"][1]] = nomed listaAllsol[0] += listaQuaternions listaAllsol[1] += listaRotNumInRlist listaAllsol[2].update(listaDictioNameNumInRlist) if giveids: for ele in listaKeyDel: del ensembles[ele] return ensembles, listaAllsol, nfixfrags def clusterAllRotInList(DicParameters, listrot, isArcimboldo, quate, laue, listNCS, excludeLLG, mode, ClusteringMode, cell_dim, thresholdCompare, evaLLONG, tops=None, LIMIT_CLUSTER=None, giveids=False, applyNameFilter=False): global LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID = 0 ClusAll = [] RotClu = [] ensembles = {} listaAllrot = [[], [], {}, [[], RotClu]] num = 0 listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} # print "////////",len(listrot) for ro in listrot: rotaz, pdbname = ro num += 1 # print "Checking the "+str(num)+"\\"+str(len(listrot))+" rotation..." 
if mode in ["TRA", "PACK", "RNP", "PICASSO"]: rotaz["numInSol"] = num else: rotaz["numInRlist"] = num if rotaz["llg"] < excludeLLG: continue fixed = [] if "fixed_frags" in rotaz.keys(): fixed = rotaz["fixed_frags"] name = 0 if giveids: nameFi = pdbname nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "xx" + str(name) + "FR" + str( len(fixed)) + "_" + str(rotaz["numInRlist"]) + ".pdb") ensembles["ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str( rotaz["numInRlist"])] = nameFi # nuovoPath rotaz["name"] = "ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str(rotaz["numInRlist"]) listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) listaRotNumInRlist.append((rotaz["name"], rotaz["numInRlist"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInRlist"])] = rotaz if not isArcimboldo or (isArcimboldo and len(fixed) == 0): tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, prefilter=1.5) indes = len(ClusAll) for key in tomerge.keys(): dicton = {"heapSolutions": ADT.Heap()} value = tomerge[key] + [key] pdb_done = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue pdb_done.append(rotaz["name"]) if LIMIT_CLUSTER == None: rotaz["n_prev_cluster"] = indes else: rotaz["n_prev_cluster"] = LIMIT_CLUSTER rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"]) dicton["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) indes += 1 if LIMIT_CLUSTER != None: if len(ClusAll) <= LIMIT_CLUSTER: while len(ClusAll) < LIMIT_CLUSTER + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[LIMIT_CLUSTER] = 
dicton else: ClusAll.append(dicton) LAST_AVAILABLE_ROTID = indes tomerge = None merged = None nuovo_clus = None subclu = None listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} RotClu = [] for inderec in range(len(ClusAll)): prio, rota = ClusAll[inderec]["heapSolutions"].pop() ClusAll[inderec]["heapSolutions"].push(prio, rota) hp = ADT.Heap() hp.push(prio, copy.deepcopy(rota)) RotClu.append({"heapSolutions": hp}) elif not isArcimboldo or (len( fixed) > 0 and LIMIT_CLUSTER == None): # It means that we are not at the first fragment search for Arcimboldo or we are working with libraries in Borges tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, prefilter=1.5) # print "tomerge" # print tomerge final_clus = [] for key in tomerge.keys(): value = tomerge[key] + [key] pdb_done = [] lisp = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue if LIMIT_CLUSTER != None: rotaz["original_rotcluster"] = LIMIT_CLUSTER rotaz["n_prev_cluster"] = LIMIT_CLUSTER # print "WAAA Insert Rotation:",rotaz["name"],"llg:",rotaz["llg"],"zscore:",rotaz["zscore"],"in cluster:",rotaz["n_prev_cluster"] pdb_done.append(rotaz["name"]) lisp.append(rotaz) final_clus.append(lisp) listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} for cl in final_clus: if len(cl) == 0: continue rotaz = cl[0] inserted = False fixedNumbers = [] for rotafi in fixed: fixedNumbers.append(int(rotafi["original_rotcluster"].split("_")[-1])) result, elong = compareRotation(rotaz, rotafi, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) # print "rotaz cluster",rotaz["euler"] # print "rotafi cluster",rotafi["euler"] # print "---",result,elong if result: 
rotaz["elong"] = elong inserted = True for ro in cl: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rotafi["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) break if not inserted and LIMIT_CLUSTER == None: for inderec in range(len(RotClu)): prio, rota = RotClu[inderec]["heapSolutions"].pop() ncl = rota["n_prev_cluster"] RotClu[inderec]["heapSolutions"].push(prio, rota) if ncl in fixedNumbers: continue # print "---------++++++++++++-------------" # print "//rotaz cluster//",rotaz["euler"],rotaz["name"] # print "//rota cluster//",rota["euler"],rota["name"] result, elong = compareRotation(rotaz, rota, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) if result: rotaz["elong"] = elong inserted = True for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = int(ro["original_rotcluster"]) break # db.close() if not inserted and LIMIT_CLUSTER == None: rotaz["elong"] = 0 for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = int(ro["original_rotcluster"]) print "Inserting rotation in a new cluster", LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID += 1 hp = ADT.Heap() hp.push((-1 * cl[0]["llg"], -1 * cl[0]["zscore"]), copy.deepcopy(cl[0])) dicn = {"heapSolutions": hp} RotClu.append(dicn) for ro in cl: \ # print "ro[name]",ro["name"] # if ro["n_prev_cluster"] > 0: # 
def clusterAtOnce(DicParameters, listaAllrot, isArcimboldo, baseDir, name, quate, laue, listNCS, excludeLLG, mode,
                  ClusteringMode, ensembles, cell_dim, thresholdCompare, evaLLONG, tops=None, LIMIT_CLUSTER=None,
                  giveids=False, applyNameFilter=False, lastFile=False):
    """
    Read the solutions of one job from disk, cluster them and merge them into ``listaAllrot``.

    The solutions are read with ``readRotationsFRF`` (mode "FRF") or ``readTranslationsFTF``
    (modes "TRA", "PACK", "RNP", "PICASSO"). Solutions whose "llg" is below ``excludeLLG`` are dropped.

    Two clustering paths exist:
      * first search (non-Arcimboldo with ``name == "0"``, or Arcimboldo with no fixed fragments):
        every group returned by ``__clusterSetOfRotations`` becomes a new cluster appended to (or, when
        ``LIMIT_CLUSTER`` is given, stored at that index of) the cluster list, and the representative
        list ``RotClu`` is rebuilt from the top of each non-empty cluster heap;
      * later searches: the best rotation of every group is compared with ``compareRotation`` first
        against the fixed fragments, then against the representatives in ``RotClu``; unmatched groups
        open a new cluster numbered ``LAST_AVAILABLE_ROTID``. The rotations are then stored through
        ``saveRotations``.

    ``listaAllrot`` is updated in place: index 0, 1 and 2 are extended/updated with the new
    quaternion-like keys, (name, numInRlist) pairs and the (name, numInRlist) -> rotation mapping;
    index 3 is replaced by ``[ClusAll, RotClu]``.

    ``DicParameters`` is only forwarded to ``saveRotations``; ``lastFile`` is not read in this function.

    Returns ``(ensembles, listaAllrot)`` when ``giveids`` is true, otherwise ``listaAllrot``.
    """
    global LAST_AVAILABLE_ROTID

    riprova = True  # "try again" flag for the read loop below
    ClusAll = listaAllrot[3][0]
    RotClu = listaAllrot[3][1]

    # NOTE(review): this loop retries forever, without any delay, on ANY exception (bare except).
    # The original delay is disabled (was: time.sleep(30)). Also, when `mode` is none of the handled
    # values no reader is called, the loop exits and `rotazioni`/`fixed` are undefined below.
    while riprova:
        try:
            if mode == "FRF":
                rotazioni, fixed = readRotationsFRF(baseDir, name, quate, tops=tops)
            elif mode in ["TRA", "PACK", "RNP", "PICASSO"]:
                # dizioClu and convertnames are not used afterwards in this function
                rotazioni, fixed, dizioClu, convertnames = readTranslationsFTF(baseDir, name, quate, mode, tops=tops)
            riprova = False
        except:
            print "Error...Trying again reading output files..."
            print sys.exc_info()
            traceback.print_exc(file=sys.stdout)
            riprova = True
            # time.sleep(30)

    num = 0
    listaQuaternions = []
    listaRotNumInRlist = []
    listaDictioNameNumInRlist = {}
    for rotaz in rotazioni:
        num += 1
        # Record the 1-based position of the solution in the file that was read.
        if mode in ["TRA", "PACK", "RNP", "PICASSO"]:
            rotaz["numInSol"] = num
        else:
            rotaz["numInRlist"] = num

        if rotaz["llg"] < excludeLLG:
            continue

        if giveids:
            # Register the model file under a new key that encodes job name, number of fixed
            # fragments and position, and rename the rotation accordingly.
            # NOTE(review): "numInRlist" is read here even in the modes where only "numInSol" was set
            # above; presumably readTranslationsFTF provides it - confirm.
            nameFi = ensembles[rotaz["name"]]
            # del ensembles[rotaz["name"]]
            # nuovoPath is computed but never used (the os.link call that consumed it is disabled)
            nuovoPath = os.path.join(os.path.dirname(nameFi),
                                     os.path.basename(nameFi)[:-4] + "xx" + str(name) + "FR" + str(
                                         len(fixed)) + "_" + str(rotaz["numInRlist"]) + ".pdb")
            # os.link(nameFi,os.path.join(os.path.dirname(nameFi),nuovoPath))
            ensembles["ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str(
                rotaz["numInRlist"])] = nameFi  # nuovoPath
            rotaz["name"] = "ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str(rotaz["numInRlist"])

        rotaz["fixed_frags"] = fixed
        # Sorting key built from the weighted Euler angles plus their Cantor pairing
        # (the quaternion-based key it replaced is no longer used).
        listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3,
                                 ADT.cantor_pairing(
                                     [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])])
        listaRotNumInRlist.append((rotaz["name"], rotaz["numInRlist"]))
        listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInRlist"])] = rotaz

    if (not isArcimboldo and name == "0") or (isArcimboldo and len(fixed) == 0):
        # First search: every merged group becomes a brand new cluster.
        tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist,
                                                      listaDictioNameNumInRlist, thresholdCompare, ClusteringMode,
                                                      quate, laue, listNCS, ensembles, cell_dim, evaLLONG)
        indes = len(ClusAll)  # id of the next cluster to create
        for key in tomerge.keys():
            dicton = {"heapSolutions": ADT.Heap()}
            value = tomerge[key] + [key]
            pdb_done = []  # names already inserted in this cluster, to skip duplicates
            for va in value:
                for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True):
                    if rotaz["name"] in pdb_done:
                        continue
                    pdb_done.append(rotaz["name"])
                    if LIMIT_CLUSTER == None:
                        rotaz["n_prev_cluster"] = indes
                    else:
                        rotaz["n_prev_cluster"] = LIMIT_CLUSTER
                    rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"])
                    # Negated scores are pushed as priority (llg first, then zscore).
                    dicton["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz)
            indes += 1
            if LIMIT_CLUSTER != None:
                # Pad the cluster list with empty heaps so that index LIMIT_CLUSTER exists.
                if len(ClusAll) <= LIMIT_CLUSTER:
                    while len(ClusAll) < LIMIT_CLUSTER + 1:
                        ClusAll.append({"heapSolutions": ADT.Heap()})
                ClusAll[LIMIT_CLUSTER] = dicton
            else:
                ClusAll.append(dicton)

        LAST_AVAILABLE_ROTID = indes
        tomerge = None
        merged = None  # never read afterwards
        nuovo_clus = None
        subclu = None  # never read afterwards
        # Everything was stored directly in ClusAll, so nothing is left to accumulate below.
        listaQuaternions = []
        listaRotNumInRlist = []
        listaDictioNameNumInRlist = {}
        # Rebuild the representatives: a deep copy of the top element of every non-empty cluster.
        RotClu = []
        for inderec in range(len(ClusAll)):
            if len(ClusAll[inderec]["heapSolutions"].asList()) == 0:
                continue
            prio, rota = ClusAll[inderec]["heapSolutions"].pop()
            ClusAll[inderec]["heapSolutions"].push(prio, rota)  # pop + push == peek
            hp = ADT.Heap()
            hp.push(prio, copy.deepcopy(rota))
            RotClu.append({"heapSolutions": hp})
    elif (not isArcimboldo and name != "0") or (len(
            fixed) > 0 and LIMIT_CLUSTER == None):
        # It means that we are not at the first fragment search for Arcimboldo
        # or we are working with libraries in Borges
        tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist,
                                                      listaDictioNameNumInRlist, thresholdCompare, ClusteringMode,
                                                      quate, laue, listNCS, ensembles, cell_dim, evaLLONG)
        final_clus = []  # one list of rotations (best first, duplicates removed) per merged group
        for key in tomerge.keys():
            value = tomerge[key] + [key]
            pdb_done = []
            lisp = []
            for va in value:
                for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True):
                    if rotaz["name"] in pdb_done:
                        continue
                    if LIMIT_CLUSTER != None:
                        # NOTE(review): here "original_rotcluster" receives LIMIT_CLUSTER as is, while
                        # everywhere else in this function it is a string - confirm this is intended.
                        rotaz["original_rotcluster"] = LIMIT_CLUSTER
                        rotaz["n_prev_cluster"] = LIMIT_CLUSTER
                    pdb_done.append(rotaz["name"])
                    lisp.append(rotaz)
            final_clus.append(lisp)

        listaQuaternions = []
        listaRotNumInRlist = []
        listaDictioNameNumInRlist = {}

        for cl in final_clus:
            if len(cl) == 0:
                continue
            rotaz = cl[0]  # the first rotation represents the whole group in the comparisons
            inserted = False
            fixedNumbers = []  # cluster ids (last component) of the fixed fragments compared so far
            # 1) Try to match the group with one of the already fixed fragments.
            for rotafi in fixed:
                fixedNumbers.append(int(rotafi["original_rotcluster"].split("_")[-1]))
                result, elong = compareRotation(rotaz, rotafi, thresholdCompare, ClusteringMode, quate, laue,
                                                listNCS, ensembles, cell_dim, evaLLONG, print_angles=False)
                if result:
                    rotaz["elong"] = elong
                    inserted = True
                    for ro in cl:
                        # Combined id: description of the last fixed fragment + matched cluster id.
                        ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \
                                                    rotafi["original_rotcluster"].split("_")[-1]
                        ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"])
                    break

            # 2) Otherwise try the representatives of the known clusters.
            if not inserted and LIMIT_CLUSTER == None:
                for inderec in range(len(RotClu)):
                    prio, rota = RotClu[inderec]["heapSolutions"].pop()
                    ncl = rota["n_prev_cluster"]
                    RotClu[inderec]["heapSolutions"].push(prio, rota)  # pop + push == peek
                    if ncl in fixedNumbers:
                        # Cluster already tested through the fixed fragments above.
                        continue
                    result, elong = compareRotation(rotaz, rota, thresholdCompare, ClusteringMode, quate, laue,
                                                    listNCS, ensembles, cell_dim, evaLLONG, print_angles=False)
                    if result:
                        rotaz["elong"] = elong
                        inserted = True
                        for ro in cl:
                            if len(fixed) > 0:
                                ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \
                                                            rota["original_rotcluster"].split("_")[-1]
                                ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"])
                            else:
                                ro["original_rotcluster"] = rota["original_rotcluster"].split("_")[-1]
                                ro["n_prev_cluster"] = int(ro["original_rotcluster"])
                        break

            # 3) Nothing matched: open a new cluster and register its representative.
            if not inserted and LIMIT_CLUSTER == None:
                rotaz["elong"] = 0
                for ro in cl:
                    if len(fixed) > 0:
                        ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + str(LAST_AVAILABLE_ROTID)
                        ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"])
                    else:
                        ro["original_rotcluster"] = str(LAST_AVAILABLE_ROTID)
                        ro["n_prev_cluster"] = int(ro["original_rotcluster"])
                print "Inserting rotation in a new cluster", LAST_AVAILABLE_ROTID
                LAST_AVAILABLE_ROTID += 1
                hp = ADT.Heap()
                hp.push((-1 * cl[0]["llg"], -1 * cl[0]["zscore"]), copy.deepcopy(cl[0]))
                dicn = {"heapSolutions": hp}
                RotClu.append(dicn)

            # Accumulate every rotation of the group for saveRotations and for listaAllrot.
            for ro in cl:
                listaQuaternions.append([ro["euler"][0] * 1, ro["euler"][1] * 2, ro["euler"][2] * 3,
                                         ADT.cantor_pairing(
                                             [ro["euler"][0] * 3, ro["euler"][1] * 1, ro["euler"][2] * 2])])
                listaRotNumInRlist.append((ro["name"], ro["numInRlist"]))
                listaDictioNameNumInRlist[(ro["name"], ro["numInRlist"])] = ro

        # saveRotations reads index 2 (the rotations) and index 3 (the cluster list) of the list it is given.
        ClusAll = saveRotations(DicParameters,
                                [listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, ClusAll],
                                LIMIT_CLUSTER=LIMIT_CLUSTER, applyNameFilter=applyNameFilter)

    listaAllrot[0] += listaQuaternions
    listaAllrot[1] += listaRotNumInRlist
    listaAllrot[2].update(listaDictioNameNumInRlist)
    listaAllrot[3] = [ClusAll, RotClu]
    if giveids:
        return ensembles, listaAllrot
    else:
        return listaAllrot
def angle_between(A, B, N, signed=True):
    """
    Return the angle, in radians, between the 3D vectors A and B.

    The angle is computed as arctan2(|A x B|, A . B), which is a value in [0, pi].
    When ``signed`` is true the result is negated if the cross product A x B points
    against the reference normal N (i.e. dot(N, A x B) < 0). N is not read when
    ``signed`` is false.
    """
    # ANGLE BETWEEN TWO 3D VECTORS:
    # 1- dot(norm(A),norm(B)) (ANGLES UNSIGNED, PROBLEMS FOR SMALL ANGLES WITH ROUNDINGS)
    # 2- arcos(dot(A,B)/(|A|*|B|)) (ANGLE UNSIGNED, PROBLEMS FOR SMALL ANGLES WITH ROUNDINGS)
    # 3- arctan2(|cross(A,B)|,dot(A,B)) (ANGLE UNSIGNED BUT NO PROBLEMS OF ROUNDINGS) <- used here
    ax, ay, az = A[0], A[1], A[2]
    bx, by, bz = B[0], B[1], B[2]

    # Components of the cross product A x B
    cx = ay * bz - az * by
    cy = az * bx - ax * bz
    cz = ax * by - ay * bx

    cross_norm = numpy.sqrt(cx * cx + cy * cy + cz * cz)
    dot_ab = (ax * bx) + (ay * by) + (az * bz)
    theta = numpy.arctan2(cross_norm, dot_ab)

    # The sign is given by the orientation of the cross product with respect to N
    if signed and ((N[0] * cx) + (N[1] * cy) + (N[2] * cz)) < 0:
        theta = -theta
    return theta
Using just the model of rot2" # print sys.exc_info() # traceback.print_exc(file=sys.stdout) if print_angles: print "TETA_1", TETA_1, "threshold", threshold # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() if "angle" in rot2.keys(): if rot2["angle"] >= TETA_1: rot2["angle"] = TETA_1 else: rot2["angle"] = TETA_1 if TETA_1 <= 1000 and TETA_1 <= threshold: return True else: return False def simpleDistributionOrientationComparison3(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim, print_angles=False): TETA_1 = None TETA_2 = None try: structureA = Bioinformatics.getAtomsList("A", convNames[rot1["name"]]) structureA = [[float(structureA[0][6]), float(structureA[0][7]), float(structureA[-1][8])], [float(structureA[-1][6]), float(structureA[-1][7]), float(structureA[-1][8])]] structureB = copy.deepcopy(structureA) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim) Bq = rot2["rotationMatrices"] # print "---", Aq # print "---", Bq structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim) Aatm1 = structureA[0] Aatm2 = structureA[-1] Batm1 = structureB[0] Batm2 = structureB[-1] if print_angles: print "Structure 1" print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_1 = TETA_1 * 57.2957795 except: TETA_1 = 1000 print "ATTENTION: Model", rot1["name"], "not found... 
Using just the model of rot2" # print sys.exc_info() # traceback.print_exc(file=sys.stdout) try: structureA = Bioinformatics.getAtomsList("A", convNames[rot2["name"]]) structureA = [[float(structureA[0][6]), float(structureA[0][7]), float(structureA[-1][8])], [float(structureA[-1][6]), float(structureA[-1][7]), float(structureA[-1][8])]] structureB = copy.deepcopy(structureA) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim) Bq = rot2["rotationMatrices"] structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim) Aatm1 = structureA[0] Aatm2 = structureA[-1] Batm1 = structureB[0] Batm2 = structureB[-1] if print_angles: print "Structure 2" print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_2 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_2 = TETA_2 * 57.2957795 except: TETA_2 = 1000 print "ATTENTION: Model", rot2["name"], "not found... 
Using just the model of rot1" # print sys.exc_info() # traceback.print_exc(file=sys.stdout) if print_angles: print "TETA_1", TETA_1, "TETA_2", TETA_2, "threshold", threshold # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() if min(TETA_1, TETA_2) <= 1000 and min(TETA_1, TETA_2) <= threshold: return True else: return False def simpleDistributionOrientationComparison2(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim, print_angles=False): TETA_1 = None TETA_2 = None try: structureA = Bioinformatics.getStructure("A", convNames[rot1["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot1["name"]]) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=False) Bq = rot2["rotationMatrices"] # print "---", Aq # print "---", Bq structureB = rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=False) Aatm1 = structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Aatm2 = structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() Batm1 = structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Batm2 = structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_1 = TETA_1 * 57.2957795 except: TETA_1 = 1000 try: structureA = Bioinformatics.getStructure("A", convNames[rot2["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot2["name"]]) 
Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=False) Bq = rot2["rotationMatrices"] structureB = rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=False) Aatm1 = structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Aatm2 = structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() Batm1 = structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Batm2 = structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() if print_angles: print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_2 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_2 = TETA_2 * 57.2957795 except: TETA_2 = 1000 if print_angles: print "TETA_1", TETA_1, "TETA_2", TETA_2, "threshold", threshold # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() if min(TETA_1, TETA_2) <= 1000 and min(TETA_1, TETA_2) <= threshold: return True else: return False def simpleDistributionOrientationComparison(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim): # if not (os.path.exists("./temp/")): # os.makedirs("./temp/") structureA = Bioinformatics.getStructure("A", convNames[rot1["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot2["name"]]) Aq = rot1["rotationMatrices"] rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=True) # tupleResult = 
Bioinformatics.getFragmentListFromPDB("./temp/0_0_rot.pdb",False,False) # tupleResult = Bioinformatics.getFragmentListFromStructure(stru00,False,False,"stru00") tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/0_0_rot.pdb", False) Astructure = tupleResult[0] AlistFrags = tupleResult[1] # Bq = rot2["quaternion"] Bq = rot2["rotationMatrices"] rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=True) # tupleResult = Bioinformatics.getFragmentListFromPDB("./temp/1_1_rot.pdb",False,False) # tupleResult = Bioinformatics.getFragmentListFromStructure(stru11,False,False,"stru11") tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/1_1_rot.pdb", False) Bstructure = tupleResult[0] BlistFrags = tupleResult[1] """ #============TEMPORANEO=============== maxl = 0 ind = 0 for i in range(len(AlistFrags)): if maxl < (AlistFrags[i])["fragLength"]: ind = i maxl = (AlistFrags[i])["fragLength"] AlistFrags = [AlistFrags[ind]] BlistFrags = [BlistFrags[ind]] #===========TEMPORANEO================ """ # print "lenA",len(AlistFrags),"lenB",len(BlistFrags) nWindows, comp_windows, anyWay = Bioinformatics.compareDistributionAccordingOrientation(AlistFrags, BlistFrags, threshold, shift, where) """ print "---------------" print str(rot1["quaternion"]) print str(rot2["quaternion"]) print str(anyWay) print nWindows,len(comp_windows) print "---------------" """ # os.remove("./temp/0_0_rot.pdb") # os.remove("./temp/1_1_rot.pdb") result = False for t in range(len(nWindows)): prop = numpy.ceil((nWindows[t] * 40) / 100) if len(comp_windows[t]) >= (nWindows[t] - prop): result = True """ print "---------------" print str(Aq) print str(Bq) print str(comp_windows) print nWindows[t],len(comp_windows[t]) print "---------------" """ else: """ print "---------------" print str(Aq) print str(Bq) print str(comp_windows) print nWindows[t],len(comp_windows[t]),shift,where print "---------------" """ return False, len(comp_windows[t]) return 
result, None def compareRotation(rot1, rot2, treshold, mode, quate, laue, listNCS, convNames, cell_dim, evaLLONG, print_angles=False): if mode == "distributionCV": investigate = False for shift in range(8): if shift > 0 and not evaLLONG: break if not evaLLONG: shift = 0 else: shift = shiftew firstResult = simpleDistributionOrientationComparison(rot1, rot2, treshold, quate, convNames, shift, "A", cell_dim) if firstResult[0]: return True, shift else: # cerca equivalenza per simmetria contr = 0 secondRe = None for rti, rtq in (quate.matricesRot[laue]).iteritems(): if contr == 0: contr += 1 continue contr += 1 new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti]) rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(rtq) secondResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate, convNames, shift, "A", cell_dim) if secondResult[0]: rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(rtq) # return True,shift secondRe = (True, shift) break elif secondResult[1] > 0: investigate = True if secondRe != None: return secondRe else: for ncs in listNCS: print "COMPARING NCS MATRIX", ncs rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(ncs) new_quat = quate.convertRotMatrixToQuaternion2(ncs) thirdResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate, convNames, shift, "A", cell_dim) if thirdResult[0]: # print "Result:",secondResult rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(ncs) # print rot1["name"], "COMPATIBLE WITH", new_quat, ncs return True, shift if not firstResult[1] and not investigate: break if evaLLONG: investigate = False for shift in range(1, 8): firstResult = simpleDistributionOrientationComparison(rot1, rot2, treshold, quate, convNames, shift, "B", cell_dim) if firstResult[0]: 
return True, -1 * shift else: # cerca equivalenza per simmetria contr = 0 for rti, rtq in (quate.matricesRot[laue]).iteritems(): if contr == 0: contr += 1 continue contr += 1 new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti]) rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(rtq) secondResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate, convNames, shift, "B", cell_dim) if secondResult[0]: rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(rtq) return True, -1 * shift elif secondResult[1] > 0: investigate = True if not firstResult[1] and not investigate: break return False, 0 elif mode == "rot_matrices": firstResult = simpleDistributionOrientationComparison4(rot1, rot2, treshold, quate, convNames, 0, "A", cell_dim, print_angles=print_angles) if firstResult: return True, 0 else: # cerca equivalenza per simmetria contr = 0 secondResult = False for rti, rtq in (quate.matricesRot[laue]).iteritems(): if contr == 0: contr += 1 continue contr += 1 new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti]) rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(rtq) secondResult = simpleDistributionOrientationComparison4(rot3, rot2, treshold, quate, convNames, 0, "A", cell_dim, print_angles=print_angles) if secondResult: rot1["simmetry_rotated"] = new_quat rot1["rotationMatrices"].append(rtq) # return True,shift secondResult = True break if secondResult: return True, 0 else: for ncs in listNCS: print "COMPARING NCS MATRIX", ncs rot3 = {} rot3["name"] = rot1["name"] rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"]) rot3["rotationMatrices"].append(ncs) new_quat = quate.convertRotMatrixToQuaternion2(ncs) thirdResult = simpleDistributionOrientationComparison4(rot3, rot2, treshold, quate, 
def simpleQuaternionComparison2(rot1q, rot2q, treshold, quate):
    """Compare two quaternions through their dot product (debug variant, prints its steps).

    Thesis Robert Adam Nicholls

    :param rot1q: first quaternion, passed to quate.QuaternionDotProduct.
    :param rot2q: second quaternion, passed to quate.QuaternionDotProduct.
    :param treshold: maximum accepted angle, in degrees.
    :param quate: object exposing QuaternionDotProduct(a, b).
    :return: True when the computed angle is <= treshold, False otherwise.
    """
    # 1. Dot product of the two quaternions.
    dot = quate.QuaternionDotProduct(rot1q, rot2q)
    # 2. Second Chebyshev polynomial, T2(d) = 2*d**2 - 1.
    #    FIX: the original computed 2 - 2*d**2, which yields 90 degrees for two
    #    identical quaternions (d == 1) instead of 0.
    #    Clamped so rounding noise cannot push arccos outside its domain.
    angle = numpy.clip(2 * (dot ** 2) - 1, -1.0, 1.0)
    # Single-argument parenthesised print: same text on Python 2 and Python 3.
    print("%s %s" % ("distance cosin", angle))
    # 3. Arc cosine of the value above.
    angle = numpy.arccos(angle)
    # 5. Multiply by 57.2957795 to express the angle in degrees.
    angle_degree = angle * 57.2957795
    if angle_degree > 180:
        angle_degree = 360.0 - angle_degree
    print("%s %s" % ("Angle degreee: ", angle_degree))
    if angle_degree <= treshold:
        return True
    else:
        return False
def simpleQuaternionComparison(rot1q, rot2q, treshold, quate):
    """Tell whether the rotation taking `rot1q` onto `rot2q` is at most `treshold` degrees.

    James Diebel 2006

    :param rot1q: first quaternion; its inverse is taken with quate.QuaternionInverse.
    :param rot2q: second quaternion.
    :param treshold: maximum accepted angle, in degrees.
    :param quate: object exposing QuaternionInverse(q) and QuaternionProduct(a, b);
                  element [3] of the product is used as the arccos argument.
    :return: True when the angle is <= treshold, False otherwise.
    """
    # 1. Inverse of rot1q.
    inv = quate.QuaternionInverse(rot1q)
    # 2. Product of rot2q and the inverse of rot1q.
    res = quate.QuaternionProduct(rot2q, inv)
    # 3. Arc cosine of component [3] of that product.
    #    FIX: rounding can leave the component marginally outside [-1, 1];
    #    arccos then returns NaN and the comparison below is always False, so
    #    (near) identical rotations were reported as different. numpy.clip
    #    keeps a genuine NaN input as NaN.
    angle = numpy.arccos(numpy.clip(res[3], -1.0, 1.0))
    # 4. Twice that value is the rotation angle in radians.
    angle_rad = angle * 2.0
    # 5. Multiply by 57.2957795 to express the angle in degrees.
    angle_degree = angle_rad * 57.2957795
    # Fold angles above 180 back, so a quaternion and its negation compare equal.
    if angle_degree > 180:
        angle_degree = 360.0 - angle_degree
    if angle_degree <= treshold:
        return True
    else:
        return False
def readTranslationsFTF(baseDir, name, quate, mode, tops=None):
    """Parse the .sol / .out / .sh files of one job into translation solutions.

    NOTE(review): the block nesting below was reconstructed from a listing in
    which indentation had been lost; confirm it against the original file,
    in particular which statements sit inside `if mode != "PICASSO":`.

    :param baseDir: path prefix; file names are built by plain string
                    concatenation (baseDir + name + extension).
    :param name: file stem shared by the .sol, .out and .sh files.
    :param quate: object exposing convertEulerToQuaternion(a, b, c, "zyz").
    :param mode: one of "TRA", "RNP", "PACK", "PICASSO"; selects which tables
                 are read and how scores are attached to each solution.
    :param tops: when not None, stop after this many solutions from the .sol file.
    :return: (tra, fixed, diziona, convertnames), or ([], [], {}, {}) when the
             .sol file does not exist.
    :raises Exception: when the .out file is empty.
    """
    sol = baseDir + name + ".sol"
    out = baseDir + name + ".out"
    sh = baseDir + name + ".sh"

    fixed = []
    diziona = {}
    FOM = {}
    savellg = 0.0
    savezscore = 0.0
    diziocorresp = {}
    dinuovoname = ""
    cutoff = 0

    if mode != "PICASSO":
        fi = open(out, "r")
        traFOM = fi.readlines()
        fi.close()

        if len(traFOM) == 0:
            raise Exception(".out not ready!")

        if not os.path.exists(sol):
            return ([], [], {}, {})

        fi = open(sh, "r")
        script = fi.readlines()
        fi.close()

        # ---- 1. already placed ("fixed") fragments listed in the .sh script ----
        fixed = []
        first = False
        second = False
        nfixed = 0
        nfrags = 0
        for linea in script:
            lista = linea.split()
            if lista[0] == "PACK" and lista[1] == "CUTOFF":
                cutoff = int(lista[2])
            if lista[0] == "SOLU" and lista[1] == "SET":
                if not first:
                    first = True
                else:
                    # a second SOLU SET: restart the per-set fragment counter
                    second = True
                    nfrags = 0
            if lista[0] == "SOLU" and lista[1] == "6DIM":
                if len(lista) == 22:
                    dizio = {}
                    dizio["original_rotcluster"] = str(lista[17])
                    dizio["zscore"] = float(lista[21])
                    dizio["llg"] = float(lista[19])
                    dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])]
                    dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1],
                                                                         dizio["euler"][2],
                                                                         "zyz")  # PHASER CONVENTION Z,Y,Z
                    dizio["rotationMatrices"] = [
                        matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])]
                    dizio["baseDir"] = baseDir
                    if str(lista[15]).startswith("ensembleID"):
                        dizio["name"] = str(lista[15])
                    elif str(lista[3]).startswith("ensarci"):
                        dizio["name"] = str(lista[20])
                    else:
                        dizio["name"] = str(lista[3])
                    dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])]
                    dizio["bfactor"] = float(lista[13])
                    dizio["elong"] = 0
                    nfrags += 1
                    fixed.append(dizio)
                elif mode in ["RNP", "PACK"]:
                    # shorter 6DIM line: cluster / llg / zscore columns sit two places earlier
                    dizio = {}
                    dizio["original_rotcluster"] = str(lista[15])
                    dizio["zscore"] = float(lista[19])
                    dizio["llg"] = float(lista[17])
                    dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])]
                    dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1],
                                                                         dizio["euler"][2],
                                                                         "zyz")  # PHASER CONVENTION Z,Y,Z
                    dizio["rotationMatrices"] = [
                        matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])]
                    dizio["baseDir"] = baseDir
                    if str(lista[15]).startswith("ensembleID"):
                        dizio["name"] = str(lista[15])
                    elif str(lista[3]).startswith("ensarci"):
                        dizio["name"] = str(lista[20])
                    else:
                        dizio["name"] = str(lista[3])
                    dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])]
                    dizio["bfactor"] = float(lista[13])
                    dizio["elong"] = 0
                    nfrags += 1
                    fixed.append(dizio)

        nfixed = nfrags - 1
        if mode in ["PACK"] and not second:
            # single set: the last 6DIM entry is the solution itself; keep its
            # scores and name aside and drop it from the fixed list
            savellg = fixed[-1]["llg"]
            savezscore = fixed[-1]["zscore"]
            dinuovoname = fixed[-1]["name"]
            fixed = fixed[:-1]
        elif mode in ["RNP", "PACK"]:
            # several sets: regroup the flat list into sub-lists of nfixed + 1 entries
            fixed = [fixed[i:i + nfixed + 1] for i in range(0, len(fixed), nfixed + 1)]

        # ---- 2. correspondence table read from the .out file ----
        if mode == "RNP":
            startc = 0
            for linea in traFOM:
                if startc >= 1:
                    led = linea.strip().split()
                    if len(led) == 0:
                        break
                    if led[0] != "---":
                        zs = 0.0
                        if led[5] != "n/a":
                            zs = float(led[5])
                        # value stored: (row number, float(led[6]), zs)
                        if led[0].startswith("Top"):
                            diziocorresp[int(led[0].split("Top")[1])] = (
                                int(led[0].split("Top")[1]), float(led[6]), zs)  # PHASER 2.6.0
                        else:
                            diziocorresp[int(led[0])] = (int(led[0]), float(led[6]), zs)
                if linea.strip().startswith("#out =#out"):  # PHASER 2.7.9
                    startc += 1
        elif mode == "PACK":
            startc = 0
            diziocorresp = {}
            for linea in traFOM:
                if startc >= 1:
                    led = linea.split()
                    if len(led) == 0:
                        break
                    if led[2].startswith(">"):
                        # a value written as ">..." is replaced by 10000
                        led[2] = 10000
                    diziocorresp[int(led[0])] = float(led[2])  # PHASER 2.7.x {num_in_sh:percent_clashes}
                if linea.strip().startswith("#in #out"):  # PHASER 2.7.9
                    startc += 1

        # ---- 3. euler angles -> previous cluster id, again from the .sh script ----
        diziona = {}
        list_trial = []
        if mode == "TRA":
            for linea in script:
                lista = linea.split()
                if lista[0] == "SOLU" and lista[1] == "TRIAL":
                    if len(lista) == 14:
                        diziona[(float(lista[5]), float(lista[6]), float(lista[7]))] = int(lista[13])
                        list_trial.append(lista[11])
            for linea in script:
                lista = linea.split()
                if lista[0] == "SOLU" and lista[1] == "6DIM":
                    if len(lista) == 22:
                        diziona[(float(lista[5]), float(lista[6]), float(lista[7]))] = int(lista[17])
                        list_trial.append(lista[15])
        elif mode == "PACK" or mode == "RNP":
            # small state machine: SOLU SET -> skip one line -> collect entries
            leggiTra1 = False
            leggiTra2 = False
            temp = []
            for linea in script:
                lista = linea.split()
                if lista[0] == "ROOT":
                    if leggiTra2:
                        leggiTra2 = False
                        diziona[tuple((temp[-1])["euler"])] = (temp[-1])["n_prev_cluster"]
                        temp = []
                    break
                if lista[0] == "SOLU" and lista[1] == "SET":
                    if leggiTra2:
                        leggiTra2 = False
                        diziona[tuple((temp[-1])["euler"])] = (temp[-1])["n_prev_cluster"]
                        temp = []
                    leggiTra1 = True
                    continue
                if leggiTra1:
                    leggiTra2 = True
                    leggiTra1 = False
                    continue
                if leggiTra2:
                    leggiTra1 = False
                    dizio = {}
                    dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])]
                    dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])]
                    dizio["n_prev_cluster"] = int(lista[15])
                    temp.append(dizio)

        # ---- 4. figures of merit table from the .out file ----
        FOM = {}
        start = False
        startskip = -1
        section = False
        for linea in traFOM:
            if startskip > 0:
                # still skipping the lines that follow the RNP table title
                startskip -= 1
                continue
            if startskip == 0:
                start = True
                startskip = -1
            if linea.startswith("Number LLG Z-Score"):
                section = True
                continue
            if mode != "RNP" and section and linea.startswith("$$ loggraph $$"):  # NEW PHASER
                start = True
                continue
            if mode == "RNP" and linea.strip() == "Refinement Table (Sorted)":
                startskip = 3
                continue
            if mode != "RNP" and start and linea.startswith("$$"):
                start = False
                break
            if mode == "RNP" and start and len(linea.split()) == 0:
                start = False
                break
            if start:
                lista = linea.strip().split()
                if mode == "TRA":
                    FOM[int(lista[0])] = [float(lista[1]), float(lista[2])]
                elif mode == "RNP":
                    zs = 0.0
                    if lista[0] != "---":
                        if lista[5] != "n/a":
                            zs = float(lista[5])
                        if lista[0].startswith("Top"):
                            FOM[int(lista[0].split("Top")[1])] = [float(lista[6]), zs]  # PHASER 2.6.0
                        else:
                            FOM[int(lista[0])] = [float(lista[6]), zs]  # PHASER 2.6.0

    # ---- 5. the solutions themselves, from the .sol file ----
    fi = open(sol, "r")
    traslazioni = fi.readlines()
    fi.close()

    tra = []
    numero = 1  # 1-based index of the solution currently being assembled
    leggiTra1 = False
    leggiTra2 = False
    temp = []  # entries of the solution set currently being read
    PREVAL = {}
    numera2 = 0
    tcns = False
    convertnames = {}
    nameRota = ""
    vrms = None  # smallest #VRMS value seen so far on SOLU ENSEMBLE lines
    for linea in traslazioni:
        if tops != None and numero > tops:
            break
        lista = linea.split()
        # Following 2 lines are for PHASER 2.5.5
        if lista[0] == "SOLU" and lista[1] == "ENSEMBLE":
            if "#VRMS" in lista:
                ind = lista.index("#VRMS") + 1
                if len(lista) > ind:
                    if vrms is None or vrms > float(lista[ind]):
                        vrms = float(lista[ind])
            continue
        # New Phaser have this new line to skip
        if lista[0] == "SOLU" and lista[1] == "HISTORY":
            valua = lista[2]
            if "PAK" in lista[2]:
                valua = lista[3]
            if mode == "TRA":
                # the history token carries a 1-based index into list_trial
                if "RF/TF" in valua:
                    index_rlist = int(valua.split("RF/TF")[1].split("/")[0][1:]) - 1
                    nameRota = list_trial[index_rlist]
                else:
                    index_rlist = int(valua.split("RF")[1].split(")")[0][1:]) - 1
                    nameRota = list_trial[index_rlist]
            continue  # Phaser 2.6.1
        if lista[0] == "SOLU" and lista[1] == "SET":
            if lista[-1] == "+TNCS":
                tcns = True
            if mode == "PICASSO":
                numera2 += 1
                PREVAL[str(numera2)] = [float(((lista[5]).split("="))[-1]), float(((lista[3]).split("="))[-1])]
            if leggiTra2:
                # a new set starts: close the one collected so far
                leggiTra2 = False
                if vrms is not None:
                    (temp[-1])["vrms"] = vrms
                if mode == "PACK":
                    (temp[-1])["zscore"] = savezscore
                    (temp[-1])["llg"] = savellg
                    (temp[-1])["name"] = dinuovoname
                elif mode == "PICASSO":
                    (temp[-1])["zscore"] = PREVAL[str(numero)][1]
                    (temp[-1])["llg"] = PREVAL[str(numero)][0]
                elif mode == "RNP":
                    (temp[-1])["zscore"] = diziocorresp[numero][2]
                    (temp[-1])["llg"] = diziocorresp[numero][1]
                    (temp[-1])["name"] = fixed[int(diziocorresp[numero][0]) - 1][-1]["name"]
                else:
                    (temp[-1])["zscore"] = FOM[numero][1]
                    (temp[-1])["llg"] = FOM[numero][0]

                if tcns:
                    # with +TNCS every entry but the last becomes a fixed fragment
                    for fg in range(len(temp) - 1):
                        if fg == len(temp) - 2:
                            temp[fg]["zscore"] = temp[-1]["zscore"]
                            temp[fg]["llg"] = temp[-1]["llg"]
                        else:
                            temp[fg]["zscore"] = fixed[fg]["zscore"]
                            temp[fg]["llg"] = fixed[fg]["llg"]
                        if temp[fg]["name"].startswith("ensembleID"):
                            # rewrite the number between "FR" and "_" with the fragment index
                            w = temp[fg]["name"].split("FR")
                            w = w[:-1] + w[-1].split("_")
                            w[1] = str(fg)
                            nw = w[0] + "FR" + w[1] + "_" + w[2]
                            temp[fg]["name"] = nw
                            if fg == len(temp) - 2:
                                temp[fg]["original_rotcluster"] = str(diziona.values()[0])
                            else:
                                temp[fg]["original_rotcluster"] = fixed[fg]["original_rotcluster"]
                        else:
                            temp[fg]["original_rotcluster"] = str(diziona.values()[0])
                    temp[-1]["fixed_frags"] = copy.deepcopy(temp[:-1])
                if tcns and len(temp[-1]["fixed_frags"]) > 0 and temp[-1]["name"].startswith("ensembleID"):
                    w = temp[-1]["name"].split("FR")
                    w = w[:-1] + w[-1].split("_")
                    w[1] = str(len(temp[-1]["fixed_frags"]))
                    nw = w[0] + "FR" + w[1] + "_" + w[2]
                    convertnames[nw] = temp[-1]["name"]  # new name -> previous name
                    temp[-1]["name"] = nw
                tra.append((temp[-1]))
                temp = []
                numero += 1
            leggiTra1 = True
            continue
        if leggiTra1:
            # the line right after SOLU SET is skipped
            leggiTra2 = True
            leggiTra1 = False
            continue
        if leggiTra2:
            leggiTra1 = False
            dizio = {}
            add = 0
            if lista[5] == "EULER":
                # an extra "EULER" token shifts every following column by one
                add += 1
            dizio["euler"] = [float(lista[5 + add]), float(lista[6 + add]), float(lista[7 + add])]
            dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1],
                                                                 dizio["euler"][2], "zyz")  # PHASER CONVENTION Z,Y,Z
            dizio["rotationMatrices"] = [matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])]
            dizio["baseDir"] = baseDir
            dizio["name"] = nameRota  # str(lista[3])
            dizio["frac"] = [float(lista[9 + add]), float(lista[10 + add]), float(lista[11 + add])]
            dizio["bfactor"] = float(lista[13 + add])
            dizio["elong"] = 0
            temp.append(dizio)

    # ---- 6. close the last set (same treatment as inside the loop) ----
    if len(temp) > 0:
        if vrms is not None:
            (temp[-1])["vrms"] = vrms
        if mode == "PACK":
            (temp[-1])["zscore"] = savezscore
            (temp[-1])["llg"] = savellg
            (temp[-1])["name"] = dinuovoname
        elif mode == "PICASSO":
            (temp[-1])["zscore"] = PREVAL[str(numero)][1]
            (temp[-1])["llg"] = PREVAL[str(numero)][0]
        elif mode == "RNP":
            (temp[-1])["llg"] = diziocorresp[numero][1]
            (temp[-1])["name"] = fixed[int(diziocorresp[numero][0]) - 1][-1]["name"]
            (temp[-1])["zscore"] = diziocorresp[numero][2]
        else:
            (temp[-1])["zscore"] = FOM[numero][1]
            (temp[-1])["llg"] = FOM[numero][0]

        if tcns:
            for fg in range(len(temp) - 1):
                if fg == len(temp) - 2:
                    temp[fg]["zscore"] = temp[-1]["zscore"]
                    temp[fg]["llg"] = temp[-1]["llg"]
                else:
                    temp[fg]["zscore"] = fixed[fg]["zscore"]
                    temp[fg]["llg"] = fixed[fg]["llg"]
                if temp[fg]["name"].startswith("ensembleID"):
                    w = temp[fg]["name"].split("FR")
                    w = w[:-1] + w[-1].split("_")
                    w[1] = str(fg)
                    nw = w[0] + "FR" + w[1] + "_" + w[2]
                    temp[fg]["name"] = nw
                    if fg == len(temp) - 2:
                        temp[fg]["original_rotcluster"] = str(diziona.values()[0])
                    else:
                        temp[fg]["original_rotcluster"] = fixed[fg]["original_rotcluster"]
                else:
                    temp[fg]["original_rotcluster"] = str(diziona.values()[0])
            temp[-1]["fixed_frags"] = copy.deepcopy(temp[:-1])
        if tcns and len(temp[-1]["fixed_frags"]) > 0 and temp[-1]["name"].startswith("ensembleID"):
            w = temp[-1]["name"].split("FR")
            w = w[:-1] + w[-1].split("_")
            w[1] = str(len(temp[-1]["fixed_frags"]))
            nw = w[0] + "FR" + w[1] + "_" + w[2]
            convertnames[nw] = temp[-1]["name"]
            temp[-1]["name"] = nw
        tra.append((temp[-1]))
        temp = []
        numero += 1

    # ---- 7. PACK: rebuild the solution list from the entries within the cutoff ----
    if mode == "PACK" and len(diziocorresp.keys()) > 0:
        pat = []
        for sdf in range(len(fixed)):
            if diziocorresp[sdf + 1] <= cutoff:
                rt = fixed[sdf][-1]
                rt["fixed_frags"] = fixed[sdf][:-1]
                pat.append(copy.deepcopy(rt))
        tra = pat

    # ---- 8. attach fixed fragments and cumulative cluster descriptions ----
    for trrr in range(len(tra)):
        trasla = tra[trrr]
        if mode == "RNP":
            trasla["fixed_frags"] = copy.deepcopy(
                fixed[diziocorresp[trrr + 1][0] - 1][:-1])  # the last item of each fixed sub-list is the solution itself
        elif not ((mode == "PACK" and len(diziocorresp.keys()) > 0) or tcns):
            trasla["fixed_frags"] = copy.deepcopy(fixed)
        baseCombi = ""
        for fi in trasla["fixed_frags"]:
            # running "_"-joined chain of the cluster ids seen so far
            if len(baseCombi) == 0:
                baseCombi += fi["original_rotcluster"]
            else:
                baseCombi += "_" + fi["original_rotcluster"]
            fi["original_rotcluster"] = baseCombi
            fi["n_prev_cluster"] = __getIDClusterFromDescription(baseCombi)

    return tra, fixed, diziona, convertnames
def readRotationsFRF(baseDir, name, quate, tops=None):
    """Parse the .sh / .rlist / .out files of one job into rotation solutions.

    :param baseDir: directory holding the three files.
    :param name: file stem shared by the three files.
    :param quate: object exposing convertEulerToQuaternion(a, b, c, "zyz").
    :param tops: when not None, stop after this many SOLU TRIAL entries.
    :return: (rota, fixed) -- the rotations read from the .rlist file and the
             fixed fragments read from the first SOLU SET of the .sh script.
    """
    rlist_path = os.path.join(baseDir, name + ".rlist")
    out_path = os.path.join(baseDir, name + ".out")
    sh_path = os.path.join(baseDir, name + ".sh")

    with open(sh_path, "r") as handle:
        script_lines = handle.readlines()

    # Fixed fragments: 20-token SOLU 6DIM lines up to the second SOLU SET.
    fixed = []
    cluster_chain = []
    set_seen = False
    for raw in script_lines:
        tokens = raw.split()
        if tokens[0] != "SOLU":
            continue
        if tokens[1] == "SET":
            if set_seen:
                break
            set_seen = True
        if tokens[1] == "6DIM" and len(tokens) == 20:
            # cumulative "_"-joined description of the clusters met so far
            cluster_chain.append(str(tokens[15]))
            description = "_".join(cluster_chain)
            cluster_id = __getIDClusterFromDescription(description)
            zscore = float(tokens[19])
            llg = float(tokens[17])
            euler = [float(tokens[5]), float(tokens[6]), float(tokens[7])]
            quaternion = quate.convertEulerToQuaternion(euler[0], euler[1], euler[2],
                                                        "zyz")  # PHASER CONVENTION Z,Y,Z
            matrices = [matrixFromEulerAngles(euler[0], euler[1], euler[2])]
            fixed.append({"original_rotcluster": description,
                          "n_prev_cluster": cluster_id,
                          "zscore": zscore,
                          "llg": llg,
                          "euler": euler,
                          "quaternion": quaternion,
                          "rotationMatrices": matrices,
                          "baseDir": baseDir,
                          "name": str(tokens[3]),
                          "frac": [float(tokens[9]), float(tokens[10]), float(tokens[11])],
                          "bfactor": float(tokens[13]),
                          "elong": 0})

    with open(rlist_path, "r") as handle:
        rotation_lines = handle.readlines()
    with open(out_path, "r") as handle:
        log_lines = handle.readlines()

    # Figures of merit: three-column rows between "$$ loggraph $$" and the
    # next "$$" line, once the table header has been seen.
    FOM = {}
    header_seen = False
    in_table = False
    for raw in log_lines:
        if raw.startswith("Number LLG Z-Score"):
            header_seen = True
        elif header_seen and raw.startswith("$$ loggraph $$"):  # NEW PHASER
            in_table = True
        elif in_table:
            if raw.startswith("$$"):
                break
            columns = raw.split()
            if len(columns) == 3:
                FOM[int(columns[0])] = [float(columns[1]), float(columns[2])]
            else:
                in_table = False

    # Rotations: the n-th SOLU TRIAL line is paired with FOM[n].
    rota = []
    numero = 1
    for raw in rotation_lines:
        if tops is not None and numero > tops:
            break
        tokens = raw.split()
        if not (tokens[0] == "SOLU" and tokens[1] == "TRIAL"):
            continue
        euler = [float(tokens[5]), float(tokens[6]), float(tokens[7])]
        zscore = FOM[numero][1]
        llg = FOM[numero][0]
        quaternion = quate.convertEulerToQuaternion(euler[0], euler[1], euler[2],
                                                    "zyz")  # PHASER CONVENTION Z,Y,Z
        matrices = [matrixFromEulerAngles(euler[0], euler[1], euler[2])]
        rota.append({"euler": euler,
                     "zscore": zscore,
                     "llg": llg,
                     "quaternion": quaternion,
                     "rotationMatrices": matrices,
                     "baseDir": baseDir,
                     "name": str(tokens[3]),
                     "frac": [0.0, 0.0, 0.0],
                     "bfactor": 0.0,
                     "elong": 0})
        numero += 1

    return rota, fixed
def _fillCellParameters(cell_dim, parameters):
    """Store the cell-derived conversion coefficients in `parameters` (in place).

    Shared by convertFromFracToOrth and convertFromOrthToFrac, which previously
    each carried an identical copy of this derivation.

    :param cell_dim: sequence (a, b, c, alpha, beta, gamma), angles in degrees.
    :param parameters: dictionary to fill; also returned.
    """
    A = float(cell_dim[0])
    B = float(cell_dim[1])
    C = float(cell_dim[2])
    alphaDeg = float(cell_dim[3])
    betaDeg = float(cell_dim[4])
    gammaDeg = float(cell_dim[5])
    alpha = (alphaDeg * 2 * numpy.pi) / 360
    beta = (betaDeg * 2 * numpy.pi) / 360
    gamma = (gammaDeg * 2 * numpy.pi) / 360
    c_a = numpy.cos(alpha)
    c_b = numpy.cos(beta)
    c_g = numpy.cos(gamma)
    s_g = numpy.sin(gamma)
    q = numpy.sqrt(1 + 2 * c_a * c_b * c_g - c_a ** 2 - c_b ** 2 - c_g ** 2)

    parameters["A"] = A
    parameters["B"] = B
    parameters["C"] = C
    parameters["alphaDeg"] = alphaDeg
    parameters["betaDeg"] = betaDeg
    parameters["gammaDeg"] = gammaDeg
    parameters["alpha"] = alpha
    parameters["beta"] = beta
    parameters["gamma"] = gamma
    parameters["c_a"] = c_a
    parameters["c_b"] = c_b
    parameters["c_g"] = c_g
    parameters["s_g"] = s_g
    parameters["q"] = q
    # Coefficients of the (upper triangular) orthogonal -> fractional transform.
    parameters["uu"] = s_g / (q * C)
    parameters["vv"] = (c_b * c_g - c_a) / (q * B * s_g)
    parameters["uuy"] = 1 / (B * s_g)
    parameters["vvz"] = -1 * (c_g / (A * s_g))
    parameters["uuz"] = (c_a * c_g - c_b) / (q * A * s_g)
    parameters["vvy"] = 1 / A
    return parameters


def convertFromFracToOrth(t1, t2, t3, cell_dim, parameters):
    """Convert fractional coordinates (t1, t2, t3) to orthogonal ones.

    :param cell_dim: (a, b, c, alpha, beta, gamma); only read when `parameters`
                     is empty.
    :param parameters: coefficient cache. An empty dictionary is filled in place
                       from `cell_dim`; a non-empty one is used as is, so the
                       same dictionary must not be reused for a different cell.
    :return: (tx, ty, tz, parameters)
    """
    if len(parameters) == 0:
        _fillCellParameters(cell_dim, parameters)

    # Back-substitution through the triangular system used by convertFromOrthToFrac.
    tz = t3 / parameters["uu"]
    ty = (t2 - tz * parameters["vv"]) / parameters["uuy"]
    tx = (t1 - ty * parameters["vvz"] - tz * parameters["uuz"]) / parameters["vvy"]
    return tx, ty, tz, parameters


def convertFromOrthToFrac(x, y, z, cell_dim, parameters):
    """Convert orthogonal coordinates (x, y, z) to fractional ones.

    :param cell_dim: (a, b, c, alpha, beta, gamma); only read when `parameters`
                     is empty.
    :param parameters: coefficient cache, handled as in convertFromFracToOrth.
    :return: (nx, ny, nz, parameters)
    """
    if len(parameters) == 0:
        _fillCellParameters(cell_dim, parameters)

    nx = (x * parameters["vvy"]) + (y * parameters["vvz"]) + (z * parameters["uuz"])
    ny = (y * parameters["uuy"]) + (z * parameters["vv"])
    nz = z * parameters["uu"]
    return nx, ny, nz, parameters
def rotateStructureByQuaternion(num, num2, structure, quate, q, outputPath, mode="rotateByOrthCoord", cell_dim=[]):
    """Write every atom of `structure`, transformed by quaternion `q`, to a PDB-like file.

    The file is <outputPath><num>_<num2>_rot.pdb; the B-factor column is always 30.00.

    :param structure: object walked with get_list() at model/chain/residue/atom level.
    :param quate: object exposing QuaternionConjugate, QuaternionProduct and
                  PointQuaternionProd.
    :param q: quaternion to apply.
    :param outputPath: path prefix (plain string concatenation, no separator added).
    :param mode: "rotateByCrystCoord" converts each atom to fractional coordinates
                 before the product and back afterwards; any other value works on
                 the coordinates as they are.
    :param cell_dim: cell description, only read in "rotateByCrystCoord" mode.
                     The list default is never mutated here.
    """
    # TODO: This methods does not work properly. Both rotateByOrthCoord and
    # rotateByCrystCoord does not produce the correct rotation
    qConj = quate.QuaternionConjugate(q)
    parameters = {}
    ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n"

    # `with` guarantees the handle is released even if a product or a
    # conversion raises half way through (the original leaked it).
    with open(outputPath + str(num) + "_" + str(num2) + "_rot.pdb", "w") as pdb:
        for model in structure.get_list():
            for chain in model.get_list():
                for residue in chain.get_list():
                    for atom in residue.get_list():
                        coord = atom.get_coord()
                        x = coord[0]
                        y = coord[1]
                        z = coord[2]
                        if mode == "rotateByCrystCoord":
                            x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters)

                        # The point as a quaternion with a zero fourth component.
                        tmpQ = [x, y, z, 0]
                        tmpQ2 = quate.QuaternionProduct(q, tmpQ)
                        tmpQ = quate.PointQuaternionProd(tmpQ2, qConj)
                        nx = tmpQ[0]
                        ny = tmpQ[1]
                        nz = tmpQ[2]
                        if mode == "rotateByCrystCoord":
                            nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters)

                        hetfield, resseq, icode = atom.get_parent().get_id()
                        args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(),
                                atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq,
                                icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(),
                                atom.get_name()[0], " ")
                        pdb.write(ATOM_FORMAT_STRING % args)
def writeClustersPDBS(Clusters, dirout, mode, quate, convNames, printElonged, performTranslation, cell_dim,
                      modeTra="frac"):
    """Write one rotated (and optionally translated) PDB per solution of every cluster.

    Files go to <dirout><cluster index>/; both directories are created when missing.

    :param Clusters: sequence of dictionaries whose "heapSolutions" entry exposes asList().
    :param dirout: output path prefix (the cluster index and "/" are appended as text).
    :param mode: "matrix", "quaternion" or "simmetry_rotated"; any other value
                 performs no rotation step.
    :param quate: quaternion helper forwarded to rotateStructureByQuaternion.
    :param convNames: maps a solution name to the PDB path to parse.
    :param printElonged: when false, solutions with rota["elong"] > 0 are skipped.
    :param performTranslation: when true, the rotated file is re-read, translated
                               according to `modeTra` and then removed.
    :param cell_dim: forwarded to the matrix rotation and the fractional translation.
    :param modeTra: "frac" or "Cmass".
    """
    if not os.path.exists(dirout):
        os.makedirs(dirout)

    for ci, clu in enumerate(Clusters):
        solutions = (clu["heapSolutions"]).asList()
        dirClu = dirout + str(ci) + "/"
        if not os.path.exists(dirClu):
            os.makedirs(dirClu)

        # In "Cmass" mode the first solution of the cluster is the reference.
        structureRef = None
        for i, entry in enumerate(solutions):
            rota = entry[1]
            if not printElonged and rota["elong"] > 0:
                continue

            rotated_pdb = dirClu + str(ci) + "_" + str(i) + "_rot.pdb"
            structure = PDBParser().get_structure(rota["name"], convNames[rota["name"]])

            if mode == "matrix":
                rotateStructureByMatrix(ci, i, structure, rota["rotationMatrices"], dirClu, cell_dim)
            elif mode == "quaternion":
                rotateStructureByQuaternion(ci, i, structure, quate, rota["quaternion"], dirClu)
            elif mode == "simmetry_rotated":
                # prefer the symmetry-rotated quaternion when one was recorded
                if "simmetry_rotated" in rota:
                    chosen = rota["simmetry_rotated"]
                else:
                    chosen = rota["quaternion"]
                rotateStructureByQuaternion(ci, i, structure, quate, chosen, dirClu)

            if not performTranslation:
                continue

            if modeTra == "frac":
                rotated = PDBParser().get_structure(rota["name"], rotated_pdb)
                translateStructurebyFrac(ci, i, rotated, rota["frac"], dirClu, cell_dim)
                os.remove(rotated_pdb)
            elif modeTra == "Cmass":
                rotated = PDBParser().get_structure(rota["name"], rotated_pdb)
                if i == 0:
                    structureRef = rotated
                translateStructurebyCentroidMass(ci, i, rotated, structureRef, dirClu)
                os.remove(rotated_pdb)
def translateStructurebyFrac(num, num2, struct, frac, outputPath, cell_dim, writePDB=True):
    """Shift a deep copy of `struct` by the fractional vector `frac`.

    Every atom is converted to fractional coordinates, shifted, converted back
    and updated through set_coord(); `struct` itself is left untouched.

    :param struct: structure walked with get_list() at every level.
    :param frac: three fractional offsets.
    :param outputPath: path prefix for <outputPath><num>_<num2>_rottra.pdb.
    :param cell_dim: forwarded to the coordinate conversion helpers.
    :param writePDB: when true the atoms are written to the file (B-factor
                     column fixed at 30.00) and nothing is returned; when false
                     no file is produced and the translated copy is returned.
    :return: the translated structure copy, or None when `writePDB` is true.
    """
    structure = copy.deepcopy(struct)
    pdb = None
    if writePDB:
        pdb = open(outputPath + str(num) + "_" + str(num2) + "_rottra.pdb", "w")

    ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n"
    parameters = {}
    try:
        for model in structure.get_list():
            for chain in model.get_list():
                for residue in chain.get_list():
                    for atom in residue.get_list():
                        coord = atom.get_coord()
                        x, y, z, parameters = convertFromOrthToFrac(coord[0], coord[1], coord[2], cell_dim,
                                                                    parameters)
                        nx = x + frac[0]
                        ny = y + frac[1]
                        nz = z + frac[2]
                        nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters)
                        atom.set_coord(numpy.array([nx, ny, nz]))
                        if writePDB:
                            hetfield, resseq, icode = atom.get_parent().get_id()
                            args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(),
                                    atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(),
                                    resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00,
                                    atom.get_parent().get_segid(), atom.get_name()[0], " ")
                            pdb.write(ATOM_FORMAT_STRING % args)
    finally:
        # The original only closed the file on the success path.
        if pdb is not None:
            pdb.close()

    if not writePDB:
        return structure
def filterEqualRotations(Clusters, convNames, quate, laue, listNCS, cell_dim, where):
    """Re-cluster the rotations of every cluster and keep one popped solution per sub-cluster.

    NOTE(review): indentation was reconstructed from a listing without it; in
    particular confirm that the writeSumClusters call belongs inside the
    `for tru in Pru` loop.

    :param Clusters: iterable of dictionaries whose "heapSolutions" entry
                     exposes asList().
    :param convNames, quate, laue, listNCS, cell_dim: forwarded to
           clusterizeRotations / writeSumClusters.
    :param where: forwarded to writeSumClusters.
    :return: list with one {"heapSolutions": ADT.Heap} dictionary per input cluster.
    """
    Clus = []
    index = 0
    for clu in Clusters:
        # work on a deep copy so the input heaps are not affected
        lis = copy.deepcopy(clu["heapSolutions"].asList())
        lit = []
        for item in lis:
            prio, rota = item
            lit.append(rota)
        Pru = []
        # fixed arguments passed here: 0.0, mode "distributionCV", 0.5 and False
        Pru = clusterizeRotations(lit, quate, laue, listNCS, Pru, 0.0, "distributionCV", convNames, cell_dim, 0.5,
                                  False)
        din = {}
        hp = ADT.Heap()
        for tru in Pru:
            # one pop per sub-cluster goes into the new heap
            prio, item = tru["heapSolutions"].pop()
            hp.push(prio, item)
            writeSumClusters(tru, where, "clustersIDE" + str(index), convNames)
        din["heapSolutions"] = hp
        Clus.append(din)
        index += 1
    return Clus
def translateStructurebyVector(num, num2, structure, vect, outputPath, centroidCA=None):
    """Write `structure` shifted by `vect` to <outputPath><num>_<num2>_rottra.pdb.

    :param structure: object walked with get_list() at model/chain/residue/atom level.
    :param vect: three offsets added to every atom.
    :param outputPath: path prefix (plain string concatenation).
    :param centroidCA: optional three coordinates subtracted from every atom
                       before `vect` is added.
    """
    ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n"
    # FIX: `centroidCA != None` is an element-wise comparison when a numpy
    # array is passed, and using its result in `if` raises; test identity instead.
    recenter = centroidCA is not None

    # `with` releases the handle even when an atom fails to format.
    with open(outputPath + str(num) + "_" + str(num2) + "_rottra.pdb", "w") as pdb:
        for model in structure.get_list():
            for chain in model.get_list():
                for residue in chain.get_list():
                    for atom in residue.get_list():
                        coord = atom.get_coord()
                        x = coord[0]
                        y = coord[1]
                        z = coord[2]
                        if recenter:
                            # subtract the corresponding centroid from each structure
                            nx = (x - centroidCA[0]) + vect[0]
                            ny = (y - centroidCA[1]) + vect[1]
                            nz = (z - centroidCA[2]) + vect[2]
                        else:
                            nx = x + vect[0]
                            ny = y + vect[1]
                            nz = z + vect[2]

                        hetfield, resseq, icode = atom.get_parent().get_id()
                        args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(),
                                atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq,
                                icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(),
                                atom.get_name()[0], " ")
                        pdb.write(ATOM_FORMAT_STRING % args)


def _backboneCoordinates(structure):
    """Return [x, y, z] for every atom whose id is CA, C, O or N, in iteration order."""
    coords = []
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    if atom.get_id() in ["CA", "C", "O", "N"]:
                        coord = atom.get_coord()
                        coords.append([coord[0], coord[1], coord[2]])
    return coords


def translateStructurebyCentroidMass(num, num2, structureMove, structureRef, outputPath):
    """Move `structureMove` so its backbone centroid lands on that of `structureRef`.

    The centroids are plain means over the CA/C/O/N atoms. When the two
    structures have a different number of such atoms, the longer list is cut
    at its end to the length of the shorter one before averaging. The shifted
    structure is written by translateStructurebyVector.
    """
    listCAs1 = _backboneCoordinates(structureMove)
    listCAs2 = _backboneCoordinates(structureRef)

    # Same truncation as dropping the surplus items from the tail of the longer list.
    common = min(len(listCAs1), len(listCAs2))
    listCAs1 = listCAs1[:common]
    listCAs2 = listCAs2[:common]

    centroidMove = numpy.mean(numpy.array(listCAs1), axis=0)
    centroidRef = numpy.mean(numpy.array(listCAs2), axis=0)

    # Vector from the moving centroid to the reference centroid.
    vect = [centroidRef[0] - centroidMove[0],
            centroidRef[1] - centroidMove[1],
            centroidRef[2] - centroidMove[2]]
    translateStructurebyVector(num, num2, structureMove, vect, outputPath)
def __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=None, getGlobalStats=False):
    """Compute per-cluster LLG / Z-score statistics.

    NOTE(review): indentation was reconstructed from a listing without it;
    confirm the nesting against the original file.

    :param DicParameters: not used inside this function.
    :param ClusAll: list of dictionaries whose "heapSolutions" entry exposes asList().
    :param ensembles: maps a solution name to a file path; the basename is split
                      on "_" and its first piece is used as the model identifier.
    :param frag_fixed: when > 1, clusters are first regrouped following
                       __mergeEquivalentRotCombination().
    :param LIMIT_CLUSTER: when not None, only the cluster with this id is processed.
    :param getGlobalStats: selects the shape of the returned tuple.
    :return: (stats, listRotaClus), or, when `getGlobalStats` is true,
             (stats, listRotaClus, number of distinct identifiers, max llg, max zscore).
             stats[id] = [n solutions, n distinct identifiers, llg max, llg mean,
                          zscore max, zscore mean, llg std, llg min];
             listRotaClus holds (n distinct identifiers, llg max, id) tuples.
    """
    global MAP_OF_ROT_COMB

    CluWork = []
    if frag_fixed > 1:
        merged = __mergeEquivalentRotCombination()
        for key in merged:
            dictio = {"heapSolutions": ADT.Heap()}
            alli = [key] + merged[key]  # the key plus everything merged into it
            for clu in ClusAll:
                for item in clu["heapSolutions"].asList():
                    prio, rota = item
                    if rota["n_prev_cluster"] not in alli:
                        # stop scanning this cluster at the first solution outside the group
                        break
                    dictio["heapSolutions"].push(prio, rota)
            CluWork.append(dictio)
    else:
        CluWork = ClusAll

    numc = 0
    listRotaClus = []
    stats = {}
    zmax = 0
    llgmax = 0
    distinctall = []
    for clu in CluWork:
        if len(clu["heapSolutions"].asList()) == 0:
            continue
        # the cluster id is taken from the first solution in the list
        numc = clu["heapSolutions"].asList()[0][1]["n_prev_cluster"]
        if LIMIT_CLUSTER != None and LIMIT_CLUSTER != numc:
            continue
        for rel in MAP_OF_ROT_COMB.keys():
            val = MAP_OF_ROT_COMB[rel]
            if val == numc:
                # replace the id by the sorted tuple of integers parsed from the key text
                numc = tuple(sorted(map(lambda x: int(x), json.loads(str(rel).replace("(", "[").replace(")", "]")))))
                break

        distinct_pdbs = []
        nrts = len(clu["heapSolutions"].asList())
        llg_all = []
        zscore_all = []
        # Python 2 only: comparison function passed positionally to sorted()
        for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True):
            prio, rota = item
            llg_all.append(rota["llg"])
            zscore_all.append(rota["zscore"])
            if rota["llg"] > llgmax:
                llgmax = rota["llg"]
            if rota["zscore"] > zmax:
                zmax = rota["zscore"]
            pdbname = ensembles[rota["name"]]
            corresponding = (pdbname.split("/"))[-1]
            listona = corresponding.split("_")
            pdbid = listona[0]
            model = listona[1]  # not used afterwards
            idSolution = listona[-1]
            # raises ValueError unless the last piece contains exactly one "."
            idSolution, ext = idSolution.split(".")
            if pdbid not in distinct_pdbs:
                distinct_pdbs.append(pdbid)
            if pdbid not in distinctall:
                distinctall.append(pdbid)

        llg_avg = float(numpy.mean(numpy.array(llg_all)))
        zscore_avg = float(numpy.mean(numpy.array(zscore_all)))
        llg_std = float(numpy.std(numpy.array(llg_all)))
        llg_min = float(numpy.min(numpy.array(llg_all)))
        llg_max = float(numpy.max(numpy.array(llg_all)))
        zscore_max = float(numpy.max(numpy.array(zscore_all)))
        distpdb = len(distinct_pdbs)
        listRotaClus.append((distpdb, llg_max, numc))
        stats[numc] = [nrts, distpdb, llg_max, llg_avg, zscore_max, zscore_avg, llg_std, llg_min]

    if getGlobalStats:
        return stats, listRotaClus, len(distinctall), llgmax, zmax
    else:
        return stats, listRotaClus
def __getStatFromSumAndModel(sumPath, modelo, fullmodel, cluster, mode, n_ense=None):
    """Look up one model inside a clusters SUM file and report its rotation and rank.

    The clusters are read with ``readClustersFromSUM(sumPath)`` and every
    rotation is compared, by file name, against ``modelo``.  How the two names
    are normalised before the comparison depends on ``mode``.

    :param sumPath: path of the SUM file to read.
    :param modelo: name of the model to look for (deep-copied for every rotation
        because some modes rewrite it before comparing).
    :param fullmodel: model name used for the recursive lookup done in
        ``"ARCIMBOLDO"`` mode.
    :param cluster: if not None, only rotations whose ``"n_prev_cluster"`` equals
        ``int(cluster)`` are scanned and the rank restarts at 1 for each cluster;
        if None the clusters are sorted with ``__cmp_cluster`` (descending) and
        the rank runs across all of them.
    :param mode: one of the mode strings tested below.
    :param n_ense: if not None, a match additionally requires the integer after
        the first "-" of the rotation name to equal ``int(n_ense)``.
    :return: ``(rop, topLLG, topZSCORE, posRank)``; ``rop``/``posRank`` are None
        when nothing matched.

    NOTE(review): the block structure was reconstructed from whitespace-collapsed
    text; the nesting of the trailing ``if`` statements should be confirmed
    against the original file.
    """
    Clu, dicname = readClustersFromSUM(sumPath)
    rop = None
    topLLG = None
    topZSCORE = None
    posRank = None
    pos = 1
    frag = -1
    rp = None
    if mode == "ARCIMBOLDO":
        # Also consult "5_RNP_LIBRARY/clusters.sum" located beside the directory
        # of sumPath; the rotation matched there (if any) is kept in rp and is
        # used below to read its "fixed_frags".
        newPath = os.path.join(os.path.split(os.path.dirname(sumPath))[0], "5_RNP_LIBRARY/clusters.sum")
        if newPath != sumPath:
            tupl = __getStatFromSumAndModel(newPath, fullmodel, fullmodel, cluster, mode)
            rp = tupl[0]

    if cluster == None:
        Clu = sorted(Clu, __cmp_cluster, reverse=True)

    for clust in Clu:
        if cluster != None:
            # Rank is counted per cluster when a specific cluster was requested.
            pos = 1
        # NOTE(review): the heap is iterated directly here, whereas sibling
        # functions go through .asList() -- presumably equivalent; confirm.
        for item in clust["heapSolutions"]:
            prio, rota = item
            actualmodel = ""
            model = copy.deepcopy(modelo)
            if cluster != None and rota["n_prev_cluster"] != int(cluster):
                break
            actualmodel = os.path.basename(dicname[rota["name"]])
            if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES", "RNP"]:
                if not ("FR" in actualmodel and "xx" in actualmodel):
                    # Keep the first three "_" fields, dropping anything after
                    # a "-" in the third one.
                    ad = actualmodel.split("_")
                    actualmodel = ad[0] + "_" + ad[1] + "_" + ad[2].split("-")[0]
            elif mode in ["ARCIMBOLDO-SHREDDER"]:
                # NOTE: The name of the model is already correct, and the first _0_
                # might not be 0 if the run comes from spheres
                pass
            elif mode == "ARCIMBOLDO":
                # Fragment numbers are the integers following "FR" in each name.
                frag = int(model.split("FR")[1].split("_")[0])
                frar = int(rota["name"].split("FR")[1].split("_")[0])
                actualmodel = ""
                if frag == frar:
                    actualmodel = rota["name"].split("ensembleID")[1] + ".pdb"
                    model = "xx" + model.split("xx")[1]
                elif rp != None and "fixed_frags" in rp:
                    # Fall back to the fixed fragments of the rotation found in
                    # the recursive lookup; stop at the first one whose fragment
                    # number equals that of the current rotation.
                    for fri in rp["fixed_frags"]:
                        frag = int(fri["name"].split("FR")[1].split("_")[0])
                        if frag == frar:
                            actualmodel = rota["name"].split("ensembleID")[1] + ".pdb"
                            if len(actualmodel.split("-")) > 1:
                                model = fri["name"].split("ensembleID")[1] + ".pdb"
                            elif len(fri["name"].split("ensembleID")[1].split("-")) > 1:
                                model = fri["name"].split("ensembleID")[1].split("-")[0] + ".pdb"
                            else:
                                model = fri["name"].split("ensembleID")[1] + ".pdb"
                            break
            # Make both names agree on carrying (or not) the ".pdb" extension.
            if model.endswith(".pdb") and not actualmodel.endswith(".pdb"):
                actualmodel += ".pdb"
            elif not model.endswith(".pdb") and actualmodel.endswith(".pdb"):
                actualmodel = actualmodel[:-4]
            # No break on a match: a later matching rotation overwrites rop/posRank.
            if (n_ense == None and actualmodel == model) or (
                    actualmodel == model and int(rota["name"].split("-")[1]) == int(n_ense)):
                rop = rota
                posRank = pos
            # NOTE(review): the first comparisons are against None, which relies
            # on Python 2 ordering (None sorts below everything).
            if rota["llg"] > topLLG:
                topLLG = rota["llg"]
            if rota["zscore"] > topZSCORE:
                topZSCORE = rota["zscore"]
            pos += 1
    return rop, topLLG, topZSCORE, posRank
def __rankModelInCCList(entries, model, mode):
    """Rank ``model`` among CC records sorted by descending ``initcc``.

    :param entries: iterable of dicts with the keys ``"initcc"``, ``"ner"`` and
        ``"corresp"``.
    :param model: file name to look for.
    :param mode: for ``"ANALYSIS_CLU"`` and ``"ARCIMBOLDO-BORGES"`` the base name
        of ``"corresp"`` is reduced to its first, second and last "_" fields
        before it is compared; otherwise the plain base name is compared.
    :return: ``(record, top_initcc, rank)`` where ``record`` is the matching
        ``(initcc, ner, corresp)`` tuple (None if absent), ``top_initcc`` is the
        best ``initcc`` in the list (None for an empty list) and ``rank`` is the
        1-based position of the match (None if absent).
    """
    ranked = sorted(((entry["initcc"], entry["ner"], entry["corresp"]) for entry in entries), reverse=True)
    # Guarding the empty case avoids the IndexError the unguarded [0][0] raised.
    top_initcc = ranked[0][0] if ranked else None
    for rank, record in enumerate(ranked, 1):
        candidate = os.path.basename(record[2])
        if mode in ("ANALYSIS_CLU", "ARCIMBOLDO-BORGES"):
            fields = candidate.split("_")
            candidate = fields[0] + "_" + fields[1] + "_" + fields[-1]
        if candidate == model:
            return record, top_initcc, rank
    return None, top_initcc, None


def __getStatFromSumCCAndModel(pathBCC, pathACC, model, mode):
    """Report where ``model`` ranks by initial CC before and after refinement.

    :param pathBCC: path handed to ``readCCValFromSUM`` for the "before" values,
        or None to skip them.
    :param pathACC: path handed to ``readCCValFromSUM`` for the "after" values,
        or None to skip them.
    :param model: file name of the model to look for.
    :param mode: see ``__rankModelInCCList``.
    :return: ``(ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA)``.  The
        ``rop*`` items are ``(initcc, ner, corresp)`` tuples or None, the
        ``topINITCC_*`` items are None when the corresponding list is empty or
        was skipped, and ``posRank*`` are 1-based ranks or None.
    """
    befref = []
    aftref = []
    if pathBCC is not None:
        befref, _ = readCCValFromSUM(pathBCC)
    if pathACC is not None:
        aftref, _ = readCCValFromSUM(pathACC)

    ropB, topINITCC_B, posRankB = __rankModelInCCList(befref, model, mode)
    ropA, topINITCC_A, posRankA = __rankModelInCCList(aftref, model, mode)
    return ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA


def generatePDFGraph(current_dir, nameOutput, title, data):
    """Write ``<current_dir>/<nameOutput>.pdf`` with two scatter-plot pages.

    Both pages plot column 0 of ``data`` against column 1; the first page
    ('RMSD') colours the points by column 2 and the second ('Resolution') by
    column 3.  ``title`` is accepted but not used.

    NOTE(review): this function needs the module-level names ``plt``,
    ``mcolors`` and ``PdfPages``; the matplotlib imports that would provide
    them are commented out at the top of the file, so they must be restored
    before this can run.

    :param current_dir: directory in which the pdf is created.
    :param nameOutput: file name without the ".pdf" extension.
    :param title: unused.
    :param data: sequence of rows indexable at positions 0..3.
    """
    data = sorted(data)
    helil = [row[0] for row in data]
    llgs = [row[1] for row in data]
    rmsds = [row[2] for row in data]
    ress = [row[3] for row in data]
    nomepdf = os.path.join(current_dir, nameOutput + ".pdf")

    def make_colormap(seq):
        """Return a LinearSegmentedColormap.

        seq: a sequence of floats and RGB-tuples. The floats should be
        increasing and in the interval (0,1).
        """
        seq = [(None,) * 3, 0.0] + list(seq) + [1.0, (None,) * 3]
        cdict = {'red': [], 'green': [], 'blue': []}
        for i, item in enumerate(seq):
            if isinstance(item, float):
                # Each float is a breakpoint between the colours around it.
                r1, g1, b1 = seq[i - 1]
                r2, g2, b2 = seq[i + 1]
                cdict['red'].append([item, r1, r2])
                cdict['green'].append([item, g1, g2])
                cdict['blue'].append([item, b1, b2])
        return mcolors.LinearSegmentedColormap('CustomMap', cdict)

    c = mcolors.ColorConverter().to_rgb

    # A leftover interactive plot that referenced the undefined names
    # ``array_dg`` and ``rmsd`` (and blocked on plt.show()) used to sit here;
    # it could only raise NameError and has been removed.

    with PdfPages(nomepdf) as pdf:
        rvb = make_colormap([c('red'), c('violet'), 0.33, c('violet'), c('blue'), 0.66, c('blue')])
        plt.figure(figsize=(3, 3))
        plt.title('RMSD')
        plt.scatter(helil, llgs, c=rmsds, cmap=rvb)
        plt.colorbar()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        rvb = make_colormap([c('green'), c('blue'), 0.33, c('violet'), c('black'), 0.66, c('black')])
        plt.rc('text', usetex=True)
        plt.figure(figsize=(3, 3))
        plt.title('Resolution')
        plt.scatter(helil, llgs, c=ress, cmap=rvb)
        plt.colorbar()
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()
def __writeLineTable(data, first_rmsd, last_rmsd, first_res, last_res):
    """Render ``data`` as the text of a JavaScript array literal (header row + one row per item).

    Every row holds ``item[0]`` ('Helix'), ``item[1] / item[0]`` ('LLG'), a
    point-style string and a tooltip string.  The stroke colour comes from
    ``SystemUtility.htmlRgb(item[3], first_res, last_res, "blue")`` and the fill
    colour from ``SystemUtility.htmlRgb(item[2], first_rmsd, last_rmsd, "red")``.
    The tooltip shows ``item[1]``, ``item[2]`` and ``item[3]`` as LLG, RMSD and Res.

    :param data: sequence of rows indexable at positions 0..3.
    :param first_rmsd: lower bound passed to ``htmlRgb`` for the fill colour.
    :param last_rmsd: upper bound passed to ``htmlRgb`` for the fill colour.
    :param first_res: lower bound passed to ``htmlRgb`` for the stroke colour.
    :param last_res: upper bound passed to ``htmlRgb`` for the stroke colour.
    :return: the table text; for empty ``data`` only the header row is emitted.
    """
    header = "[\n" + "['Helix', 'LLG', {'type': 'string', 'role': 'style'}, {'type':'string', 'role':'tooltip'}],\n"

    rows = []
    for item in data:
        stroke_color = "" + str(SystemUtility.htmlRgb(item[3], first_res, last_res, "blue"))  # hexadecimal
        stroke_opacity = "0.6"  # float
        stroke_width = "1"  # integer
        fill_color = "" + str(SystemUtility.htmlRgb(item[2], first_rmsd, last_rmsd, "red"))  # hexadecimal
        fill_opacity = "1.0"
        # NOTE(review): item[1] / item[0] is an integer division on Python 2 if
        # both values are ints -- presumably they are floats; confirm.
        rows.append(
            "[" + str('%.2f' % item[0]) + ", " + str('%.2f' % (item[1] / item[0]))
            + ", 'point {stroke-color: " + stroke_color
            + "; stroke-opacity: " + stroke_opacity
            + "; stroke-width: " + stroke_width
            + "; fill-color: " + fill_color
            + "; fill-opacity: " + fill_opacity
            + "}','LLG: " + str('%.2f' % item[1])
            + ", RMSD: " + str('%.2f' % item[2])
            + ", Res: " + str('%.2f' % item[3]) + "']"
        )

    # Rows are separated by ",\n"; the last one is followed by a bare newline.
    body = ",\n".join(rows) + "\n" if rows else ""
    return header + body + "]\n"


def __extractTablesGraph(data):
    """Build the full table plus one table per distinct RMSD and per distinct resolution.

    Rows are sorted, then grouped by ``'%.2f' % item[2]`` (RMSD) and by
    ``'%.2f' % item[3]`` (resolution).  Every table is rendered with
    ``__writeLineTable`` using the colour bounds of the complete data set, so
    the colours are comparable across tables.

    :param data: sequence of rows indexable at positions 0..3.
    :return: ``(dic_rmsd_results, dic_res_results, full)``: two dicts mapping
        the formatted key to its table text, and the table text of all rows.
        Empty ``data`` gives two empty dicts and a header-only table instead of
        failing in ``min()``.
    """
    data = sorted(data)
    if not data:
        return {}, {}, __writeLineTable(data, None, None, None, None)

    rmsd_values = [item[2] for item in data]
    res_values = [item[3] for item in data]
    first_rmsd = min(rmsd_values)
    last_rmsd = max(rmsd_values)
    first_res = min(res_values)
    last_res = max(res_values)

    dic_rmsd = {}
    dic_res = {}
    for item in data:
        dic_rmsd.setdefault('%.2f' % item[2], []).append(item)
        dic_res.setdefault('%.2f' % item[3], []).append(item)

    full = __writeLineTable(data, first_rmsd, last_rmsd, first_res, last_res)

    dic_rmsd_results = {}
    for key_rmsd, value_rmsd in dic_rmsd.items():
        dic_rmsd_results[key_rmsd] = __writeLineTable(value_rmsd, first_rmsd, last_rmsd, first_res, last_res)

    dic_res_results = {}
    for key_res, value_res in dic_res.items():
        dic_res_results[key_res] = __writeLineTable(value_res, first_rmsd, last_rmsd, first_res, last_res)

    return dic_rmsd_results, dic_res_results, full
Borges started at: """ + root.find('configuration/time_start').text + """.
=============================INPUT FILE """ + root.find('configuration/bor_name').text + """=============================
""" + root.find('configuration/bor_text').text + """
============================================Summary of your data=====================================================
SPACEGROUP: """ + root.find('data/spacegroup').text + """
CELL DIMENSIONS: """ + root.find('data/cell_dim/A').text + ", " + root.find('data/cell_dim/B').text + ", " + root.find(
'data/cell_dim/C').text + ", " + root.find('data/cell_dim/alpha').text + ", " + root.find(
'data/cell_dim/beta').text + ", " + root.find('data/cell_dim/gamma').text + """
RESOLUTION: """ + root.find('data/resolution').text + """
NUMBER OF UNIQUE REFLECTIONS: """ + root.find('data/unique_refl').text + """
WARNING: At """ + root.find('data/resolution').text + """ it will be difficult to solve the structure and foms might be misleading
""" elif float(root.find('data/resolution').text) > 2.5: base_header += """WARNING: At """ + root.find('data/resolution').text + """ ARCIMBOLDO is not supported. Please contact us for specific help
""" if float(root.find('data/completeness').text) < 98: base_header += """WARNING: Having """ + root.find('data/completeness').text + """ of Completeness of the data for the given resolution is not enough for the success of the method thus ARCIMBOLDO will be terminated!
""" if root.tag == "arcimboldo": base_header += """\nFragment """ + str(fran) + """ | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Cluster | Rotation Function | Translation Function | Packing | Rigid Body Refinement | Initial CC | Best Trace CC/aa | ||||||||||||||||
#Rots. | Top LLG | Mean LLG | Top Zscore | Mean Zscore | #Trans. | Top LLG | Mean LLG | Top Zscore | Mean Zscore | #Sol. | Top LLG | Mean LLG | Top Zscore | Mean Zscore | #Sol. | Top LLG | Mean LLG | After Refinement CC | Cycle | CC | #Res. traced | """ + str(key).replace("[", "(").replace("]", ")") + """ | \n""" + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Rotations").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_zscore").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_zscore").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/FTF") is not None: clustftf = json.loads(root.find("ens1_frag" + str(fran) + "/FTF/allclus").text) if key in clustftf: body_table_row += """""" + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Translations").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_zscore").text) + """ | """ + str(root.find( "ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_zscore").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/PACK") is not None: clustpack = json.loads(root.find("ens1_frag" + str(fran) + "/PACK/allclus").text) if key in clustpack: body_table_row += """""" + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Translations").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_llg").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Mean_llg").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_zscore").text) + """ | """ + 
str(root.find("ens1_frag" + str( fran) + "/PACK/C" + keyd + "/Mean_zscore").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/RNP") is not None: clustrnp = json.loads(root.find("ens1_frag" + str(fran) + "/RNP/allclus").text) if key in clustrnp: body_table_row += """""" + str(root.find( "ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Solutions").text) + """ | """ + str(root.find( "ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Top_llg").text) + """ | """ + str(root.find( "ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Mean_llg").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/INITCC") is not None: clustinitcc = json.loads( root.find("ens1_frag" + str(fran) + "/INITCC/allclus").text) # print "Fragment",fran,"CLUSTINITCC",clustinitcc,"KEY",key if key in clustinitcc: body_table_row += """""" + str(root.find("ens1_frag" + str( fran) + "/INITCC/C" + keyd + "/initcc").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/EXP") is not None: if root.find("ens1_frag" + str( fran) + "/EXP/Cluster").text == str(key): body_table_row += """""" + str(root.find( "ens1_frag" + str( fran) + "/EXP/cycle").text) + """ | \n""" body_table_row += """""" + str(root.find( "ens1_frag" + str( fran) + "/EXP/finalcc").text) + """ | \n""" body_table_row += """""" + str(root.find( "ens1_frag" + str( fran) + "/EXP/restraced").text) + """ | \n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str( key): colorbg.append("0xA069D6") if float(root.find("ens1_frag" + str( fran) + "/EXP/finalcc").text) >= 30.0: start_table_row += """
\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str( key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str( key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str( key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||
\n""" start_table_row += """ | ||||||||||||||||||||||
\n""" start_table_row += """ | ||||||||||||||||||||||
The current best solution is: " + str(
root.find('backtracing/model').text) + " with FINALCC: " + str(
root.find('backtracing/finalcc').text) + " and n. residues traced " + str(
root.find('backtracing/restraced').text) + "
file is: " + str(
root.find('backtracing/file').text) + "
#Cluster | #Rotations | #Distinct Pdb | Top LLG | LLG Mean | Top Zscore | Zscore Mean |
---|---|---|---|---|---|---|
#Cluster | Rotation and Model Refinement | Translation Function | Packing | Rigid Body Refinement | Initial CC | Best Trace CC/aa | ||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
#Rots. | Top LLG | Mean LLG | Top Zscore | Mean Zscore | #Trans. | Top LLG | Mean LLG | Top Zscore | Mean Zscore | #Sol. | Top LLG | Mean LLG | Top Zscore | Mean Zscore | #Sol. | Top LLG | Mean LLG | Before Refinement CC | After Refinement CC | MODE | Cycle | CC | #Res. traced | """ + str(key).replace("[", "(").replace("]", ")") + """ | \n""" + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Rotations").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Top_zscore").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/FRF/C" + keyd + "/Mean_zscore").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/FTF") is not None: clustftf = json.loads(root.find("ens1_frag" + str(fran) + "/FTF/allclus").text) if key in clustftf and root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd) is not None: body_table_row += """""" + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Translations").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_llg").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Top_zscore").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/FTF/C" + keyd + "/Mean_zscore").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/PACK") is not None: clustpack = json.loads(root.find("ens1_frag" + str(fran) + "/PACK/allclus").text) if key in clustpack and root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd) is not None: body_table_row += """""" + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Translations").text) + """ | """ + str(root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_llg").text) + """ | """ + str( 
root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Mean_llg").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Top_zscore").text) + """ | """ + str(root.find( "ens1_frag" + str(fran) + "/PACK/C" + keyd + "/Mean_zscore").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/RNP") is not None: clustrnp = json.loads(root.find("ens1_frag" + str(fran) + "/RNP/allclus").text) if key in clustrnp and root.find("ens1_frag" + str( fran) + "/RNP/C" + keyd) is not None: body_table_row += """""" + str( root.find("ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Solutions").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Top_llg").text) + """ | """ + str( root.find("ens1_frag" + str(fran) + "/RNP/C" + keyd + "/Mean_llg").text) + """ | \n""" if root.find("ens1_frag" + str(fran) + "/INITCC") is not None: clustinitcc = json.loads( root.find("ens1_frag" + str(fran) + "/INITCC/allclus").text) if key in clustinitcc and root.find("ens1_frag" + str( fran) + "/INITCC/BEFORE/C" + keyd) is not None: body_table_row += """""" + str(root.find("ens1_frag" + str( fran) + "/INITCC/BEFORE/C" + keyd + "/initcc").text) + """ | """ + str(root.find("ens1_frag" + str( fran) + "/INITCC/AFTER/C" + keyd + "/initcc").text) + """ | \n""" if root.find("ens1_frag" + str( fran) + "/EXP") is not None and root.find('ens1_frag' + str( fran) + '/EXP/C' + str(keyd)) is not None: # print "EXP",root.find("ens1_frag"+str(fran)+"/EXP/C"+str(keyd)+"/Cluster").text,str(key),root.find("ens1_frag"+str(fran)+"/EXP/C"+str(keyd)+"/Cluster").text == str(key) if root.find("ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/Cluster").text == str(key): body_table_row += """""" + str(root.find( "ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/mode").text) + """ | \n""" body_table_row += """""" + str(root.find( "ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/cycle").text) + """ | \n""" body_table_row += """""" + str(root.find( "ens1_frag" + str(fran) 
+ "/EXP/C" + str( keyd) + "/finalcc").text) + """ | \n""" body_table_row += """""" + str(root.find( "ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/restraced").text) + """ | \n""" else: for tyo in range(4): body_table_row += """\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if float(root.find("ens1_frag" + str(fran) + "/EXP/C" + str( keyd) + "/finalcc").text) >= 30.0: start_table_row += """ |
\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if root.find('ens1_frag' + str( fran) + '/INITCC/bestclusterinitcc').text == str(key): colorbg.append("0xA069D6") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str( key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str( fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str( key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||||
\n""" colorbg = [] if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterllg').text == str(key): colorbg.append("0x6C8AD5") if root.find('ens1_frag' + str(fran) + '/PACK/bestclusterzscore').text == str(key): colorbg.append("0x33CEC3") if len(colorbg) > 0: colorbgt = 0 for cl in colorbg: colorbgt += int(cl, 16) colorbgt = "#" + hex(colorbgt)[2:8] start_table_row += """ | ||||||||||||||||||||||||
\n""" start_table_row += """ | ||||||||||||||||||||||||
\n""" start_table_row += """ | ||||||||||||||||||||||||
The current best solution is: " + str(
root.find('backtracing/model').text) + " with FINALCC: " + str(
root.find('backtracing/finalcc').text) + " and n. residues traced " + str(
root.find('backtracing/restraced').text) + "
file is: " + str(
root.find('backtracing/file').text) + "
""" + "\n"
listatime = json.loads(listatime.text)
for tim in listatime:
mode, step, ti = tim
base_header += """Time MODE: """ + str(mode) + """ STEP: """ + str(step) + """ """ + str(ti) + """
\n"""
base_header += """