#!/usr/bin/env python
# -*- coding: utf-8 -*-

import warnings

warnings.simplefilter("ignore", DeprecationWarning)
warnings.simplefilter("ignore", UserWarning)

import sys
import numpy
import os
import glob
import shutil
import time
import re
import datetime
import SystemUtility
import subprocess
import Quaternions
import Grid
import tarfile
import stat
import ADT
import scipy.stats
# import scipy.cluster.vq
import threading
import Bioinformatics3
import Bioinformatics
import arci_output
import copy
import traceback
import itertools
import pickle
import json
import cStringIO
import io
import BORGES_MATRIX
import getpass
# import matplotlib.pyplot as plt
# import matplotlib.colors as mcolors
# from matplotlib.backends.backend_pdf import PdfPages
from Bio.PDB import *
from termcolor import colored
import xml.etree.ElementTree as ET

# Auto manipulated from the library
BRFfor = -1
listaEva = {}

# To be set up from the binaries call
LOCAL = True
SHELXE_REQUIREMENTS = ""
PHASER_REQUIREMENTS = ""
BORGES_REQUIREMENTS = ""
GRID_TYPE_L = ""
GRID_TYPE_R = ""
PATH_NEW_PHASER = ""
PATH_NEW_SHELXE = ""
PATH_NEW_BORGESCLIENT = ""
PATH_NEW_ARCIFIRE = ""

# INTERNAL VARIABLES TO CHANGE ONLY IF PHASER OR SHELXE CHANGES
PHASER_V1 = "2.7"  # PREVIOUS VERSION
PHASER_V2 = "2.8"  # SUPPORTED VERSION
# Shelxe headers
SHELXE_V1 = "2018/1"
SHELXE_V2 = "2018/2"
PYTHON_V = (2, 7, 9)
GCC_V = "[GCC 4.4.3]"

SHELXE_LST_END_CONDITION = """grep "finished" *.lst | wc -l"""
SHELXE_LST_FAILURE_CONDITION = """grep "giving up" *.lst | wc -l"""
SHELXE_PDB_PHS_END_CONDITION = """ls *.lst | wc -l"""
SHELXE_LST_END_CONDITION_LOCAL = """finished"""
SHELXE_LST_FAILURE_CONDITION_LOCAL = """giving up"""
PHASER_OUT_END_CONDITION = """grep "EXIT STATUS: SUCCESS" *.out | wc -l"""
PHASER_OUT_FAILURE_CONDITION = """grep "EXIT STATUS: FAILURE" *.out | wc -l"""
PHASER_OUT_ANY_CASE = """grep "EXIT STATUS:" *.out | wc -l"""
PHASER_NMA_END_CONDITION = PHASER_OUT_END_CONDITION
PHASER_RLIST_SOL_PDB_END_CONDITION = """ls *.out | wc -l"""
PHASER_OUT_END_CONDITION_LOCAL = """EXIT STATUS: SUCCESS"""
PHASER_OUT_FAILURE_CONDITION_LOCAL = """EXIT STATUS: FAILURE"""
PHASER_NMA_END_CONDITION_LOCAL = PHASER_OUT_END_CONDITION_LOCAL
PHASER_OUT_END_TEST = 1
PHASER_NMA_END_TEST = PHASER_OUT_END_TEST
PHASER_RLIST_SOL_PDB_END_TEST = 0
SHELXE_LST_END_TEST = 1
SHELXE_PDB_PHS_END_TEST = 0
NUMBER_OF_FILES_PER_DIRECTORY = 1000
BASE_SUM_FROM_WD = True
LAST_AVAILABLE_ROTID = 0
MAP_OF_ROT_COMB = {}
POSTMORTEM = False
STOP_IF_SOLVED = True

###############################################################
#######################################################################################################
#                                            FUNCTIONS                                                #
#######################################################################################################


def mergeRotClusterObjects(Clu1, Clu2, suffix="merged", reset_euler="none"):
    """ Merge the solutions of the clusters in Clu1 into the matching clusters of Clu2.

    Solutions coming from Clu1 get the given suffix appended to their name; depending on
    reset_euler ("first", "second", "both" or "none") the Euler angles of the merged
    solutions are reset to [0.0, 0.0, 0.0].

    :param Clu1:
    :type Clu1:
    :param Clu2:
    :type Clu2:
    :param suffix:
    :type suffix:
    :param reset_euler:
    :type reset_euler:
    :return:
    :rtype:
    """
    for clu2 in Clu2:
        if len(clu2["heapSolutions"].asList()) > 0:
            for clu1 in Clu1:
                if len(clu1["heapSolutions"].asList()) > 0:
                    if clu1["heapSolutions"].asList()[0][1]["n_prev_cluster"] == \
                            clu2["heapSolutions"].asList()[0][1]["n_prev_cluster"]:
                        kd = ADT.Heap()
                        for item in clu1["heapSolutions"].asList():
                            prio, rota = item
                            rota["name"] = rota["name"] + suffix
                            if reset_euler.lower() in ["first", "both"]:
                                rota["euler"] = [0.0, 0.0, 0.0]
                            kd.push(prio, rota)
                        for item in clu2["heapSolutions"].asList():
                            prio, rota = item
                            rota["name"] = rota["name"]
                            if reset_euler.lower() in ["second", "both"]:
                                rota["euler"] = [0.0, 0.0, 0.0]
                            kd.push(prio, rota)
                        clu2["heapSolutions"] = kd
                        break
    return Clu2
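# --- Illustrative sketch (editor addition, not part of the pipeline; call explicitly to try it) ---
# Minimal example of the cluster layout consumed by mergeRotClusterObjects: each cluster is a
# dict with a "heapSolutions" ADT.Heap of (priority, rotation-dict) items sharing the same
# "n_prev_cluster". The names and values below are invented for illustration only.
def _demo_mergeRotClusterObjects():
    def make_clu(name, nclu):
        h = ADT.Heap()
        h.push((-10.0, -1.0), {"name": name, "n_prev_cluster": nclu, "euler": [10.0, 20.0, 30.0]})
        return {"heapSolutions": h}

    CluA = [make_clu("ensembleA", 0)]
    CluB = [make_clu("ensembleB", 0)]
    # Solutions from CluA are renamed with the suffix and, with reset_euler="first",
    # their Euler angles are zeroed before being pushed into the matching CluB cluster.
    return mergeRotClusterObjects(CluA, CluB, suffix="_merged", reset_euler="first")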
def __apply_inversion_scheme(scheme, totalclu2, sol2, ensembles2):
    """
    :param scheme:
    :type scheme:
    :param totalclu2:
    :type totalclu2:
    :param sol2:
    :type sol2:
    :param ensembles2:
    :type ensembles2:
    :return:
    :rtype:
    """

    def _modify_sol(sol, ensembles, inverted, totalclu):
        pathoide = ensembles[sol["name"]]
        dirname, basename = os.path.split(pathoide)
        # print "original name",sol["name"]
        # print "original ensembles",ensembles[sol["name"]]
        if inverted:
            basename = basename.replace("th", "thinverted") if "thinverted" not in basename else basename
            if "INV" in sol["name"]:
                sol["name"] = sol["name"].split("INV")[0] + "INV" + str(totalclu) + "INV" + sol["name"].split("INV")[2]
            elif "NNN" in sol["name"]:
                sol["name"] = sol["name"].split("NNN")[0] + "INV" + str(totalclu) + "INV" + sol["name"].split("NNN")[2]
            else:
                sol["name"] = sol["name"].split("xx")[0] + "INV" + str(totalclu) + "INVxx" + sol["name"].split("xx")[1]
            # print "Done inverting",sol["name"]
        else:
            basename = basename.replace("thinverted", "th") if "thinverted" in basename else basename
            if "INV" in sol["name"]:
                sol["name"] = sol["name"].split("INV")[0] + "NNN" + str(totalclu) + "NNN" + \
                              sol["name"].split("INV")[2]
            elif "NNN" in sol["name"]:
                sol["name"] = sol["name"].split("NNN")[0] + "NNN" + str(totalclu) + "NNN" + \
                              sol["name"].split("NNN")[2]
            else:
                sol["name"] = sol["name"].split("xx")[0] + "NNN" + str(totalclu) + "NNNxx" + \
                              sol["name"].split("xx")[1]
            # print "Done natural",sol["name"]
        # sol["original_rotcluster"] = "_".join(["-"+s for s in sol["original_rotcluster"].split("_")])
        ensembles[sol["name"]] = os.path.join(dirname, basename)
        # sol["n_prev_cluster"] = clus_id
        # print "modifico ensembles",ensembles[sol["name"]]
        return sol, ensembles

    for ele in scheme:
        # print "Current is ",ele
        if ele[0] >= 0:
            if ele[1] == "n":
                sol2["fixed_frags"][ele[0]], ensembles2 = _modify_sol(sol2["fixed_frags"][ele[0]], ensembles2,
                                                                      False, totalclu2)
            elif ele[1] == "i":
                sol2["fixed_frags"][ele[0]], ensembles2 = _modify_sol(sol2["fixed_frags"][ele[0]], ensembles2,
                                                                      True, totalclu2)
        else:
            if ele[1] == "n":
                sol2, ensembles2 = _modify_sol(sol2, ensembles2, False, totalclu2)
            elif ele[1] == "i":
                sol2, ensembles2 = _modify_sol(sol2, ensembles2, True, totalclu2)
    return sol2, ensembles2


def read_all_data_from_expansion_directory(pathExp):
    """ Collect the best CC/nres per solution, plus wMPE and incorrect-residue values,
    from the .pdb and .lst files of a SHELXE expansion directory.

    :param pathExp:
    :type pathExp:
    :return:
    :rtype:
    """
    dizio = {}
    p = subprocess.Popen('grep -H CC ' + os.path.join(pathExp, "*/*/*.pdb"), shell=True, stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    out = out.strip()
    lines = out.splitlines()
    for line in lines:
        values = line.split()
        rootdir, name = os.path.split(values[0].split(":TITLE")[0])
        cc = float(values[6][:-1])
        nres = int(values[7])
        if name not in dizio:
            dizio[name] = {"cc": cc, "nres": nres, "rootdir": rootdir}
        elif cc > dizio[name]["cc"]:
            dizio[name]["cc"] = cc
            dizio[name]["nres"] = nres
            dizio[name]["rootdir"] = rootdir
    for name in dizio:
        lst = os.path.join(dizio[name]["rootdir"], name[:-4] + ".lst")
        p = subprocess.Popen('grep -H wMPE ' + lst, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, err = p.communicate()
        out = out.strip()
        lines = out.splitlines()
        dizio[name]["wMPE"] = float(lines[-1].split("wMPE")[1].split("/")[0])
        p = subprocess.Popen('grep -H incorrect ' + lst, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, err = p.communicate()
        out = out.strip()
        lines = out.splitlines()
        dizio[name]["incorrect"] = float(lines[-1].split()[-3][:-1])
    return dizio
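# --- Illustrative sketch (editor addition) ---
# read_all_data_from_expansion_directory keeps, per pdb name, the best CC found in the grep
# output and then pulls wMPE/incorrect values from the matching .lst file. The synthetic line
# below only mimics the token positions the parser relies on (token 6 = CC value with a
# trailing '%', token 7 = number of traced residues); the real SHELXE TITLE wording may differ.
def _demo_parse_cc_grep_line():
    line = "/tmp/exp/0/1/sol1.pdb:TITLE a b c d CC 35.12% 71 residues"
    values = line.split()
    rootdir, name = os.path.split(values[0].split(":TITLE")[0])
    cc = float(values[6][:-1])   # "35.12%" -> 35.12
    nres = int(values[7])        # 71
    return {"name": name, "cc": cc, "nres": nres, "rootdir": rootdir}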
def generateInvertedHelices(ensembles, CluAll, top_inverted_solution_per_cluster, skipFirst=True,
                            add_a_random_solution=False, spaceGroup=None):
    """
    :param ensembles:
    :type ensembles:
    :param CluAll:
    :type CluAll:
    :param top_inverted_solution_per_cluster:
    :type top_inverted_solution_per_cluster:
    :param skipFirst:
    :type skipFirst:
    :param add_a_random_solution:
    :type add_a_random_solution:
    :param spaceGroup:
    :type spaceGroup:
    :return:
    :rtype:
    """
    Clu = []
    # NOTE: Iterate over each cluster and generate a new cluster that is the ghost cluster
    # (if Cl.id is 1 ghost is -1, if (1,3) it is (-1,-3))
    # clids = []
    # for clu in CluAll:
    #     for item in clu["heapSolutions"].asList():
    #         prio,sol = item
    #         clids.append(sol["n_prev_cluster"])
    #         break
    #
    # clids = max(clids)+1
    for clu in CluAll:
        # Clu.append(copy.deepcopy(clu))
        li = []
        totalclu = 0
        for item in clu["heapSolutions"].asList():
            # NOTE: I have to create combinations for each solution until
            # top_inverted_solution_per_cluster is reached
            if totalclu >= top_inverted_solution_per_cluster:
                break
            schemes = []
            prio, sol = item
            licombi = range(len(sol["fixed_frags"])) + [-1] if "fixed_frags" in sol else [-1]
            way = ["n", "i"]
            s = [[] for _ in licombi]
            [s[t].append((i, j)) for t, i in enumerate(licombi) for j in way]
            first = True
            # print "-------------------------------------",list(itertools.product(*s))
            # ((0, 'n'), (1, 'n'), (2, 'n'), (-1, 'n'))
            if not skipFirst:
                w = []
                for i in licombi:
                    if i >= 0 and ("INV" not in sol["fixed_frags"][i]["name"]):
                        w.append((i, 'n'))
                    elif i >= 0 and ("INV" in sol["fixed_frags"][i]["name"]):
                        w.append((i, "i"))
                    elif i < 0 and ("INV" not in sol["name"]):
                        w.append((i, 'n'))
                    elif i < 0 and ("INV" in sol["name"]):
                        w.append((i, "i"))
                w = tuple(w)
                # print(".........ADDING........",w)
                new_sol, ensembles = __apply_inversion_scheme(w, totalclu, copy.deepcopy(sol), ensembles)
                li.append((prio, new_sol))
                totalclu += 1
                schemes.append(w)
            if totalclu <= 1 and add_a_random_solution:
                new_sol = copy.deepcopy(sol)
                if "INV" in new_sol["name"]:
                    new_sol["name"] = new_sol["name"].split("INV")[0] + "WRO" + str(totalclu) + "WRO" + \
                                      new_sol["name"].split("INV")[2]
                elif "NNN" in new_sol["name"]:
                    new_sol["name"] = new_sol["name"].split("NNN")[0] + "WRO" + str(totalclu) + "WRO" + \
                                      new_sol["name"].split("NNN")[2]
                else:
                    new_sol["name"] = new_sol["name"].split("xx")[0] + "WRO" + str(totalclu) + "WROxx" + \
                                      new_sol["name"].split("xx")[1]
                ensembles[new_sol["name"]] = ensembles[sol["name"]]
                if "fixed_frags" in new_sol:
                    for nam in new_sol["fixed_frags"]:
                        oldname = nam["name"]
                        if "INV" in nam["name"]:
                            nam["name"] = nam["name"].split("INV")[0] + "WRO" + str(totalclu) + "WRO" + \
                                          nam["name"].split("INV")[2]
                        elif "NNN" in nam["name"]:
                            nam["name"] = nam["name"].split("NNN")[0] + "WRO" + str(totalclu) + "WRO" + \
                                          nam["name"].split("NNN")[2]
                        else:
                            nam["name"] = nam["name"].split("xx")[0] + "WRO" + str(totalclu) + "WROxx" + \
                                          nam["name"].split("xx")[1]
                        ensembles[nam["name"]] = ensembles[oldname]
                # print "Done natural",sol["name"]
                if spaceGroup not in ["P1", "p1", "P 1", "p 1"]:
                    new_sol["frac"] = [f + 0.1 if f + 0.1 <= 1.0 else 1.0 - f + 0.1 for f in new_sol["frac"]]
                    if "fixed_frags" in new_sol:
                        for u, frag in enumerate(new_sol["fixed_frags"]):
                            new_sol["fixed_frags"][u]["frac"] = [f + 0.1 if f + 0.1 <= 1.0 else 1.0 - f + 0.1
                                                                 for f in frag["frac"]]
                else:
                    if "fixed_frags" in new_sol:
                        # NOTE: the original read "enumerate(0, new_sol["fixed_frags"], 2)", which is not a
                        # valid enumerate() call; stepping over every second fixed fragment is assumed here.
                        for u, frag in list(enumerate(new_sol["fixed_frags"]))[0::2]:
                            new_sol["fixed_frags"][u]["frac"] = [f + 0.1 if f + 0.1 <= 1.0 else 1.0 - f + 0.1
                                                                 for f in frag["frac"]]
                    if "fixed_frags" not in new_sol or len(new_sol["fixed_frags"]) % 2 == 0:
                        new_sol["frac"] = [f + 0.1 if f + 0.1 <= 1.0 else 1.0 - f + 0.1 for f in new_sol["frac"]]
                li.append((prio, new_sol))
                totalclu += 1
            for w in itertools.product(*s):
                # print("INVERSION SCHEME",w)
                if first:
                    first = False
                    if skipFirst:
                        continue
                if w in schemes:
                    continue
                if totalclu < top_inverted_solution_per_cluster:
                    new_sol, ensembles = __apply_inversion_scheme(w, totalclu, copy.deepcopy(sol), ensembles)
                    li.append((prio, new_sol))
                    totalclu += 1
                    schemes.append(w)
                else:
                    break
        for item in li:
            prio, new_sol = item
            clu["heapSolutions"].push(prio, new_sol)
        Clu.append(copy.deepcopy(clu))
    return ensembles, Clu


def generateFakeMRSum_sols(model_file, initlocations, mode, single_cluster, output_direc, namesum, arcimboldo=False):
    """
    :param model_file:
    :type model_file:
    :param initlocations:
    :type initlocations:
    :param mode:
    :type mode:
    :param single_cluster:
    :type single_cluster:
    :param output_direc:
    :type output_direc:
    :param namesum:
    :type namesum:
    :param arcimboldo:
    :type arcimboldo:
    :return:
    :rtype:
    """
    Clu = []
    convn = {}
    counter_pdbs = 0
    dic = {"heapSolutions": ADT.Heap()}
    for solu in initlocations:
        counter_pdbs += 1
        prio = (1.0, 1.0)
        namerota = ""
        if not arcimboldo:
            namerota = 'ensemble' + str(counter_pdbs)
        else:
            namerota = "ensembleIDxx0FR" + str(counter_pdbs) + "_1"
        rota = {'rotationMatrices': [[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]],
                'frac': [solu[3], solu[4], solu[5]], 'name': namerota, 'bfactor': 0.0, 'zscore': 1.0,
                'numInRlist': counter_pdbs, 'quaternion': [0.0, 0.0, 0.0, 0.0], 'elong': 0.0, 'llg': 1.0,
                'euler': [solu[0], solu[1], solu[2]]}
        if mode == "TRA" and arcimboldo:
            rota["name"] = rota["name"] + "-1"
        convn[rota['name']] = model_file
        if arcimboldo:
            if single_cluster:
                rota['n_prev_cluster'] = 0
                rota['original_rotcluster'] = '0'
                dic["heapSolutions"].push(prio, copy.deepcopy(rota))
            else:
                rota['n_prev_cluster'] = counter_pdbs - 1
                rota['original_rotcluster'] = str(counter_pdbs - 1)
                dic = {"heapSolutions": ADT.Heap()}
                dic["heapSolutions"].push(prio, copy.deepcopy(rota))
                Clu.append(copy.deepcopy(dic))
        else:
            if mode in ["ROT", "TRA"] and single_cluster:
                rota['n_prev_cluster'] = 0
                rota['original_rotcluster'] = '0'
                dic["heapSolutions"].push(prio, copy.deepcopy(rota))
            elif mode == "ROT" and not single_cluster:
                rota['n_prev_cluster'] = counter_pdbs - 1
                rota['original_rotcluster'] = str(counter_pdbs - 1)
                dic = {"heapSolutions": ADT.Heap()}
                dic["heapSolutions"].push(prio, copy.deepcopy(rota))
                Clu.append(dic)
            elif mode == "TRA" and not single_cluster:
                rota['n_prev_cluster'] = counter_pdbs - 1
                rota['original_rotcluster'] = str(counter_pdbs - 1)
                Clu = []
                dic = {"heapSolutions": ADT.Heap()}
                dic["heapSolutions"].push(prio, copy.deepcopy(rota))
                Clu.append(copy.deepcopy(dic))
                writeSumClusters(Clu, os.path.join(output_direc, str(rota['n_prev_cluster'])), namesum, convn)
    if mode in ["ROT", "TRA"] and single_cluster:
        Clu.append(dic)
    if arcimboldo:
        writeSumClusters(Clu, output_direc, namesum, convn)
    elif mode == "ROT":
        writeSumClusters(Clu, output_direc, namesum, convn)
    elif mode == "TRA" and single_cluster:
        writeSumClusters(Clu, os.path.join(output_direc, str(0)), namesum, convn)
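# --- Illustrative sketch (editor addition; paths and model name are hypothetical) ---
# Each entry of initlocations provides three Euler angles followed by three fractional
# coordinates, consumed as solu[0:3] -> "euler" and solu[3:6] -> "frac" of the fake rotation.
def _demo_generateFakeMRSum_sols():
    initlocations = [(10.0, 20.0, 30.0, 0.1, 0.2, 0.3),
                     (40.0, 50.0, 60.0, 0.4, 0.5, 0.6)]
    outdir = "/tmp/fake_mr_sum"  # hypothetical output directory
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    generateFakeMRSum_sols("model.pdb", initlocations, "ROT", True, outdir, "clusters.sum", arcimboldo=False)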
{"heapSolutions": ADT.Heap()} for root, subFolders, files in os.walk(models_dir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): counter_pdbs += 1 prio = (1.0, 1.0) namerota = "" if not arcimboldo: namerota = 'ensemble' + str(counter_pdbs) else: namerota = "ensembleIDxx0FR" + str(counter_pdbs) + "_1" rota = {'rotationMatrices': [[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]], 'frac': [0.0, 0.0, 0.0], 'name': namerota, 'bfactor': 0.0, 'zscore': 1.0, 'numInRlist': counter_pdbs, 'quaternion': [0.0, 0.0, 0.0, 0.0], 'elong': 0.0, 'llg': 1.0, 'euler': [0.0, 0.0, 0.0]} if mode == "TRA" and arcimboldo: rota["name"] = rota["name"] + "-1" convn[rota['name']] = pdbf if arcimboldo: if single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) else: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) else: if mode in ["ROT", "TRA"] and single_cluster: rota['n_prev_cluster'] = 0 rota['original_rotcluster'] = '0' dic["heapSolutions"].push(prio, copy.deepcopy(rota)) elif mode == "ROT" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(dic) elif mode == "TRA" and not single_cluster: rota['n_prev_cluster'] = counter_pdbs - 1 rota['original_rotcluster'] = str(counter_pdbs - 1) Clu = [] dic = {"heapSolutions": ADT.Heap()} dic["heapSolutions"].push(prio, copy.deepcopy(rota)) Clu.append(copy.deepcopy(dic)) writeSumClusters(Clu, os.path.join(output_direc, str(rota['n_prev_cluster'])), namesum, convn) if mode in ["ROT", "TRA"] and single_cluster: Clu.append(dic) if arcimboldo: writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "ROT": writeSumClusters(Clu, output_direc, namesum, convn) elif mode == "TRA" and single_cluster: writeSumClusters(Clu, os.path.join(output_direc, str(0)), namesum, convn) def mergeZSCOREinRNP(DicParameters, sumPACK, CluAll, convNames, isARCIMBOLDO_LITE=True): """ :param DicParameters: :type DicParameters: :param sumPACK: :type sumPACK: :param CluAll: :type CluAll: :param convNames: :type convNames: :param isARCIMBOLDO_LITE: :type isARCIMBOLDO_LITE: :return: :rtype: """ convPACK, CluPACK, RotClu, encn = readClustersFromSUMToDB(DicParameters, sumPACK, "ROTSOL") inter = [item[1] for sublist in map(lambda y: y["heapSolutions"].asList(), CluPACK) for item in sublist] zscored = {(dizio["name"], tuple(sorted(dizio["original_rotcluster"].split("_"))) if isARCIMBOLDO_LITE else dizio[ "n_prev_cluster"]): dizio["zscore"] for dizio in inter} #NOTE: PRINT FOR NICOLAS BUG #print zscored Clu = [] for clun in CluAll: dic = {"heapSolutions": ADT.Heap()} for item in clun["heapSolutions"].asList(): prio, rota = item rota["zscore"] = zscored[(rota["name"],tuple(sorted(rota["original_rotcluster"].split("_"))) if isARCIMBOLDO_LITE else rota["n_prev_cluster"])] prio = (-1 * rota["llg"], -1 * rota["zscore"]) dic["heapSolutions"].push(prio, rota) Clu.append(dic) return Clu, convNames def readPMGYREsum(gyreSUMPath): """ :param gyreSUMPath: :type gyreSUMPath: :return: :rtype: """ f = open(gyreSUMPath, "r") alllines = f.readlines() f.close() CCVAL = [] for line in alllines: riga1 = line.split() model = getNewPathFromMerging(gyreSUMPath, riga1[1]) size = int(riga1[3]) 
def readPMGYREsum(gyreSUMPath):
    """ Parse a pm_gyre.sum file into a list of per-model statistics dictionaries.

    :param gyreSUMPath:
    :type gyreSUMPath:
    :return:
    :rtype:
    """
    f = open(gyreSUMPath, "r")
    alllines = f.readlines()
    f.close()
    CCVAL = []
    for line in alllines:
        riga1 = line.split()
        model = getNewPathFromMerging(gyreSUMPath, riga1[1])
        size = int(riga1[3])
        rmsd_gyre = float(riga1[5])
        rmsd_init = float(riga1[7])
        core_init = int(riga1[9])
        rmsd_fin = float(riga1[11])
        core_fin = int(riga1[13])
        rmsd_init_common = float(riga1[15])
        rmsd_fin_common = float(riga1[17])
        rmsd_diff = float(riga1[19])
        core_common = int(riga1[21])
        llg = float(riga1[23])
        dizio = {"model": model, "size": size, "rmsd_gyre": rmsd_gyre, "rmsd_init": rmsd_init,
                 "core_init": core_init, "rmsd_fin": rmsd_fin, "core_fin": core_fin,
                 "rmsd_init_common": rmsd_init_common, "rmsd_fin_common": rmsd_fin_common,
                 "rmsd_diff": rmsd_diff, "core_common": core_common, "llg": llg}
        CCVAL.append(dizio)
    return CCVAL
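# --- Illustrative sketch (editor addition) ---
# readPMGYREsum expects alternating label/value tokens per line; the values sit at the odd
# positions 1, 3, ..., 23. The label strings below are placeholders for illustration only;
# the real pm_gyre.sum labels may be worded differently.
def _demo_readPMGYREsum_line():
    line = ("model: frag1.pdb size: 18 rmsd_gyre: 0.85 rmsd_init: 1.20 core_init: 15 "
            "rmsd_fin: 1.05 core_fin: 16 rmsd_init_common: 1.10 rmsd_fin_common: 0.95 "
            "rmsd_diff: 0.15 core_common: 14 llg: 75.3")
    riga1 = line.split()
    return {"model": riga1[1], "size": int(riga1[3]), "rmsd_gyre": float(riga1[5]),
            "llg": float(riga1[23])}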
qe.write("plot \"" + graph2 + ".data\" using 2:xticlabels(1) title \"Frequency rmsd_gyre\"\n") qn.close() qe.close() graph3 = os.path.join(outputDir, "init_fin_" + str(q)) graph3a = os.path.join(outputDir, "init_fin_filtered_no_tr_" + str(q)) graph3b = os.path.join(outputDir, "init_fin_filtered_no_tr_bad_core_" + str(q)) qe = open(graph3 + ".scri", "w") qea = open(graph3a + ".scri", "w") qeb = open(graph3b + ".scri", "w") qe.write("set terminal png size 1000,1000\nset output \"" + graph3 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set xlabel "RMSD initial model vs final structure"\n') qe.write('set ylabel "RMSD gyred model vs final structure"\n') qe.write("unset key\n") qea.write("set terminal png size 1000,1000\nset output \"" + graph3a + ".png\"\n") qea.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qea.write('set xlabel "RMSD initial model vs final structure"\n') qea.write('set ylabel "RMSD gyred model vs final structure"\n') qea.write("unset key\n") qeb.write("set terminal png size 1000,1000\nset output \"" + graph3b + ".png\"\n") qeb.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qeb.write("unset key\n") qeb.write('set xlabel "RMSD initial model vs final structure"\n') qeb.write('set ylabel "RMSD gyred model vs final structure"\n') qn = open(graph3 + ".data", "w") qn.write("#\tX\tY\tC\n") qna = open(graph3a + ".data", "w") qna.write("#\tX\tY\tC\n") qnb = open(graph3b + ".data", "w") qnb.write("#\tX\tY\tC\n") for ele in sorted(val, lambda x, y: cmp(x["rmsd_init_common"], y["rmsd_init_common"])): if ele["model"] in CC_valuta: qn.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") qna.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") if ele["core_common"] >= (ele["size"] * 60) / 100.0: qnb.write(str("%.2f" % ele["rmsd_init_common"]) + "\t" + str( "%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n") else: qn.write( str("%.2f" % ele["rmsd_init_common"]) + "\t" + str("%.2f" % ele["rmsd_fin_common"]) + "\t" + str( "%.2f" % 100) + "\n") qe.write("plot \"" + graph3 + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qn.close() qe.close() qea.write("plot \"" + graph3a + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qna.close() qea.close() qeb.write("plot \"" + graph3b + ".data\" using 1:2:3 title \"points\" pt 7 ps 2 palette, x notitle\n") qnb.close() qeb.close() graph4 = os.path.join(outputDir, "rmsd_diffn_" + str(q)) graph4a = os.path.join(outputDir, "rmsd_diffn_filtered_no_tr_" + str(q)) graph4b = os.path.join(outputDir, "rmsd_diffn_filtered_no_tr_bad_core_" + str(q)) qe = open(graph4 + ".scri", "w") qea = open(graph4a + ".scri", "w") qeb = open(graph4b + ".scri", "w") qe.write("set terminal png size 4000,500\nset output \"" + graph4 + ".png\"\n") qe.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n') qe.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n') qe.write('set xlabel "Model"\n') qe.write('set ylabel "Rmsd diff"\n') qe.write('set xtics rotate out\n') qe.write("unset key\n") qea.write("set 
        qea.write("set terminal png size 3000,500\nset output \"" + graph4a + ".png\"\n")
        qea.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n')
        qea.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n')
        qea.write('set xlabel "Model"\n')
        qea.write('set ylabel "Rmsd diff"\n')
        qea.write('set xtics rotate out\n')
        qea.write("unset key\n")
        qeb.write("set terminal png size 2000,500\nset output \"" + graph4b + ".png\"\n")
        qeb.write('set palette defined ( 70 "#66FF33", 80 "#FF6600", 85 "#0066FF", 90 "#000000", 100 "#CC00CC" )\n')
        qeb.write("unset key\n")
        qeb.write('set title "Diff RMSD between initial vs ent and gyred vs ent"\n')
        qeb.write('set xlabel "Model"\n')
        qeb.write('set ylabel "Rmsd diff"\n')
        qeb.write('set xtics rotate out\n')
        qn = open(graph4 + ".data", "w")
        qn.write("#\tN\tX\tY\tC\n")
        qna = open(graph4a + ".data", "w")
        qna.write("#\tN\tX\tY\tC\n")
        qnb = open(graph4b + ".data", "w")
        qnb.write("#\tN\tX\tY\tC\n")
        index = 1
        indexa = 1
        indexb = 1
        for ele in sorted(val, lambda x, y: cmp(x["model"], y["model"])):
            if ele["model"] in CC_valuta:
                qn.write(str(index) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str(
                    "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n")
                qna.write(str(indexa) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str(
                    "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n")
                indexa += 1
                if ele["core_common"] >= (ele["size"] * 60) / 100.0:
                    qnb.write(
                        str(indexb) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str(
                            "%.2f" % CC_valuta[ele["model"]]["wMPE_init"][0]) + "\n")
                    indexb += 1
            else:
                qn.write(str(index) + "\t" + str(ele["model"]) + "\t" + str("%.2f" % ele["rmsd_diff"]) + "\t" + str(
                    "%.2f" % 100) + "\n")
            index += 1
        qe.write(
            "plot \"" + graph4 + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n")
        qn.close()
        qe.close()
        qea.write(
            "plot \"" + graph4a + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n")
        qna.close()
        qea.close()
        qeb.write(
            "plot \"" + graph4b + ".data\" using 1:3:4:xtic(2) title \"points\" pt 7 ps 2 palette, 0 notitle with lines\n")
        qnb.close()
        qeb.close()
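# --- Illustrative sketch (editor addition; output directory is hypothetical) ---
# generateStatisticsPMGyre always emits paired files: "<name>.data" with tab-separated
# columns and "<name>.scri" with the gnuplot commands that render them. The same pattern,
# reduced to a minimal, self-contained example:
def _demo_gnuplot_pair(outputDir="/tmp/gyre_stats"):
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    graph = os.path.join(outputDir, "demo")
    qn = open(graph + ".data", "w")
    qn.write("#\tX\tY\n")
    qn.write("10\t0.85\n")  # one (size, rmsd) point
    qn.close()
    qe = open(graph + ".scri", "w")
    qe.write("set terminal png size 800,600\nset output \"" + graph + ".png\"\n")
    qe.write("plot \"" + graph + ".data\" using 1:2 notitle\n")
    qe.close()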
def fillClusters(DicParameters, CluAll, merged_list, unmerged_list, convNames, quate, laue, ncs, cell_dim,
                 clusteringAlg, threshold_alg):
    """
    :param DicParameters:
    :type DicParameters:
    :param CluAll:
    :type CluAll:
    :param merged_list:
    :type merged_list:
    :param unmerged_list:
    :type unmerged_list:
    :param convNames:
    :type convNames:
    :param quate:
    :type quate:
    :param laue:
    :type laue:
    :param ncs:
    :type ncs:
    :param cell_dim:
    :type cell_dim:
    :param clusteringAlg:
    :type clusteringAlg:
    :param threshold_alg:
    :type threshold_alg:
    :return:
    :rtype:
    """
    global LAST_AVAILABLE_ROTID

    # NOTE: fill the clusters of the merged_list
    i = 1
    for merged in merged_list:
        while 1:
            if not os.path.exists(merged):
                time.sleep(1)
                continue
            break
        # C,e = readClustersFromSUM(merged)
        e, C, Rc, er = readClustersFromSUMToDB(DicParameters, merged, "ROTSOL")
        npt = os.path.join(os.path.split(merged)[0], "./0")
        try:
            shutil.rmtree(npt)
        except:
            print "Cannot remove", npt
        for c in C:
            for item in c["heapSolutions"]:
                prio, rota = item
                blabla = rota["name"]
                rota["name"] = rota["name"] + "_" + str(i)
                CluAll[rota["n_prev_cluster"]]["heapSolutions"].push(prio, rota)
                convNames[rota["name"]] = e[blabla]
                # print "Merging",rota["name"],"in Cluster",rota["n_prev_cluster"]
        i += 1

    # NOTE: for the unmerged, clustering between them should be done and then those clusters
    # should be added to CluAll
    list_rot_unmerged = []
    for unmerged in unmerged_list:
        while 1:
            if not os.path.exists(unmerged):
                time.sleep(1)
                continue
            break
        C, e = readClustersFromSUM(unmerged)
        for c in C:
            for item in c["heapSolutions"]:
                prio, rota = item
                blabla = rota["name"]
                rota["name"] = rota["name"] + "_" + str(i)
                rota["n_prev_cluster"] = LAST_AVAILABLE_ROTID
                rota["original_rotcluster"] = str(LAST_AVAILABLE_ROTID)
                dic = {"heapSolutions": ADT.Heap()}
                dic["heapSolutions"].push(prio, rota)
                list_rot_unmerged.append(dic)
                convNames[rota["name"]] = e[blabla]
                LAST_AVAILABLE_ROTID += 1
                # print "Unmerged",rota["name"],"assigned cluster",LAST_AVAILABLE_ROTID
        i += 1
    print "Start clustering unmerged rotations #:", len(list_rot_unmerged)
    performed, Clud = unifyClustersEquivalent(list_rot_unmerged, convNames, quate, laue, ncs, cell_dim,
                                              clusteringAlg, threshold_alg)
    print "...Done! Merged clusters", len(Clud)
    for c in Clud:
        CluAll.append(c)
    # print "Final number of clusters: ",CluAll
    print "Clusters before trying unify...", len(CluAll)
    performed, CluAll = unifyClustersEquivalent(CluAll, convNames, quate, laue, ncs, cell_dim, clusteringAlg,
                                                threshold_alg)
    print "Clusters after having unified", len(CluAll)
    CluAll, convNames = applyFilterName(CluAll, convNames)
    return CluAll, convNames
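# --- Illustrative sketch (editor addition) ---
# fillClusters blocks until each expected .sum file appears on disk, polling once per
# second; the same wait pattern extracted as a stand-alone helper for clarity.
def _demo_wait_for_file(path, poll=1):
    while not os.path.exists(path):
        time.sleep(poll)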
def localizeModelFragmentsInRealStructure(mosedDir, predictedDir, outDir):
    """
    :param mosedDir:
    :type mosedDir:
    :param predictedDir:
    :type predictedDir:
    :param outDir:
    :type outDir:
    :return:
    :rtype:
    """
    dictRes = {}
    if not os.path.exists(outDir):
        os.makedirs(outDir)
    for root, subFolders, files in os.walk(predictedDir):
        for fileu in files:
            pdbf = os.path.join(root, fileu)
            if not pdbf.endswith(".pdb"):
                continue
            tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(pdbf, False)
            # print pdbf
            # print tupleResult
            # print "--------------------------"
            lisBigSS = tupleResult[1]
            strucc = tupleResult[0]
            cont = 0
            idname = (os.path.basename(pdbf))[:-4]
            dictRes[idname] = [10000, ""]
            for root2, subFolders2, files2 in os.walk(mosedDir):
                for fileu2 in files2:
                    pdbf2 = os.path.join(root2, fileu2)
                    if not pdbf2.endswith(".pdb"):
                        continue
                    tupleResult2 = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(pdbf2, False)
                    lisBigSS2 = tupleResult2[1]
                    strucc2 = tupleResult2[0]
                    i = 0
                    idname2 = (os.path.basename(pdbf2))[:-4]
                    translateStructurebyCentroidMass(cont, i, strucc, strucc2, outDir)
                    tupleResultB = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(
                        outDir + str(cont) + "_" + str(i) + "_rottra.pdb", False)
                    struFinal = tupleResultB[0]
                    BlistFrags = tupleResultB[1]
                    nWindows, comp_windows, anyWay = Bioinformatics.compareDistributionAccordingOrientation(
                        lisBigSS2, BlistFrags, 10, 0, "A")
                    result = False
                    for t in range(len(nWindows)):
                        prop = numpy.ceil((nWindows[t] * 40) / 100)
                        if len(comp_windows[t]) >= (nWindows[t] - prop):
                            result = True
                    # TODO: include residue info for all fragments, not only those of the first in the list
                    preS = (((lisBigSS[0])["resIdList"])[0])[1]
                    preF = (((lisBigSS[0])["resIdList"])[-1])[1]
                    realS = (((lisBigSS2[0])["resIdList"])[0])[1]
                    realF = (((lisBigSS2[0])["resIdList"])[-1])[1]
                    rmsd = Bioinformatics.getRMSD(pdbf2, outDir + str(cont) + "_" + str(i) + "_rottra.pdb", "PDB",
                                                  listmodel=lisBigSS2, doNotMove=True)
                    rmsd = rmsd[0]
                    if rmsd < dictRes[idname][0]:
                        if dictRes[idname][1] != "":
                            os.remove(dictRes[idname][1])
                        shutil.copyfile(outDir + str(cont) + "_" + str(i) + "_rottra.pdb",
                                        outDir + "best_" + str(idname) + ".pdb")
                        dictRes[idname] = [rmsd, outDir + "best_" + str(idname) + ".pdb", result, comp_windows,
                                           nWindows, idname2, anyWay, preS, preF, realS, realF]
                    elif result:
                        print "Parallel but not best rmsd:"
                        print "idname1", idname, preS, preF
                        print "idname2", idname2, realS, realF
                        print "actual rmsd", dictRes[idname][0]
                        print "FOUND RMSD:", rmsd, "FILE", outDir + "best_" + str(idname)
                        print comp_windows
                    os.remove(outDir + str(cont) + "_" + str(i) + "_rottra.pdb")
                    cont += 1
                    i += 1
    return dictRes


def anisotropyCorrection_and_test(cm, sym, DicGridConn, DicParameters, current_dir, mtz, F, SIGF, Intensities, Aniso,
                                  formfactors, nice, pda, hkl, ent, shelxe_line):
    """ Performs phaser's anisotropy and tncs correction, as well as a shelxe test job.
    Tests both programs in the chosen computing mode.

    :param cm: Instance of the grid object
    :type cm: Grid.Grid object
    :param sym: Instance of the SystemUtility object
    :type sym: SystemUtility.SystemUtility object
    :param DicGridConn: dictionary with information required for the connections. Keys: "username","host","port",
                        "passkey","promptA","isnfs","remote_submitter_username","remote_submitter_host",
                        "remote_submitter_port","promptB"
    :type DicGridConn: dict
    :param DicParameters:
    :type DicParameters:
    :param current_dir:
    :type current_dir:
    :param mtz:
    :type mtz:
    :param F:
    :type F:
    :param SIGF:
    :type SIGF:
    :param Intensities:
    :type Intensities:
    :param Aniso:
    :type Aniso:
    :param formfactors:
    :type formfactors: str
    :param nice:
    :type nice: int
    :param pda: path to the pda file
    :type pda: str
    :param hkl: path to the hkl file
    :type hkl: str
    :param ent: path to the ent file
    :type ent: str
    :param shelxe_line: line to use in the call to shelxe
    :type shelxe_line: str
    :return:
    :rtype:
    """
    global NUMBER_OF_FILES_PER_DIRECTORY
    global PATH_NEW_PHASER
    global PATH_NEW_SHELXE

    # CONFIGURE NUMBER OF PARALLEL FILES
    if cm != None and cm.type_grid == "torque":
        NUMBER_OF_FILES_PER_DIRECTORY = cm.parallel_jobs

    # TEST PYTHON VERSION
    info_p = sys.version_info
    info_g = (sys.version).splitlines()
    print "Your Python version is: ", info_g[0]
    if info_p[0] == PYTHON_V[0] and info_p[1] == PYTHON_V[1]:
        print colored("OK:", "green"), "Your python version is compatible with this standalone"
    elif info_p[0] == PYTHON_V[0] and info_p[1] > PYTHON_V[1]:
        print colored("WARNING:", "yellow"), "Your python installation is newer than required,\nno issues have been encountered with this version.\nIf you experience an error, please try the version " + str(
            PYTHON_V[0]) + "." + str(PYTHON_V[1]) + "." + str(PYTHON_V[2]) + " and report the bug."
    else:
        print colored("FATAL:", "red"), "Your python version is outdated and incompatible with this standalone.\nPlease install Python " + str(
            PYTHON_V[0]) + "." + str(PYTHON_V[1]) + "." + str(PYTHON_V[2]) + "."
        sys.exit(1)

    # TEST DIRECTORY and MTZ
    if not os.path.exists(current_dir):
        print colored("FATAL:", "red"), "The working directory: " + str(
            current_dir) + " does not exist or is not accessible for the user: " + str(getpass.getuser())
        sys.exit(1)
    if not os.path.exists(mtz):
        print colored("FATAL:", "red"), "The mtz: " + str(
            os.path.abspath(mtz)) + " does not exist or is not readable by the user: " + str(getpass.getuser())
        sys.exit(1)

    # TEST CONNECTION
    try:
        SystemUtility.open_connection(DicGridConn, DicParameters, cm)
    except:
        print colored("FATAL", "red"),
        print "remote connection cannot be established. You were trying to connect to the remote grid at:"
        print colored("host", "blue"),
        print DicGridConn["host"]
        print colored("with port", "blue"),
        print DicGridConn["port"]
        print colored("as user", "blue"),
        print DicGridConn["username"]
        print 'Please, check again that your credentials (including your key or password) are correct,\nand if the ' \
              'problem persists contact your local administrator to report this error.'
        sys.exit(1)

    # TEST PHASER PATH and VERSION
    outlines = []
    if (cm is None) or (not hasattr(cm, "channel")):  # multiprocessing and local_grid
        # Try to guess the phaser path in case it has not been set
        if len(os.path.split(PATH_NEW_PHASER)[0]) == 0:
            p = subprocess.Popen(["which", PATH_NEW_PHASER], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            out, err = p.communicate()
            PATH_NEW_PHASER = out.strip()
        # If it has been set but it does not exist, raise an error
        if not os.path.exists(PATH_NEW_PHASER):
            print colored("FATAL:", "red"), "The path given for phaser: " + str(
                os.path.abspath(PATH_NEW_PHASER)) + " does not exist or is not accessible to the user: " + str(
                getpass.getuser())
            sys.exit(1)
        # Write a mock sh file to test phaser is working, afterwards remove unnecessary files
        f = open(os.path.join(current_dir, "th12323.sh"), "w")
        f.close()
        f = open(os.path.join(current_dir, "th12323.sh"), "r")
        p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        out, err = p.communicate()
        f.close()
        os.remove(os.path.join(current_dir, "th12323.sh"))
        if os.path.exists(os.path.join(current_dir, "PHASER.sum")):
            os.remove(os.path.join(current_dir, "PHASER.sum"))
        outlines = out.splitlines()
    else:  # remote_grid case
        cm.create_remote_file("th12323.sh")
        out = cm.connection.send_command_to_channel(cm.channel, '' + str(PATH_NEW_PHASER) + ' < th12323.sh',
                                                    cm.promptB)
        outlines = out.splitlines()
        cm.remove_remote_file("th12323.sh")
        cm.remove_remote_file("PHASER.sum")
    version_ok = False
    for lin in outlines:
        if "Command not found" in lin:
            print colored("FATAL:", "red"), "The path given for phaser: " + str(os.path.abspath(PATH_NEW_PHASER)) + \
                                            " does not exist in the remote workstation or is not accessible for " \
                                            "the remote user: " + DicGridConn["username"]
            SystemUtility.close_connection(DicGridConn, DicParameters, cm)
            version_ok = False
            sys.exit(1)
        elif PHASER_V2 in lin:
            version_ok = True
    if not version_ok:
        print colored("FATAL:", "red"), "The Phaser version is not compatible with this standalone. " \
                                        "Please, update your version to", str(PHASER_V2)
" \ "Please, update your version to ", str(PHASER_V2) sys.exit(1) # TEST GRID current_dir2 = "" if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir("testx1245") print cm.change_remote_dir("testx1245") cm.copy_local_file(mtz, os.path.basename(mtz), send_now=True) # copy the file not create the link cm.copy_local_file(hkl, "test12345.hkl", send_now=True) if os.path.exists(ent): cm.copy_local_file(ent, "test12345.ent", send_now=True) # TEST PHASER f = open(os.path.join(current_dir, "anis.sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NCS" + "\n") f.write("MR_NCS" + "\n") if cm is not None or hasattr(cm, "channel"): f.write('HKLIN ' + str(os.path.basename(mtz)) + "\n") else: f.write('HKLIN ' + str(os.path.abspath(mtz)) + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Anisotropy and TNCS Correction" + "\n") # NOTE CM: testing microED f.write(formfactors+ "\n") f.write("RFACTOR USE OFF CUTOFF 35"+"\n") # NOTE CM: testing microED f.write("TNCS EPSFAC WRITE anis.tncs" + "\n") f.write("NORM EPSFAC WRITE anis.norm" + "\n") f.write("TNCS PATT MAPS ON" + "\n") # CM: will write out a patterson map to check if non ori peaks are found f.write('ROOT anis\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(current_dir, "anis.sh"), "anis.sh", send_now=True) listaDirec = "" if hasattr(cm, "channel"): listaDirec = cm.get_remote_pwd() else: listaDirec = os.path.abspath(current_dir) def startANISO(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice -n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err if cm is None: try: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startANISO, current_dir, "anis") p.start() break else: print "FATAL ERROR: ARCIMBOLDO cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) except KeyboardInterrupt: print "The user requires ARCIMBOLDO to exit." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob("anis" + DicParameters["nameExecution"]) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = "anis.sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile(mtz, False) job.addInputFile("anis.sh", False) job.addOutputFile("anis.out", False) # job.addOutputFile("anis.mtz",False) # We don't use it anymore cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJob(job, isthelast=True, forcesubmit=True) if nq < 0: print colored("FATAL", "red"), "Phaser Job could not be queued in your cluster.\nPlease, check configuration or contact the Administrator." 
if hasattr(cm, "channel"): print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) sys.exit(1) trial = 10 sleep = 1 # NOTE: Currently anisotropy correction can be quite long, it is better to have larger sleep values if cm is None: # multiprocessing trial = 60 sleep = 10 elif not hasattr(cm, "channel"): # local grid trial = 90 sleep = 11 else: # remote grid trial = 150 sleep = 12 nt = 0 error = False while True: nt += 1 print "Trying PHASER test ", nt, "/", trial if nt >= trial: if hasattr(cm, "channel"): # print "Checking stat of the file" out = cm.connection.send_command_to_channel(cm.channel, 'stat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("anis.out"))), cm.promptB) outlines = out.splitlines() ok_stat = False for word in outlines: if word.strip().startswith("File:"): ok_stat = True break ok_size = False for word in outlines: if word.strip().startswith("Size:"): if float(word.strip().split()[1]) > 0: ok_size = True break if ok_stat and ok_size: print colored("FATAL", "red"), "It is impossible to perform Phaser jobs. Maybe the Label for F and SIGF are incorrect or the mtz is not standard.\n If Phaser has produced some output, it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True if ok_stat: out = cm.connection.send_command_to_channel(cm.channel, 'cat ' + str( os.path.join(cm.get_remote_pwd(), os.path.basename("anis.out"))), cm.promptB) print out print "============================================================================================" print print "If no Phaser output is displayed, the Phaser job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration. Please, check again the relative configuration or contact your Administrator." print cm.change_remote_dir(current_dir2) print cm.remove_remote_dir("testx1245") SystemUtility.close_connection(DicGridConn, DicParameters, cm) break else: print "It seems the job is still in the queue and it is waiting to be processed" nt = 1 continue else: if os.path.exists(os.path.join(current_dir, "anis.out")) and os.stat( os.path.join(current_dir, "anis.out")).st_size > 0: f = open(os.path.join(current_dir, "anis.out"), "r") allf = f.read() f.close() print colored("FATAL", "red"), "It is impossible to perform Phaser jobs. Maybe the Label for F and SIGF are incorrect or the mtz is not standard.\n If Phaser has produced some output, it will be printed now, please check for errors:\n\n" print "===========================================================================================" error = True print allf print "============================================================================================" print print "If no Phaser output is displayed, the Phaser job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration. Please, check again the relative configuration or contact your Administrator." 
                    break
                else:
                    print "It seems the job is still in the queue and it is waiting to be processed"
                    nt = 1
                    continue
        if hasattr(cm, "channel"):
            wse = cm.get_remote_file("anis.out", os.path.join(current_dir, "anis.out"),
                                     conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST,
                                     only_get_this=True, tryonetime=True)
            if isinstance(wse, bool) and not wse:
                time.sleep(sleep)
                continue
            # print cm.remove_remote_file("anis.out")
            # print cm.remove_remote_file("anis.sum")
            # print cm.remove_remote_file("anis.sh")
            wse = cm.get_remote_file("anis.mtz", os.path.join(current_dir, "anis.mtz"), only_get_this=True)
            wse = cm.get_remote_file("anis.tncs", os.path.join(current_dir, "anis.tncs"), only_get_this=True)
            wse = cm.get_remote_file("anis.norm", os.path.join(current_dir, "anis.norm"), only_get_this=True)
            break
        elif os.path.exists(os.path.join(current_dir, "anis.out")):
            wse = checkYOURoutput(os.path.join(current_dir, "anis.out"), PHASER_OUT_END_CONDITION_LOCAL,
                                  PHASER_OUT_END_TEST, sleep_ifnot_ready=False)
            if isinstance(wse, bool) and not wse:
                time.sleep(sleep)
                continue
            break
        else:
            # print "File ",os.path.join(outputDic,str(fromIn)+".out"),os.path.exists(os.path.join(outputDic,str(fromIn)+".out"))
            time.sleep(sleep)
            continue
    if os.path.exists(os.path.join(current_dir, "anis.sh")):
        os.remove(os.path.join(current_dir, "anis.sh"))
    if os.path.exists(os.path.join(current_dir, "anis.sum")):
        os.remove(os.path.join(current_dir, "anis.sum"))
    out_phaser = ""
    err_phaser = ""
    if os.path.exists(os.path.join(current_dir, "anis.out")):
        f = open(os.path.join(current_dir, "anis.out"), "r")
        out_phaser = f.read()
        f.close()
        if not error:
            print out_phaser
        os.remove(os.path.join(current_dir, "anis.out"))
    if not os.path.exists(os.path.join(current_dir, "anis.mtz")):
        print colored("FATAL", "red"), "It was not possible to generate the anisotropy scaled mtz of your data. The program will end now."
        error = True
    if error:
        sys.exit(1)

    # READING THE SPACEGROUP FROM PHASER OUT
    spaceGroup = readSpaceGroupFromOut(out_phaser)
    # READING THE CELL DIMENSIONS FROM PHASER OUT
    cell_dim = cellDimensionFromOut(out_phaser)
    # READING THE RESOLUTION FROM PHASER OUT
    resolution = resolutionFromOut(out_phaser)
    # READING THE NUMBER OF UNIQUE REFLECTIONS FROM PHASER OUT
    unique_refl = uniqueReflectionsFromOut(out_phaser)
    # Read whether translational NCS has been found or not
    tNCS_bool = detect_tNCS_from_phaser_out(out_phaser)

    # GENERATING PDA
    foc = open(os.path.join(current_dir, "test12345.pdb"), "w")
    CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}'
    data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]),
                alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1)
    foc.write(CRD.format(**data) + "\n")
    foc.write(pda)
    foc.close()

    # Test SHELXE
    if os.path.exists(os.path.join(current_dir, "test12345.pda")):
        os.remove(os.path.join(current_dir, "test12345.pda"))
    shutil.move(os.path.join(current_dir, "test12345.pdb"), os.path.join(current_dir, "test12345.pda"))
    if not os.path.exists(hkl):
        print colored("FATAL:", "red"), "The hkl: " + str(
            os.path.abspath(hkl)) + " does not exist or is not readable by the user: " + str(getpass.getuser())
        if hasattr(cm, "channel"):
            print cm.change_remote_dir(current_dir2)
            print cm.remove_remote_dir("testx1245")
        SystemUtility.close_connection(DicGridConn, DicParameters, cm)
        sys.exit(1)
    outlines = []
    if (cm is None) or (not hasattr(cm, "channel")):  # multiprocessing and local_grid
        if len(os.path.split(PATH_NEW_SHELXE)[0]) == 0:
            p = subprocess.Popen(["which", PATH_NEW_SHELXE], stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                 stderr=subprocess.PIPE)
            out, err = p.communicate()
            PATH_NEW_SHELXE = out.strip()
        if not os.path.exists(PATH_NEW_SHELXE):
            print colored("FATAL:", "red"), "The path given for shelxe: " + str(
                os.path.abspath(PATH_NEW_SHELXE)) + " does not exist or is not accessible by the user: " + str(
                getpass.getuser())
            if hasattr(cm, "channel"):
                print cm.change_remote_dir(current_dir2)
                print cm.remove_remote_dir("testx1245")
            SystemUtility.close_connection(DicGridConn, DicParameters, cm)
            sys.exit(1)
        p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE], stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        outlines = out.splitlines()
    else:  # remote_grid
        out = cm.connection.send_command_to_channel(cm.channel, '' + str(PATH_NEW_SHELXE), cm.promptB)
        outlines = out.splitlines()
    version_ok = False
    shelxe_beta = False  # NOTE: initialised here so it is always defined for the final return
    for lin in outlines:
        if "Command not found" in lin:
            print colored("FATAL:", "red"), "The path given for shelxe: " + str(os.path.abspath(
                PATH_NEW_SHELXE)) + " does not exist in the remote workstation or is not accessible by the remote user: " + \
                                            DicGridConn["username"]
            SystemUtility.close_connection(DicGridConn, DicParameters, cm)
            version_ok = False
            if hasattr(cm, "channel"):
                print cm.change_remote_dir(current_dir2)
                print cm.remove_remote_dir("testx1245")
                SystemUtility.close_connection(DicGridConn, DicParameters, cm)
            sys.exit(1)
        elif lin.startswith(' -Q'):
            shelxe_beta = True
            version_ok = True
            break
    if not version_ok:
        print colored("FATAL:", "red"), "The SHELXE version is not compatible with this standalone. Please, update your version"
        if hasattr(cm, "channel"):
            print cm.change_remote_dir(current_dir2)
            print cm.remove_remote_dir("testx1245")
        SystemUtility.close_connection(DicGridConn, DicParameters, cm)
        sys.exit(1)
    if hasattr(cm, "channel"):
        cm.copy_local_file(os.path.join(current_dir, "test12345.pda"), "test12345.pda", send_now=True)
    else:
        shutil.copyfile(hkl, os.path.join(current_dir, "test12345.hkl"))
        if os.path.exists(ent):
            shutil.copyfile(ent, os.path.join(current_dir, "test12345.ent"))

    def startExpJob(outputDirectory, op, lineargs):
        print "Executing..."
        print PATH_NEW_SHELXE, os.path.join(outputDirectory, str(op) + ".pda"), " ".join(lineargs)
        p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE, str(op) + ".pda"] + lineargs,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory)
        out, err = p.communicate()
        print err

    listaDirec = ""
    if hasattr(cm, "channel"):
        listaDirec = cm.get_remote_pwd()
    else:
        listaDirec = os.path.abspath(current_dir)
    nq = 1
    lia = shelxe_line.split()
    print "Expansion of " + str(nq) + " models with:"
    print "hkl: " + str(hkl)
    print "Arguments: " + str(shelxe_line)
    nl = 0
    if cm is None:
        try:
            if sym.PROCESSES > 0:
                print "I found ", sym.REALPROCESSES, "CPUs."
                while 1:
                    if len(threading.enumerate()) <= sym.PROCESSES:
                        p = SystemUtility.OutputThreading(startExpJob, current_dir, "test12345", lia)
                        p.start()
                        break
            else:
                print "FATAL ERROR: Arcimboldo cannot load correctly information on CPUs."
                sym.couldIClose = True
                sys.exit(0)
        except KeyboardInterrupt:
            print "The user requires Arcimboldo to exit."
            sym.couldIClose = True
            sys.exit(0)
    else:
        job = Grid.gridJob("shelt" + DicParameters["nameExecution"])
        if hasattr(cm, "channel"):
            job.setExecutable(os.path.join(cm.get_remote_pwd(), PATH_NEW_SHELXE))
        else:
            job.setExecutable(PATH_NEW_SHELXE)
        job.setInitialDir(listaDirec)
        job.addInputFile("test12345.hkl", False)
        job.addInputFile("test12345.pda", False)
        if os.path.exists(ent):
            job.addInputFile("test12345.ent", False)
        lia = shelxe_line.split()
        lio = ["test12345.pda"]
        lei = lio + lia
        job.setArguments(lei)
        cm.setRequirements(SHELXE_REQUIREMENTS)
        cm.setRank("kflops")
        (nc, nq) = cm.submitJob(job, isthelast=True, forcesubmit=True)
        if nq < 0:
            print colored("FATAL", "red"), "Shelxe Job could not be queued in your cluster.\nPlease, check configuration or contact the Administrator."
            if hasattr(cm, "channel"):
                print cm.change_remote_dir(current_dir2)
                print cm.remove_remote_dir("testx1245")
            SystemUtility.close_connection(DicGridConn, DicParameters, cm)
            sys.exit(1)
    nt = 0
    error = False
    while True:
        nt += 1
        print "Trying SHELXE test ", nt, "/", trial
        if nt >= trial:
            if hasattr(cm, "channel"):
                out = cm.connection.send_command_to_channel(cm.channel, 'stat ' + str(
                    os.path.join(cm.get_remote_pwd(), os.path.basename("test12345.lst"))), cm.promptB)
                outlines = out.splitlines()
                ok_stat = False
                for word in outlines:
                    if word.strip().startswith("File:"):
                        ok_stat = True
                        break
                ok_size = False
                for word in outlines:
                    if word.strip().startswith("Size:"):
                        if float(word.strip().split()[1]) > 0:
                            ok_size = True
                            break
                if ok_stat and ok_size:
                    print colored("FATAL", "red"), "It is impossible to perform Shelxe jobs. Maybe the .hkl is not correct or the shelxe arguments are incorrect.\nIf Shelxe has produced some output it will be printed now. Please check for errors:\n\n"
                    print "==========================================================================================="
                    error = True
                    if ok_stat:
                        out = cm.connection.send_command_to_channel(cm.channel, 'cat ' + str(
                            os.path.join(cm.get_remote_pwd(), os.path.basename("test12345.lst"))), cm.promptB)
                        print out
                    print "============================================================================================"
                    print
                    print "If no Shelxe output is displayed, the Shelxe job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration or the Shelxe version has expired. Please, check again the relative configuration or contact your Administrator."
                    print cm.change_remote_dir(current_dir2)
                    print cm.remove_remote_dir("testx1245")
                    SystemUtility.close_connection(DicGridConn, DicParameters, cm)
                    break
                else:
                    print "It seems the job is still in the queue and it is waiting to be processed"
                    nt = 1
                    continue
            else:
                if os.path.exists(os.path.join(current_dir, "test12345.lst")) and os.stat(
                        os.path.join(current_dir, "test12345.lst")).st_size > 0:
                    f = open(os.path.join(current_dir, "test12345.lst"), "r")
                    allf = f.read()
                    f.close()
                    print colored("FATAL", "red"), "It is impossible to perform Shelxe jobs. Maybe the .hkl is not correct or the shelxe arguments are incorrect.\nIf Shelxe has produced some output it will be printed now, please check for errors:\n\n"
                    print "==========================================================================================="
                    error = True
                    print allf
                    print "============================================================================================"
                    print
                    print "If no Shelxe output is displayed, the Shelxe job could not be launched at all. This means that some error has occurred with the local/remote grid or multiprocessing configuration or the Shelxe version has expired. Please, check again the relative configuration or contact your Administrator."
                    break
                else:
                    print "It seems the job is still in the queue and it is waiting to be processed"
                    nt = 1
                    continue
        if hasattr(cm, "channel"):
            wse = cm.get_remote_file("test12345.lst", os.path.join(current_dir, "test12345.lst"),
                                     conditioEND=SHELXE_LST_END_CONDITION, testEND=SHELXE_LST_END_TEST,
                                     only_get_this=True, tryonetime=True)
            if isinstance(wse, bool) and not wse:
                time.sleep(sleep)
                continue
            break
        elif os.path.exists(os.path.join(current_dir, "test12345.lst")):
            wse = checkYOURoutput(os.path.join(current_dir, "test12345.lst"), SHELXE_LST_END_CONDITION_LOCAL,
                                  SHELXE_LST_END_TEST, sleep_ifnot_ready=False)
            if isinstance(wse, bool) and not wse:
                time.sleep(20)
                continue
            break
        else:
            time.sleep(sleep)
            continue
    initCC = None
    fneed = False
    if os.path.exists(os.path.join(current_dir, "test12345.lst")):
        f = open(os.path.join(current_dir, "test12345.lst"), "r")
        allshe = f.read()
        f.close()
        lines = allshe.split("\n")
        for up in range(len(lines)):
            line = lines[up]
            if (line.strip()).startswith("Overall CC between"):
                line3 = line.strip()
                line3L = line3.split()
                initCC = float((line3L[-1])[:-1])
            if (line.strip()).startswith("<|E^2-1|>"):
                line3 = line.strip()
                line3L = line3.split()
                if len(line3L) > 3:
                    fneed = (line3L[4] == "-f" and line3L[5] == "missing")
                else:
                    fneed = False
    if initCC == None:
        print
        print allshe
        print
        print colored("FATAL", "red"), "The Shelxe test job has failed. Please, read the shelxe output for errors. Check shelxe line arguments, shelxe version and hkl format correctness."
        error = True
    if os.path.exists(os.path.join(current_dir, "test12345.pda")):
        os.remove(os.path.join(current_dir, "test12345.pda"))
    if os.path.exists(os.path.join(current_dir, "test12345.hkl")):
        os.remove(os.path.join(current_dir, "test12345.hkl"))
    if os.path.exists(os.path.join(current_dir, "test12345.phs")):
        os.remove(os.path.join(current_dir, "test12345.phs"))
    if os.path.exists(os.path.join(current_dir, "test12345.ent")):
        os.remove(os.path.join(current_dir, "test12345.ent"))
    if os.path.exists(os.path.join(current_dir, "test12345.pdo")):
        os.remove(os.path.join(current_dir, "test12345.pdo"))
    if os.path.exists(os.path.join(current_dir, "test12345_trace.ps")):
        os.remove(os.path.join(current_dir, "test12345_trace.ps"))
    if os.path.exists(os.path.join(current_dir, "test12345.lst")):
        os.remove(os.path.join(current_dir, "test12345.lst"))
    if hasattr(cm, "channel"):
        print cm.change_remote_dir(current_dir2)
        print cm.remove_remote_dir("testx1245")
    SystemUtility.close_connection(DicGridConn, DicParameters, cm)
    if error:
        sys.exit(1)
    return os.path.join(current_dir, "anis.mtz"), os.path.join(current_dir, "anis.norm"), \
           os.path.join(current_dir, "anis.tncs"), F, SIGF, spaceGroup, cell_dim, resolution, unique_refl, \
           out_phaser, err_phaser, fneed, shelxe_beta, tNCS_bool
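# --- Illustrative sketch (editor addition) ---
# The shelxe test above recovers the initial CC from the .lst line starting with
# "Overall CC between" (last token is the percentage); the same parsing, isolated so it
# can be exercised on any .lst text. The exact wording after "between" may vary by version.
def _demo_parse_initCC(lst_text):
    for line in lst_text.split("\n"):
        line3 = line.strip()
        if line3.startswith("Overall CC between"):
            return float(line3.split()[-1][:-1])  # strip the trailing '%'
    return None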
def anisotropyCorrection(current_dir, mtz, F, SIGF, Intensities, nice):
    """ Run a standalone, local phaser anisotropy and tNCS correction job on the given mtz.

    :param current_dir:
    :type current_dir:
    :param mtz:
    :type mtz:
    :param F:
    :type F:
    :param SIGF:
    :type SIGF:
    :param Intensities:
    :type Intensities:
    :param nice:
    :type nice:
    :return:
    :rtype:
    """
    f = open(os.path.join(current_dir, "anis.sh"), "w")
    f.write("#!/bin/tcsh" + "\n")
    f.write("MODE NCS" + "\n")
    f.write("MR_NCS" + "\n")
    # This standalone helper always runs locally, so the absolute mtz path is used
    # (the original conditioned this on an undefined "cm" variable).
    f.write('HKLIN ' + str(os.path.abspath(mtz)) + "\n")
    if not Intensities:
        f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n")
    else:
        f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n")
    f.write("TITLE Anisotropy and TNCS Correction" + "\n")
    f.write("TNCS EPSFAC WRITE anis.tncs" + "\n")  # writes tncs corrections to a binary file
    f.write("NORM EPSFAC WRITE anis.norm" + "\n")  # writes anisotropy corrections to a binary file
    f.write("TNCS PATT MAPS ON" + "\n")  # writes a patterson map to check if non ori peaks are found
    f.write('ROOT "' + os.path.join(current_dir, "anis") + '"\n')
    f.write("END\n")
    f.write("EOF-phaser")
    f.close()
    f = open(os.path.join(current_dir, "anis.sh"), "r")
    p = None
    p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    out, err = p.communicate()
    f.close()
    print out
    print err
    # CLEAN UP UNNEEDED FILES
    os.remove(os.path.join(current_dir, "anis.sh"))
    os.remove(os.path.join(current_dir, "anis.sum"))
    # NOTE: if to be used with non corrected mtz, change the labels
    return os.path.join(current_dir, "anis.mtz"), \
           os.path.join(current_dir, "anis.norm"), \
           os.path.join(current_dir, "anis.tncs"), \
           F + "_ISO", SIGF + "_ISO", out, err
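# --- Illustrative sketch (editor addition) ---
# The keyword file fed to phaser by anisotropyCorrection, assembled as a single string for
# reference (amplitudes variant, ROOT left as the bare "anis" stem).
def _demo_anis_script(mtz, F, SIGF):
    return ("#!/bin/tcsh\n"
            "MODE NCS\n"
            "MR_NCS\n"
            "HKLIN " + os.path.abspath(mtz) + "\n"
            "LABIN F=" + F + " SIGF=" + SIGF + "\n"
            "TITLE Anisotropy and TNCS Correction\n"
            "TNCS EPSFAC WRITE anis.tncs\n"
            "NORM EPSFAC WRITE anis.norm\n"
            "TNCS PATT MAPS ON\n"
            "ROOT anis\n"
            "END\n"
            "EOF-phaser")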
Post-Mortem Process..." dbdir = os.path.join(rootdir, "pmdb") if os.path.exists(dbdir): shutil.rmtree(dbdir) os.makedirs(dbdir) shutil.copyfile(ent, os.path.join(dbdir, os.path.basename(ent)[:-4] + ".pdb")) tupleResult = Bioinformatics.getFragmentListFromPDB(model_file, True, False) listaFra = tupleResult[1] old_ens_path = None pathide = None if len(listaFra) == 1 and listaFra[0]["sstype"] in ["ah", "ch"]: old_ens_path = ensembles.values()[0] new_ens_path = old_ens_path + ".bak" shutil.copyfile(old_ens_path, new_ens_path) print "Computing Gyre LLG for all possible rotations of helices of", listaFra[0]["fragLength"], "aa" print "Generating models..." print "Starting BORGES to find", model_file, "into", ent, "..." Parameters = {} Parameters["model"] = model_file Parameters["dir"] = dbdir Parameters["wdir"] = rootdir Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 1 Parameters["nilges"] = 5 Parameters["enhance_fold"] = True pars, opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars, opt, doCluster=False, superpose=False, sym=sym, process_join=True) if not os.path.exists(os.path.join(rootdir, "./library/")): print "BORGES cannot detect the fold in the real structure. Picasso will end!" print "Skipping Gyre LLG test! " else: howmany = [] pathide = os.path.join(rootdir, nameJob + "_idealhelix/") if os.path.exists(pathide): shutil.rmtree(pathide) shutil.move(os.path.join(rootdir, "./library/"), pathide) f2 = open(os.path.join(rootdir, nameJob + "_idealrot_pm.txt"), "w") for root2, subFolders2, files2 in os.walk(pathide): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue (rmsd, nref, ncom, allAtoms, compStru, pda, dictiocorresp) = Bioinformatics.getSuperimp(pdbf2, model_file, "PDB", getDictioCorresp=True) if rmsd < 0 or rmsd > 1.0: print pdbf2, "excluded..." 
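# Superposition gate: candidates from the BORGES library are kept only when getSuperimp
# returns an RMSD in [0, 1.0] A against the search model; a negative value flags a failed
# superposition. Kept models are then renumbered below so their atom records match the
# residue numbering of the real structure (dictiocorresp mapping).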
continue f2.write(pdbf2 + "\tRMSD:" + str(rmsd) + "\n") howmany.append(pdbf2) pdapp = pda[0].splitlines() fc = open(pdbf2, "r") linc = fc.readlines() fc.close() pda = "" reference = Bioinformatics.getStructure("ref", pdbf2) natd = 1 for l in range(len(pdapp)): posi = pdapp[l][17:27] newposi = dictiocorresp[posi] + ' ' linat = str('%-5s' % (natd)) linea_stringa = pdapp[l][:6] + linat + pdapp[l][11:17] + newposi + pdapp[l][30:54] + pdapp[l][ 54:] natd += 1 pda += linea_stringa + "\n" f = open(pdbf2, "w") f.write(pda) f.close() f2.close() dipl = {} C = [] hp = ADT.Heap() brncv = {} rota = {"name": "ensemble1", "llg": 1.0, "zscore": 1.0, "numInRlist": 1, "n_prev_cluster": 0, "original_rotcluster": "0", "euler": [0.0, 0.0, 0.0], "frac": [0.0, 0.0, 0.0], "quaternion": [0.0, 0.0, 0.0, 0.0], "elong": 0, "bfactor": 0.0, "rotationMatrices": [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]]} for y in range(len(howmany)): rol = copy.deepcopy(rota) rol["name"] = "ensemble" + str(y) brncv[rol["name"]] = howmany[y] rol["llg"] = 0.0 rol["zscore"] = 0.0 hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, pathide, "examine", brncv) if listaFra[0]["fragLength"] >= 13: new_length = listaFra[0]["fragLength"] - 4 helix_list = Bioinformatics.getIdealHelicesFromLenghts([new_length], idealhelixpdb) f = open(old_ens_path, "w") f.write(helix_list[0][0]) f.close() print "Generating all rotation clusters and rotational symmetry equivalent in ", os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), "..." getTheTOPNOfEachCluster({}, 1, os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), "matrix", quate, Clusters, ensembles, 1, True, False, None, True, cell_dim, laue, ncs, modeTra="Cmass") print "Starting BORGES to find", model_file, "into", ent, "..." Parameters = {} Parameters["model"] = model_file Parameters["dir"] = dbdir Parameters["wdir"] = rootdir Parameters["continous"] = 70 Parameters["jumps"] = 70 Parameters["ncssearch"] = False Parameters["multimer"] = True Parameters["rmsd_min"] = 0.0 Parameters["rmsd_max"] = 6.0 Parameters["rmsd_clustering"] = 0.0 Parameters["exclude_residues_superpose"] = 1 Parameters["nilges"] = 5 Parameters["enhance_fold"] = True pars, opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) BORGES_MATRIX.startBORGES_MATRIX(pars, opt, doCluster=False, superpose=False, sym=sym, process_join=True) if not os.path.exists(os.path.join(rootdir, "./library/")): print "BORGES cannot detect the fold in the real structure. Picasso will end!" try: shutil.rmtree(dbdir) if old_ens_path != None: os.remove(old_ens_path) shutil.move(new_ens_path, old_ens_path) shutil.rmtree(os.path.join(rootdir, "Final_Frags_" + nameJob + "/")) shutil.rmtree(os.path.join(rootdir, nameJob + "_mosed")) shutil.rmtree(os.path.join(rootdir, "Frags_ROTTRA_" + nameJob + "/")) except: pass return {}, None if os.path.exists(os.path.join(rootdir, nameJob + "_mosed")): shutil.rmtree(os.path.join(rootdir, nameJob + "_mosed")) shutil.move(os.path.join(rootdir, "./library/"), os.path.join(rootdir, nameJob + "_mosed")) for root2, subFolders2, files2 in os.walk(os.path.join(rootdir, nameJob + "_mosed")): for fileu2 in files2: pdbf2 = os.path.join(root2, fileu2) if not pdbf2.endswith(".pdb"): continue (rmsd, nref, ncom, allAtoms, compStru, pda, dictiocorresp) = Bioinformatics.getSuperimp(pdbf2, model_file, "PDB", getDictioCorresp=True) if rmsd < 0 or rmsd > 1.0: print pdbf2, "excluded..." 
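# Same RMSD gate as in the ideal-helix branch above; the rewrite that follows edits fixed PDB
# columns: the record name (cols 1-6) is kept, the atom serial (cols 7-11) is renumbered with
# '%-5s', and residue name/chain/number (cols 18-27) are remapped onto the real structure
# through dictiocorresp.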
continue pdapp = pda[0].splitlines() fc = open(pdbf2, "r") linc = fc.readlines() fc.close() pda = "" reference = Bioinformatics.getStructure("ref", pdbf2) natd = 1 for l in range(len(pdapp)): posi = pdapp[l][17:27] newposi = dictiocorresp[posi] + ' ' linat = str('%-5s' % (natd)) linea_stringa = pdapp[l][:6] + linat + pdapp[l][11:17] + newposi + pdapp[l][30:54] + pdapp[l][54:] natd += 1 pda += linea_stringa + "\n" f = open(pdbf2, "w") f.write(pda) f.close() print "Artificially computing translations for model", model_file dictRMSD = localizeModelFragmentsInRealStructure(os.path.join(rootdir, nameJob + "_mosed"), os.path.join(rootdir, "Final_Frags_" + nameJob + "/"), os.path.join(rootdir, "Frags_ROTTRA_" + nameJob + "/")) f = open(os.path.join(rootdir, nameJob + "_rot_pm.txt"), "w") for key in sorted(dictRMSD.keys()): value = dictRMSD[key] linea = "PREDICTED: " + str(key) + " [" + str(value[7]) + ":" + str(value[8]) + "]" + "\t\t" + "RMSD: " + str( value[0]) + "\t" + "FILE: " + str(value[1]) + "\t" + "REAL: " + str(value[5]) + " [" + str( value[9]) + ":" + str(value[10]) + "]" f.write(linea + "\n") linea = "DIST_VAL: " + str(value[2]) f.write(linea + "\n") for i in range(len(value[6])): linea = "\t[" f.write(linea + "\n") for t in range(len((value[6])[i])): linea = "\t\t" + str(((value[6])[i])[t]) f.write(linea + "\n") linea = "\t]" f.write(linea + "\n") linea = "---------------------------------------------------------------" f.write(linea + "\n") f.close() print "End of Picasso Post-Mortem Process" # CLEANING UP shutil.rmtree(dbdir) if old_ens_path != None: os.remove(old_ens_path) shutil.move(new_ens_path, old_ens_path) # shutil.rmtree(os.path.join(rootdir,nameJob+"_mosed")) # shutil.rmtree(os.path.join(rootdir,"Final_Frags_"+nameJob+"/")) # shutil.rmtree(os.path.join(rootdir,"Frags_ROTTRA_"+nameJob+"/")) if pathide is not None: return dictRMSD, os.path.join(pathide, "examine.sum") else: return dictRMSD, None def analyze_all_solutions(sym, i, DicParameters, CCV, cnv, hkl, cell_dim, spaceGroup, shlxLinea0, mosed_dir, outputDire): """ :param sym: :type sym: :param i: :type i: :param DicParameters: :type DicParameters: :param CCV: :type CCV: :param cnv: :type cnv: :param hkl: :type hkl: :param cell_dim: :type cell_dim: :param spaceGroup: :type spaceGroup: :param shlxLinea0: :type shlxLinea0: :param mosed_dir: :type mosed_dir: :param outputDire: :type outputDire: :return: :rtype: """ if os.path.exists(outputDire): shutil.rmtree(outputDire) os.makedirs(outputDire) for item in CCV: ent = item["corresp"] structure = Bioinformatics.getStructure("ent", ent) atomi = [] for model in structure: for chain in model: for resi in chain: for aty in resi: atomi += [aty] listatm = translateListByFrac(0, item["model"], atomi, item["shift_origin"], cell_dim, return_atoms=True) pdball = Bioinformatics.getPDBFromListOfAtom(listatm) CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f = open(os.path.join(outputDire, os.path.basename(ent)), "w") f.write(CRD.format(**data) + "\n") f.write(pdball[0]) f.close() """ ent = os.path.join(outputDire,os.path.basename(ent)) (nqueue6,convNames6) = startExpansion(sym,"20_EXPVAL_"+str(i), os.path.join(outputDire,"6_EXPVAL_LIBRARY/"),hkl,ent,cell_dim,spaceGroup,shlxLinea0,mosed_dir) CC_Val1 = evaluateExp_CC(DicParameters, sym, "6_EXPVAL_"+str(i), 
os.path.join(outputDire,"6_EXPVAL_LIBRARY/"),nqueue6,convNames6,isArcimboldo=True) shutil.move(os.path.join(outputDire,"6_EXPVAL_LIBRARY/solCC.sum"),os.path.join(outputDire,"sol_"+os.path.basename(ent)[:-4]+".sum")) shutil.rmtree(os.path.join(outputDire,"6_EXPVAL_LIBRARY/")) """ def trim_small_chains(pdbf, minRes): """ :param pdbf: :type pdbf: :param minRes: :type minRes: :return: :rtype: """ struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > minRes: chn.append(ch) chains = chn how = 0 allAtm = [] for cha in chains: how += len(cha.get_list()) for resi in cha: for atm in resi: allAtm.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm)[0] f = open(pdbf, "w") f.write(pdball) f.close() return how def generate_chunks_byChain_progressive(pdbf, min_res, direc): """ :param pdbf: :type pdbf: :param min_res: :type min_res: :param direc: :type direc: :return: :rtype: """ listPDBS = [] struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > 3: chn.append(ch) else: print "Pruning chain", ch.get_id(), len(ch.get_list()) chains = len(chn) print "====", min_res, chains - 1 for t in range(min_res, chains): print "evaluating", t listP = generate_chunks_byChain(pdbf, t, direc, addnum=len(listPDBS)) # print "=================================" # print listP # print "=================================" listPDBS += listP return listPDBS def generate_chunks_byChain(pdbf, omit_ss, direc, addnum=0): """ :param pdbf: :type pdbf: :param omit_ss: :type omit_ss: :param direc: :type direc: :param addnum: :type addnum: :return: :rtype: """ if not (os.path.exists(direc)): os.makedirs(direc) struc = Bioinformatics.getStructure("ref", pdbf) chains = struc.get_list()[0].get_list() chn = [] for ch in chains: if len(ch.get_list()) > 3: chn.append(ch) else: print "Pruning chain", ch.get_id(), len(ch.get_list()) chains = chn listPDBS = [] for t in range(len(chains)): piece = None if t == 0: piece = chains[t + omit_ss:] elif t == len(chains) - omit_ss: piece = chains[:t] else: if t + omit_ss < len(chains): piece = chains[:t] + chains[t + omit_ss:] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_ss) - len(chains) piece = chains[adn:t] # print adn,"-",t,len(piece) print "---", piece allAtm = [] how = 0 for cha in piece: how += len(cha.get_list()) for resi in cha: for atm in resi: allAtm.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm)[0] pdbid, a, b = os.path.basename(pdbf)[:-4].split("_") nomefi = pdbid + "_" + str(t + addnum + 1) + "_0.pdb" filename = os.path.join(direc, nomefi) f = open(filename, "w") f.write(pdball) f.close() listPDBS.append((filename, how)) return listPDBS def generate_chunks_bySS(pdbf, omit_ss, direc): """ :param pdbf: :type pdbf: :param omit_ss: :type omit_ss: :param direc: :type direc: :return: :rtype: """ if not (os.path.exists(direc)): os.makedirs(direc) tupleResult = Bioinformatics.getFragmentListFromPDB(pdbf, False, False) struc = tupleResult[0] listaFra = tupleResult[1] listPDBS = [] if not isinstance(omit_ss, tuple): for t in range(len(listaFra)): piece = None if t == 0: piece = listaFra[t + omit_ss:] elif t == len(listaFra) - omit_ss: piece = listaFra[:t] else: if t + omit_ss < len(listaFra): piece = listaFra[:t] + listaFra[t + omit_ss:] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_ss) - len(listaFra) piece = 
listaFra[adn:t]
            # print adn,"-",t,len(piece)
            pdball = Bioinformatics.getPDBFormattedAsString("0", piece, struc, "", useDizioConv=False)[1]
            qsa = os.path.basename(pdbf).split("_")
            filename = os.path.join(direc, qsa[0] + "-" + str(t) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb")
            f = open(filename, "w")
            f.write(pdball)
            f.close()
            how = 0
            for fre in piece:
                how += fre["fragLength"]
            listPDBS.append((filename, how))
    else:
        lista_ah = []
        lista_bs = []
        for fra in listaFra:
            if fra["sstype"] in ["ah", "ch"]:
                lista_ah.append(fra)
            elif fra["sstype"] in ["bs", "cbs"]:
                lista_bs.append(fra)
        num_ah = omit_ss[0]
        num_bs = omit_ss[1]
        piece_ah = []
        for t in range(len(lista_ah)):
            piece = None
            if t == 0:
                piece = lista_ah[t + num_ah:]
            elif t == len(lista_ah) - num_ah:
                piece = lista_ah[:t]
            else:
                if t + num_ah < len(lista_ah):
                    piece = lista_ah[:t] + lista_ah[t + num_ah:]
                else:
                    adn = (t + num_ah) - len(lista_ah)
                    piece = lista_ah[adn:t]
            piece_ah.append(piece)
        piece_bs = []
        for t in range(len(lista_bs)):
            piece = None
            if t == 0:
                piece = lista_bs[t + num_bs:]
            elif t == len(lista_bs) - num_bs:
                piece = lista_bs[:t]
            else:
                if t + num_bs < len(lista_bs):
                    piece = lista_bs[:t] + lista_bs[t + num_bs:]
                else:
                    adn = (t + num_bs) - len(lista_bs)
                    piece = lista_bs[adn:t]
            piece_bs.append(piece)
        for pie_ah in piece_ah:
            for pie_bs in piece_bs:
                piece = pie_ah + pie_bs  # BUGFIX: was "pie_ah + piece_bs", which concatenated the whole list of lists
                pdball = Bioinformatics.getPDBFormattedAsString("0", piece, struc, "", useDizioConv=False)[1]
                qsa = os.path.basename(pdbf).split("_")
                filename = os.path.join(direc,
                                        qsa[0] + "-" + str(t) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb")
                f = open(filename, "w")
                f.write(pdball)
                f.close()
                how = 0
                for fre in piece:
                    how += fre["fragLength"]
                listPDBS.append((filename, how))
    return listPDBS


def generate_chunks_progressive(pdbf, from_omit_res, to_omit_res, step_size, mode_type, polyBack, mantCys, fix_ss,
                                remove_coil, direc):
    """
    Iteratively calls the function to get the chunks using a different size for the omit each time.

    Args:
        pdbf (str): path to the template
        from_omit_res (int):
        to_omit_res (int):
        step_size (int):
        mode_type (str):
        polyBack (bool):
        mantCys (bool)
        fix_ss (str): can be None,
        remove_coil (str): can be None, maintain_coil, or remove_coil
        direc (str):

    Returns:
        listPDBS (list):
        indicators (dict): keys are the number of residues omitted, and values a tuple with the range of
            models that correspond to each size
    """
    listPDBS = []
    indicators = {}
    for t in range(from_omit_res, to_omit_res):
        indicator_A = len(listPDBS)
        listPDBS += generate_chunks(pdbf=pdbf, omit_res=t, step_size=step_size, mode_type=mode_type,
                                    polyBack=polyBack, mantCys=mantCys, fix_ss=fix_ss, remove_coil=remove_coil,
                                    direc=direc, addnum=len(listPDBS))
        indicator_B = len(listPDBS)
        indicators[t] = (indicator_A, indicator_B)
    return listPDBS, indicators


def generate_chunks(pdbf, omit_res, step_size, mode_type, polyBack, mantCys, fix_ss, remove_coil, direc, addnum=0):
    """
    Generates the omit-shredded fragments.
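    With mode_type "omit" each output model keeps the complement of the omitted window, while
    "fragment" writes a model from the omitted residues themselves (see the two
    listPDBS.append branches below).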
Args: pdbf (str): path to the template omit_res (int): mode_type (str): polyBack (bool): mantCys (bool) fix_ss (str): can be None, remove_coil (str): can be None, maintain_coil, or remove_coil direc (str): addnum (int): Returns: listPDBS (list): """ pathname = copy.deepcopy(pdbf) if not (os.path.exists(direc)): os.makedirs(direc) listPDBS = [] limitis = 0 if fix_ss != None and remove_coil != None: tupleResult = Bioinformatics.getFragmentListFromPDB(pdbf=pathname, isModel=False, drawDistri=False) struc = tupleResult[0] listaFra = tupleResult[1] listAtoms = [] for fra in listaFra: if (fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_ah") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_bs"): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: limitis += 1 for atm in residue: listAtoms.append(atm) if remove_coil == "maintain_coil": if not ((fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_ah") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_bs")): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: for atm in residue: listAtoms.append(atm) elif remove_coil == "remove_coil": if ((fra["sstype"] in ["ah", "ch"] and fix_ss == "fix_bs") or ( fra["sstype"] in ["bs", "cbs"] and fix_ss == "fix_ah")): for resi in fra["resIdList"]: for model in struc: for chain in model: for residue in chain: if residue.get_id() == resi: for atm in residue: listAtoms.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(reference=listAtoms, renumber=True, uniqueChain=True)[0] f = open(pathname, "w") f.write(pdball) f.close() pdbf = cStringIO.StringIO(pdball) listAll = Bioinformatics.getListCA(name="model", pdbf=pdbf, mode="PDB", backbone=True, allInList=True, minResInChain=3) # print "Residui",len(listAll[0][0]) workList = listAll[0][0] fixed_list = [] if limitis > 0: workList = listAll[0][0][:limitis] fixed_list = listAll[0][0][limitis:] for t in range(0, len(workList), step_size): piece = None omitted = None if t == 0: piece = workList[t + omit_res:] omitted = workList[:t + omit_res] elif t == len(workList) - omit_res: piece = workList[:t] omitted = workList[t:] else: if t + omit_res < len(workList): piece = workList[:t] + workList[t + omit_res:] omitted = workList[t:t + omit_res] # print "0 -",t,"/",t+omit_res,"-",len(listAll[0][0]),len(piece) else: adn = (t + omit_res) - len(workList) piece = workList[adn:t] omitted = workList[:adn] + workList[t + 1:] # print adn,"-",t,len(piece) allAtm = [] newlip2 = [] for resi in piece: for atm in resi: allAtm.append(atm) vald = atm.get_parent().get_id()[1] if vald not in newlip2: newlip2.append(vald) newlip = [] omitatm = [] for resi in omitted: for atm in resi: vald = atm.get_parent().get_id()[1] omitatm.append(atm) if vald not in newlip: newlip.append(vald) if mode_type == "omit": allAtm += [item for sublist in fixed_list for item in sublist] pdball = \ Bioinformatics.getPDBFromListOfAtom(allAtm, chainFragment=True, polyala=polyBack, maintainCys=mantCys)[ 0] elif mode_type == "fragment": omitatm += [item for sublist in fixed_list for item in sublist] pdball = \ Bioinformatics.getPDBFromListOfAtom(omitatm, chainFragment=True, polyala=polyBack, maintainCys=mantCys)[ 0] qsa = os.path.basename(pathname).split("_") filename = os.path.join(direc, qsa[0] + "-" + str(t + addnum) + "_" + str(qsa[1]) + "_" + str(qsa[2][:-4]) + ".pdb") f = open(filename, "w") f.write(pdball) f.close() if mode_type == "omit": listPDBS.append([filename, 
len(piece), newlip]) elif mode_type == "fragment": listPDBS.append([filename, len(omitatm), newlip2]) """ diffch = [] for resi in omitted: for atm in resi: diffch.append(atm) pdball = Bioinformatics.getPDBFromListOfAtom(allAtm,chainFragment=True,diffchain=diffch)[0] filename = os.path.join(direc,os.path.basename(pathname)[:-4]+"-"+str(t+addnum)+"B.pdb") f = open(filename,"w") f.write(pdball) f.close() #listPDBS.append((filename,len(piece))) """ return listPDBS def startOMITllg_nogyre(DicParameters, cm, sym, DicGridConn, model_file, sizes, nameJob, outputDire, sumPath, howmany, indic, number_cluster, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False, formfactors='FORMFACTORS XRAY'): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: :type DicGridConn: :param model_file: :type model_file: :param sizes: :type sizes: :param nameJob: :type nameJob: :param outputDire: :type outputDire: :param sumPath: :type sumPath: :param howmany: :type howmany: :param indic: :type indic: :param number_cluster: :type number_cluster: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :param RNP_GYRE: :type RNP_GYRE: :return: :rtype: """ convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, sumPath, "ROTSOL") Clusel = [] prio = None rota = None if number_cluster != None: for clu in CluAll: if clu["heapSolutions"].asList()[0][1]["n_prev_cluster"] == number_cluster: Clusel = clu prio, rota = clu["heapSolutions"].asList()[0] break liall = sorted(Clusel["heapSolutions"].asList(), __cmp_rota2, reverse=True) if not (os.path.exists(outputDire)): os.makedirs(outputDire) fildname = os.path.join(outputDire, str(1) + "_graph") if not os.path.exists( os.path.join(outputDire, "../../../library/peaks_" + str(number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/pklat_" + str(number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")): qe = open(fildname + ".scri", "w") qc = open(fildname + "_var.scri", "w") qe.write("set terminal png size 2400,2400\nset output \"" + fildname + ".png\"\n") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_var.png\"\n") totl = (sizes[1] - sizes[0]) / 2 qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qe.write("set multiplot layout " + str(totl) + ",2 columnsfirst\n") res_wllg = {} qn = open(fildname + "_var.data", "w") qn.write("#\tX\tY\n") for key in sorted(indic.keys()): qo = open(fildname + "_" + str(key) + ".data", "w") qo.write("#\tX\tY\n") qot = open(fildname + "_" + str(key) + "TOP.data", "w") qot.write("#\tX\tY\n") start, fine = indic[key] all_var = [] for jk in range(start, fine): svd = [] pr = None rk = None for item in liall: pr, rk = item if os.path.basename(convNames[rk["name"]]).split("-")[-1][:-4] == str(jk): break # NOTE: to be honest rk cannot be None because it will be the last rotation of the cluster if it is not found the correct # one, so llg will never be 0. 
To have rk == None a boolean flag or something else must be used, but in this way # like it is now maybe is even better because we put the lowest llg observed in the cluster for the models that # are not present in the cluster if rk == None: rk = {"llg": 0} else: qo.write(str(jk) + "\t" + str(rk["llg"]) + "\t\t#" + str(howmany[jk][2]) + "\n") svd.append(rk["llg"]) all_var.append(rk["llg"]) howmany[jk].append(rk["llg"]) for rendr in howmany[jk][2]: if rendr in res_wllg.keys(): res_wllg[rendr] = [res_wllg[rendr][0] + ( rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0]))), res_wllg[rendr][1] + 1] else: res_wllg[rendr] = [rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0])), 1] svd = sorted(svd, reverse=True) # print "---",jk,svd qot.write(str(jk) + "\t" + str(svd[0]) + "\t\t#" + str(howmany[jk][2]) + "\n") qn.write(str(key) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() qot.close() qe.write("plot \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"p" + str( key) + "\" with points, \"" + fildname + "_" + str(key) + ".data\" using 1:2 title \"l" + str( key) + "\" smooth csplines, \"" + fildname + "_" + str(key) + "TOP.data\" using 1:2 title \"t" + str( key) + "\" with lines\n") qe.write("unset multiplot\n") qc.write( "plot \"" + fildname + "_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qn.close() qe.close() qc = open(fildname + "_wllg_var.scri", "w") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_wllg_var.png\"\n") qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qc.write( "plot \"" + fildname + "_wllg_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qo = open(fildname + "_wllg.data", "w") qo.write("#\tX\tY\n") chiavi = [] valori = [] for key in sorted(res_wllg.keys()): # qo.write(str(key)+"\t"+str(-1*numpy.log(res_wllg[key][0]/float(res_wllg[key][1])))+"\t\t#"+str(res_wllg[key])+"\n") qo.write( str(key) + "\t" + str(res_wllg[key][0] / float(res_wllg[key][1])) + "\t\t#" + str(res_wllg[key]) + "\n") chiavi.append(key) valori.append(res_wllg[key][0] / float(res_wllg[key][1])) qo.close() # IMPORTANT: Find the peaks def __cmp_peaks(a, b): return cmp(a[1], b[1]) max_peaks = ADT.top_max_peaks(valori, chiavi, 4) minp = (sorted(max_peaks, __cmp_peaks))[0] flat_regions = ADT.flat_regions(valori, chiavi, minp[1]) # FIND THE RESIDUES RANGES TO OMIT FOR EACH PEAK del_resi = [] for peak in max_peaks: all_kurtosis = [] res_center = peak[0] for key in indic.keys(): llgs = [] llg_resi = [] start, fine = indic[key] for blur in range(start, fine): if res_center in howmany[blur][2]: llgs.append(howmany[blur][3]) llg_resi.append((howmany[blur][2], howmany[blur][3])) topllgresi = (sorted(llg_resi, __cmp_peaks, reverse=True))[0] all_kurtosis.append((key, scipy.stats.kurtosis(llgs), topllgresi)) print "ANALYSIS PEAK", peak print "....................................KURTOSIS.................................." for kurto in all_kurtosis: print kurto print ".............................................................................." 
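# For every LLG peak, the omit-window size whose LLG distribution shows the highest kurtosis is
# taken as the most discriminating one, and its top-LLG residue window is added to del_resi for
# omission (__cmp_peaks orders tuples by their second element: LLG above, kurtosis here).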
topkurtosis = (sorted(all_kurtosis, __cmp_peaks, reverse=True))[0] del_resi.append(topkurtosis[2]) res_list_40, safe_llg_40 = ADT.get_percentile_llg(valori, chiavi, 0.40) res_list_50, safe_llg_50 = ADT.get_percentile_llg(valori, chiavi, 0.50) res_list_55, safe_llg_55 = ADT.get_percentile_llg(valori, chiavi, 0.55) res_list_60, safe_llg_60 = ADT.get_percentile_llg(valori, chiavi, 0.60) res_list_65, safe_llg_65 = ADT.get_percentile_llg(valori, chiavi, 0.65) res_list_70, safe_llg_70 = ADT.get_percentile_llg(valori, chiavi, 0.70) res_list_75, safe_llg_75 = ADT.get_percentile_llg(valori, chiavi, 0.75) res_list_80, safe_llg_80 = ADT.get_percentile_llg(valori, chiavi, 0.80) res_list_85, safe_llg_85 = ADT.get_percentile_llg(valori, chiavi, 0.85) print "===========RESIDUE TO OMIT=======" resi_list_1 = [] for lista in del_resi: print "--", lista, "--" resi_list_1 += lista[0] print "=================================" print print "===========FLAT REGIONS==========" resi_list_2 = copy.deepcopy(resi_list_1) for flat in flat_regions: print range(flat[0], flat[1] + 1) resi_list_2 += range(flat[0], flat[1] + 1) print "=================================" trimmed_portion = ADT.erase_bad_region(valori, chiavi, minp[1]) resi_list_3 = trimmed_portion print print "//////////TRIMMED REGIONS////////" print trimmed_portion print "/////////////////////////////////" print if not os.path.exists(fildname + "_wllg.scri"): qe = open(fildname + "_wllg.scri", "w") qe.write("set terminal png size 4200,1200\nset output \"" + fildname + "_wllg.png\"\n") qe.write("set xtics 1\n") for pek in max_peaks: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead" + "\n") for pek in flat_regions: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead lt 3 lc 3" + "\n") qe.write("set arrow from " + str(int(pek[1])) + ", graph 0 to " + str( int(pek[1])) + ", graph 1 nohead lt 3 lc 3" + "\n") # qe.write("plot \""+fildname+"_wllg.data\" using 1:2 title \"p\" with points, \""+fildname+"_wllg.data\" using 1:2 title \"l\" smooth csplines, "+str(minp[1])+" notitle with lines, "+str(safe_llg)+" notitle with lines\n") qe.write( "plot \"" + fildname + "_wllg.data\" using 1:2 title \"p\" with points, \"" + fildname + "_wllg.data\" using 1:2 title \"l\" smooth csplines, " + str( safe_llg_40) + " title \"p40\" with lines, " + str( safe_llg_50) + " title \"p50\" with lines, " + str( safe_llg_55) + " title \"p55\" with lines, " + str( safe_llg_60) + " title \"p60\" with lines, " + str( safe_llg_65) + " title \"p65\" with lines, " + str( safe_llg_70) + " title \"p70\" with lines, " + str( safe_llg_75) + " title \"p75\" with lines, " + str( safe_llg_80) + " title \"p80\" with lines, " + str(safe_llg_85) + " title \"p85\" with lines\n") qe.close() qo = open(fildname + "_wllg_var.data", "w") qo.write("#\tX\tY\n") # all_var = map(lambda x: -1*numpy.log(x[0]/float(x[1])),res_wllg.values()) all_var = map(lambda x: x[0] / float(x[1]), res_wllg.values()) qo.write(str(number_cluster) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() resi_1 = [] resi_2 = [] resi_3 = [] for resi in resi_list_1: if resi in res_list_75: continue else: resi_1.append(resi) for resi in resi_list_2: if resi in res_list_75: continue else: resi_2.append(resi) for resi in resi_list_3: if resi in res_list_75: continue else: resi_3.append(resi) Bioinformatics.generatePDBomitting(model_file, resi_list_1, os.path.join(outputDire, "../../../library/peaks_" + str( 
number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDBomitting(model_file, resi_list_2, os.path.join(outputDire, "../../../library/pklat_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) # Bioinformatics.generatePDBomitting(model_file,resi_list_3,os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb"),trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, resi_list_3, os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_40, os.path.join(outputDire, "../../../library/percentile40_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_50, os.path.join(outputDire, "../../../library/percentile50_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_55, os.path.join(outputDire, "../../../library/percentile55_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_60, os.path.join(outputDire, "../../../library/percentile60_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_65, os.path.join(outputDire, "../../../library/percentile65_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_70, os.path.join(outputDire, "../../../library/percentile70_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_75, os.path.join(outputDire, "../../../library/percentile75_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_80, os.path.join(outputDire, "../../../library/percentile80_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_85, os.path.join(outputDire, "../../../library/percentile85_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) """ dipl = {} C = [] hp = ADT.Heap() brncv = {} models_final = [os.path.join(outputDire,"../../../library/peaks_"+str(number_cluster)+"_0.pdb"),os.path.join(outputDire,"../../../library/pklat_"+str(number_cluster)+"_0.pdb"),os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb")] for y in range(3): rol = copy.deepcopy(rota) rol["name"] = "ensemble"+str(y) brncv[rol["name"]] = models_final[y] hp.push((-1*rol["llg"],-1*rol["zscore"]),rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, outputDire, "final", brncv) convNames,CluAll,RotClu,encn = readClustersFromSUMToDB(DicParameters, os.path.join(outputDire,"final.sum"),"ROTSOL") """ """ for du in Dust: for item in du["heapSolutions"].asList(): pi,ri = item pdbf = cnv[ri["name"]] trim_small_chains(pdbf,3) """ return convNames def startOMITllg(DicParameters, cm, sym, DicGridConn, mode, sizes, nameJob, outputDire, model_file, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, spaceGroup, frag_fixed, convNames, quate, laue, ncs, clusteringAlg, cell_dim, thresholdCompare, evaLLONG, sumPath, howmany, indic, tops=None, USE_TNCS=True, LIMIT_CLUSTER=None, sampl=-1, VRMS=False, BFAC=False, trim_to_polyala=True, sigr=0.0, sigt=0.0, preserveChains=False, ent=None, BULK_FSOL=-1, BULK_BSOL=-1, 
RNP_GYRE=False,formfactors='FORMFACTORS XRAY'): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: :type DicGridConn: :param mode: :type mode: :param sizes: :type sizes: :param nameJob: :type nameJob: :param outputDire: :type outputDire: :param model_file: :type model_file: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param spaceGroup: :type spaceGroup: :param frag_fixed: :type frag_fixed: :param convNames: :type convNames: :param quate: :type quate: :param laue: :type laue: :param ncs: :type ncs: :param clusteringAlg: :type clusteringAlg: :param cell_dim: :type cell_dim: :param thresholdCompare: :type thresholdCompare: :param evaLLONG: :type evaLLONG: :param sumPath: :type sumPath: :param howmany: :type howmany: :param indic: :type indic: :param tops: :type tops: :param USE_TNCS: :type USE_TNCS: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param sampl: :type sampl: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param trim_to_polyala: :type trim_to_polyala: :param sigr: :type sigr: :param sigt: :type sigt: :param preserveChains: :type preserveChains: :param ent: :type ent: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :param RNP_GYRE: :type RNP_GYRE: :return: :rtype: """ limit_times = 2 size_rnp = copy.deepcopy(sizes) if mode == "RNP": limit_times = len(sizes) Clust = [] cnv = {} if sumPath != None and os.path.exists(sumPath): Clust, cnv = readClustersFromSUM(sumPath) else: din = {"heapSolutions": ADT.Heap()} lo = {"name": "ensemble1", "llg": 1.0, "zscore": 1.0, "numInRlist": 1, "n_prev_cluster": 0, "original_rotcluster": "0", "euler": [0.0, 0.0, 0.0], "frac": [0.0, 0.0, 0.0], "quaternion": [0.0, 0.0, 0.0, 0.0], "elong": 0, "bfactor": 0.0, "rotationMatrices": [[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]]} din["heapSolutions"].push((-1 * lo["llg"], -1 * lo["zscore"]), lo) Clust.append(din) cnv["ensemble1"] = model_file mode = "FRF" Dust = [] for tru in Clust: sol = tru["heapSolutions"].pop() Dust.append({"heapSolutions": ADT.Heap()}) while sol != None: prio, rota = sol try: sol = tru["heapSolutions"].pop() except: sol = None if LIMIT_CLUSTER != None and rota["n_prev_cluster"] != LIMIT_CLUSTER: continue # print "---------",rota["name"],cnv[rota["name"]] number_cluster = rota["n_prev_cluster"] times = 0 if True: times += 1 if not os.path.exists( os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum")): dipl = {} C = [] hp = ADT.Heap() brncv = {} tdncv = {} for y in range(len(howmany)): rol = copy.deepcopy(rota) rol["name"] = "ensemble" + str(y) brncv[rol["name"]] = howmany[y][0] tdncv[rol["name"]] = howmany[y][1] hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, outputDire, "examine", brncv) convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, os.path.join(outputDire, "examine.sum"), "ROTSOL") SystemUtility.open_connection(DicGridConn, DicParameters, cm) if mode == "RNP": ######METHOD RNP######## (nqueue10, convino) = startRNP(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times), CluAll, brncv, os.path.join(outputDire, str(times)), mtz, MW, NC, F, SIGF, Intensities, Aniso, 
nice, RMSD, lowR, highR, spaceGroup, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER, sampl=sampl, tops=tops, VRMS=VRMS, USE_TNCS=USE_TNCS, USE_RGR=USE_RGR, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL, RNP_GYRE=RNP_GYRE) CluAll, convNames, tolose = evaluateFTF(DicParameters=DicParameters, cm=cm, sym=sym, DicGridConn=DicGridConn, nameJob=nameJob + rota["name"] + "_" + str(times), outputDicr=os.path.join(outputDire, str(times)), nqueue=nqueue10, ensembles=brncv, excludeZscore=-10, fixed_frags=frag_fixed, quate=quate, mode="RNP", laue=laue, listNCS=ncs, clusteringMode=clusteringAlg, cell_dim=cell_dim, thresholdCompare=thresholdCompare, evaLLONG=evaLLONG, LIMIT_CLUSTER=LIMIT_CLUSTER, convNames=convino, tops=1) sumPACK = os.path.join(outputDire, "examine.sum") CluAll, convNames = mergeZSCOREinRNP(DicParameters, sumPACK, CluAll, convNames) writeSumClusters(CluAll, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", convNames, LIMIT_CLUSTER=LIMIT_CLUSTER) elif mode == "FRF": (nqueue, convNames) = startFRF(DicParameters=DicParameters, cm=cm, sym=sym, nameJob=nameJob + rota["name"] + "_" + str(times), dir_o_liFile=os.path.join(outputDire, rota["name"] + "_" + str( times) + "_models"), outputDire=os.path.join(outputDire, str(times)), mtz=mtz, MW=MW, NC=NC, F=F, SIGF=SIGF, Intensities=Intensities, Aniso=Aniso, normfactors=normfactors, tncsfactors=tncsfactors, nice=nice, RMSD=RMSD, lowR=lowR, highR=highR, final_rot=75, save_rot=75, frag_fixed=frag_fixed, spaceGroup=spaceGroup, sampl=sampl, VRMS=VRMS, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL) CluAll, Rotclu = evaluateFRF_clusterOnce(DicParameters=DicParameters, cm=cm, sym=sym, DicGridConn=DicGridConn, RotClu=[], nameJob=nameJob + rota["name"] + "_" + str(times), outputDicr=os.path.join(outputDire, str(times)), nqueue=nqueue, quate=quate, laue=laue, ncs=ncs, spaceGroup=spaceGroup, ensembles=convNames, clusteringAlg=clusteringAlg, excludeLLG=-100.0, fixed_frags=frag_fixed, cell_dim=cell_dim, thresholdCompare=thresholdCompare, evaLLONG=evaLLONG, isArcimboldo = False, tops = None, LIMIT_CLUSTER = None, applyNameFilter = False, candelete = True, giveids = False, merge = [], make_positive_llg = False) writeSumClusters(CluAll, outputDire, "clus", convNames) Clulu, cnvu = readClustersFromSUM(os.path.join(outputDire, "clus.sum")) Dur = [{"heapSolutions": ADT.Heap()}] cndn = {} for cr in Clulu: for item in cr["heapSolutions"].asList(): pr, ty = item pdf = cnvu[ty["name"]] vks = os.path.basename(pdf).split("-")[1][:-4] ty["name"] = "ensemble" + str(vks) cndn[ty["name"]] = pdf ty["n_prev_cluster"] = 0 ty["original_rotcluster"] = "0" Dur[0]["heapSolutions"].push((-1 * ty["llg"], -1 * ty["zscore"]), ty) writeSumClusters(Dur, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", cndn) # convNames = readClustersFromSUMToDB(DicParameters,os.path.join(outputDire, "clus_merged.sum"),"ROTSOL") elif mode == "BRF": nq, conv2 = startBRF(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times) + "_A", CluAll, brncv, os.path.join(outputDire, str(times) + "_A"), mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, 1, spaceGroup, 75, sampl=sampl, LIMIT_CLUSTER=LIMIT_CLUSTER, USE_TNCS=USE_TNCS, isOMIT=True, VRMS=VRMS, BFAC=BFAC, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL) SystemUtility.endCheckQueue() convNames, Clud = evaluateBRF(DicParameters, cm, sym, DicGridConn, nameJob + rota["name"] + "_" + str(times) + "_A", os.path.join(outputDire, str(times) + "_A"), True, cell_dim, quate, conv2, None, brncv, 
LIMIT_CLUSTER=LIMIT_CLUSTER, isOMIT=True) writeSumClusters(Clud, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", convNames) else: # if mode == "RGR": ######METHOD RGR######## nq, conv2 = startRGR(DicParameters, cm, sym, nameJob + rota["name"] + "_" + str(times) + "_A", CluAll, brncv, os.path.join(outputDire, str(times) + "_A"), mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, 1, spaceGroup, 75, sampl=sampl, LIMIT_CLUSTER=LIMIT_CLUSTER, USE_TNCS=USE_TNCS, isOMIT=True, VRMS=VRMS, BFAC=BFAC, sigr=sigr, sigt=sigt, preserveChains=preserveChains, BULK_FSOL=BULK_FSOL, BULK_BSOL=BULK_BSOL) SystemUtility.endCheckQueue() convNames, Clud = evaluateRGR(DicParameters, cm, sym, DicGridConn, nameJob + rota["name"] + "_" + str(times) + "_A", os.path.join(outputDire, str(times) + "_A"), True, cell_dim, quate, conv2, None, brncv, LIMIT_CLUSTER=LIMIT_CLUSTER, isOMIT=True, ent=ent) writeSumClusters(Clud, outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END", convNames) Cnew, cnvnew = readClustersFromSUM( os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum")) os.remove(os.path.join(outputDire, "examine.sum")) if os.path.exists(os.path.join(outputDire, "examine.sum")): os.remove(os.path.join(outputDire, "examine.sum")) Cnew, cnvnew = readClustersFromSUM( os.path.join(outputDire, "clust_" + rota["name"] + "_" + str(times) + "_END.sum")) liall = Cnew[0]["heapSolutions"].asList() fildname = os.path.join(outputDire, str(times) + "_graph") if not os.path.exists(os.path.join(outputDire, "../../../library/peaks_" + str( number_cluster) + "_0.pdb")) or not os.path.exists(os.path.join(outputDire, "../../../library/pklat_" + str( number_cluster) + "_0.pdb")) or not os.path.exists( os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")): qe = open(fildname + ".scri", "w") qc = open(fildname + "_var.scri", "w") qe.write("set terminal png size 2400,2400\nset output \"" + fildname + ".png\"\n") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_var.png\"\n") totl = (sizes[1] - sizes[0]) / 2 qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qe.write("set multiplot layout " + str(totl) + ",2 columnsfirst\n") res_wllg = {} qn = open(fildname + "_var.data", "w") qn.write("#\tX\tY\n") for key in sorted(indic.keys()): qo = open(fildname + "_" + str(key) + ".data", "w") qo.write("#\tX\tY\n") qot = open(fildname + "_" + str(key) + "TOP.data", "w") qot.write("#\tX\tY\n") start, fine = indic[key] all_var = [] for jk in range(start, fine): svd = [] for item in liall: pr, rk = item if rk["name"] == "ensemble" + str(jk): qo.write(str(jk) + "\t" + str(rk["llg"]) + "\t\t#" + str(howmany[jk][2]) + "\n") svd.append(rk["llg"]) all_var.append(rk["llg"]) howmany[jk].append(rk["llg"]) for rendr in howmany[jk][2]: if rendr in res_wllg.keys(): res_wllg[rendr] = [res_wllg[rendr][0] + ( rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0]))), res_wllg[rendr][1] + 1] else: res_wllg[rendr] = [ rk["llg"] / float(Bioinformatics.getNumberOfResidues(howmany[jk][0])), 1] svd = sorted(svd, reverse=True) # print "---",jk,svd qot.write(str(jk) + "\t" + str(svd[0]) + "\t\t#" + str(howmany[jk][2]) + "\n") qn.write(str(key) + "\t" + str(numpy.mean(all_var)) + "\t" + str(numpy.std(all_var)) + "\n") qo.close() qot.close() qe.write("plot \"" + fildname 
+ "_" + str(key) + ".data\" using 1:2 title \"p" + str( key) + "\" with points, \"" + fildname + "_" + str( key) + ".data\" using 1:2 title \"l" + str( key) + "\" smooth csplines, \"" + fildname + "_" + str( key) + "TOP.data\" using 1:2 title \"t" + str(key) + "\" with lines\n") qe.write("unset multiplot\n") qc.write( "plot \"" + fildname + "_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qn.close() qe.close() qc = open(fildname + "_wllg_var.scri", "w") qc.write("set terminal png size 800,800\nset output \"" + fildname + "_wllg_var.png\"\n") qc.write("unset key\n") qc.write("set style line 1 lc rgb 'grey30' ps 0 lt 1 lw 2\n") qc.write("set style line 2 lc rgb 'grey70' lt 1 lw 2\n") qc.write("#set style fill solid 1.0 border rgb 'grey30'\n") qc.write("set border 3\n") qc.write( "plot \"" + fildname + "_wllg_var.data\" u 0:2:($3**2) w yerrorbars ls 1, '' u 0:2:(0.7):xtic(1) w boxes ls 2\n") qc.close() qo = open(fildname + "_wllg.data", "w") qo.write("#\tX\tY\n") chiavi = [] valori = [] for key in sorted(res_wllg.keys()): # qo.write(str(key)+"\t"+str(-1*numpy.log(res_wllg[key][0]/float(res_wllg[key][1])))+"\t\t#"+str(res_wllg[key])+"\n") qo.write(str(key) + "\t" + str(res_wllg[key][0] / float(res_wllg[key][1])) + "\t\t#" + str( res_wllg[key]) + "\n") chiavi.append(key) valori.append(res_wllg[key][0] / float(res_wllg[key][1])) qo.close() # IMPORTANT: Find the peaks def __cmp_peaks(a, b): return cmp(a[1], b[1]) max_peaks = ADT.top_max_peaks(valori, chiavi, 4) minp = (sorted(max_peaks, __cmp_peaks))[0] flat_regions = ADT.flat_regions(valori, chiavi, minp[1]) # FIND THE RESIDUES RANGES TO OMIT FOR EACH PEAK del_resi = [] for peak in max_peaks: all_kurtosis = [] res_center = peak[0] for key in indic.keys(): llgs = [] llg_resi = [] start, fine = indic[key] for blur in range(start, fine): if res_center in howmany[blur][2]: llgs.append(howmany[blur][3]) llg_resi.append((howmany[blur][2], howmany[blur][3])) topllgresi = (sorted(llg_resi, __cmp_peaks, reverse=True))[0] all_kurtosis.append((key, scipy.stats.kurtosis(llgs), topllgresi)) print "ANALYSIS PEAK", peak print "....................................KURTOSIS.................................." for kurto in all_kurtosis: print kurto print ".............................................................................." 
topkurtosis = (sorted(all_kurtosis, __cmp_peaks, reverse=True))[0] del_resi.append(topkurtosis[2]) res_list_40, safe_llg_40 = ADT.get_percentile_llg(valori, chiavi, 0.40) res_list_50, safe_llg_50 = ADT.get_percentile_llg(valori, chiavi, 0.50) res_list_55, safe_llg_55 = ADT.get_percentile_llg(valori, chiavi, 0.55) res_list_60, safe_llg_60 = ADT.get_percentile_llg(valori, chiavi, 0.60) res_list_65, safe_llg_65 = ADT.get_percentile_llg(valori, chiavi, 0.65) res_list_70, safe_llg_70 = ADT.get_percentile_llg(valori, chiavi, 0.70) res_list_75, safe_llg_75 = ADT.get_percentile_llg(valori, chiavi, 0.75) res_list_80, safe_llg_80 = ADT.get_percentile_llg(valori, chiavi, 0.80) res_list_85, safe_llg_85 = ADT.get_percentile_llg(valori, chiavi, 0.85) print "===========RESIDUE TO OMIT=======" resi_list_1 = [] for lista in del_resi: print "--", lista, "--" resi_list_1 += lista[0] print "=================================" print print "===========FLAT REGIONS==========" resi_list_2 = copy.deepcopy(resi_list_1) for flat in flat_regions: print range(flat[0], flat[1] + 1) resi_list_2 += range(flat[0], flat[1] + 1) print "=================================" trimmed_portion = ADT.erase_bad_region(valori, chiavi, minp[1]) resi_list_3 = trimmed_portion print print "//////////TRIMMED REGIONS////////" print trimmed_portion print "/////////////////////////////////" print if not os.path.exists(fildname + "_wllg.scri"): qe = open(fildname + "_wllg.scri", "w") qe.write("set terminal png size 4200,1200\nset output \"" + fildname + "_wllg.png\"\n") qe.write("set xtics 1\n") for pek in max_peaks: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead" + "\n") for pek in flat_regions: qe.write("set arrow from " + str(int(pek[0])) + ", graph 0 to " + str( int(pek[0])) + ", graph 1 nohead lt 3 lc 3" + "\n") qe.write("set arrow from " + str(int(pek[1])) + ", graph 0 to " + str( int(pek[1])) + ", graph 1 nohead lt 3 lc 3" + "\n") # qe.write("plot \""+fildname+"_wllg.data\" using 1:2 title \"p\" with points, \""+fildname+"_wllg.data\" using 1:2 title \"l\" smooth csplines, "+str(minp[1])+" notitle with lines, "+str(safe_llg)+" notitle with lines\n") qe.write( "plot \"" + fildname + "_wllg.data\" using 1:2 title \"p\" with points, \"" + fildname + "_wllg.data\" using 1:2 title \"l\" smooth csplines, " + str( safe_llg_40) + " title \"p40\" with lines, " + str( safe_llg_50) + " title \"p50\" with lines, " + str( safe_llg_55) + " title \"p55\" with lines, " + str( safe_llg_60) + " title \"p60\" with lines, " + str( safe_llg_65) + " title \"p65\" with lines, " + str( safe_llg_70) + " title \"p70\" with lines, " + str( safe_llg_75) + " title \"p75\" with lines, " + str( safe_llg_80) + " title \"p80\" with lines, " + str( safe_llg_85) + " title \"p85\" with lines\n") qe.close() qo = open(fildname + "_wllg_var.data", "w") qo.write("#\tX\tY\n") # all_var = map(lambda x: -1*numpy.log(x[0]/float(x[1])),res_wllg.values()) all_var = map(lambda x: x[0] / float(x[1]), res_wllg.values()) qo.write(str(LIMIT_CLUSTER) + "\t" + str(numpy.mean(all_var)) + "\t" + str( numpy.std(all_var)) + "\n") qo.close() resi_1 = [] resi_2 = [] resi_3 = [] for resi in resi_list_1: if resi in res_list_75: continue else: resi_1.append(resi) for resi in resi_list_2: if resi in res_list_75: continue else: resi_2.append(resi) for resi in resi_list_3: if resi in res_list_75: continue else: resi_3.append(resi) Bioinformatics.generatePDBomitting(model_file, resi_list_1, os.path.join(outputDire, "../../../library/peaks_" + 
str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDBomitting(model_file, resi_list_2, os.path.join(outputDire, "../../../library/pklat_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) # Bioinformatics.generatePDBomitting(model_file,resi_list_3,os.path.join(outputDire,"../../../library/overt_"+str(number_cluster)+"_0.pdb"),trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, resi_list_3, os.path.join(outputDire, "../../../library/overt_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_40, os.path.join(outputDire, "../../../library/percentile40_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_50, os.path.join(outputDire, "../../../library/percentile50_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_55, os.path.join(outputDire, "../../../library/percentile55_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_60, os.path.join(outputDire, "../../../library/percentile60_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_65, os.path.join(outputDire, "../../../library/percentile65_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_70, os.path.join(outputDire, "../../../library/percentile70_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_75, os.path.join(outputDire, "../../../library/percentile75_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_80, os.path.join(outputDire, "../../../library/percentile80_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) Bioinformatics.generatePDB(model_file, res_list_85, os.path.join(outputDire, "../../../library/percentile85_" + str( number_cluster) + "_0.pdb"), trim_to_polyala=trim_to_polyala) dipl = {} C = [] hp = ADT.Heap() brncv = {} models_final = [os.path.join(outputDire, "../../../library/peaks_" + str(number_cluster) + "_0.pdb"), os.path.join(outputDire, "../../../library/pklat_" + str(number_cluster) + "_0.pdb"), os.path.join(outputDire, "../../../library/overt_" + str(number_cluster) + "_0.pdb")] for y in range(3): rol = copy.deepcopy(rota) rol["name"] = "ensemble" + str(y) brncv[rol["name"]] = models_final[y] hp.push((-1 * rol["llg"], -1 * rol["zscore"]), rol) dipl["heapSolutions"] = hp C.append(dipl) writeSumClusters(C, outputDire, "final", brncv) convNames, CluAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, os.path.join(outputDire, "final.sum"), "ROTSOL") for du in Dust: for item in du["heapSolutions"].asList(): pi, ri = item pdbf = cnv[ri["name"]] trim_small_chains(pdbf, 3) return convNames def startMR_ELLG(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, frag_fixed, ensembles={}, fromN=0, toN=None, ellg_target=30, formfactors='FORMFACTORS XRAY'): """ Given a list of pdbs and their rmsd, computes both the eLLG with the model and the number of residues required to reach a target eLLG :param DicParameters: dictionary with key-value pairs required at different steps. 
Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param dir_o_liFile: path with input files :type dir_o_liFile: str :param outputDire: path to save output files :type outputDire: str :param mtz: path to the mtz file with the original data :type mtz: str :param MW: molecular weight :type MW: float :param NC: number of component :type NC: integer :param F: label for the intentities (I) or amplitudes (F) :type F: str :param SIGF: label for the sigmas of the intentities (SIGI) or the amplitudes (SIGF) :type SIGF: str :param Intensities: boolean to be removed, currently not used :param Aniso: boolean to be removed, currently not used :param normfactors: path to the binary file with the anisotropy correction for the data :type normfactors: str :param tncsfactors: path to the binary file with the tNCS correction for the data :type tncsfactors: str :param nice: in multiprocessing, priority of the jobs. −20 is the highest priority and 19 is the lowest priority :type nice: int :param RMSD: rmsd value to be used in the phaser job :type RMSD: float :param lowR: low resolution limit to be used in phaser :type lowR: float :param highR: high resolution limit to be used in phaser :type highR: float :param frag_fixed: number of fixed fragments :type frag_fixed: int :param ensembles: ensemble to pdb filepath dictionary :type ensembles: dict :param fromN: starting file to process :type fromN: int :param toN: final file to process :type toN: int :param ellg_target: expected LLG target to be used in the phaser run :type ellg_target: float :return (nq, ensembles): queue number, ensembles :rtype (nq, ensembles): tuple """ dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): # Remote grid current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall # First fragment case if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile hwm = -1 for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): print "Preparing: " + str(pdbf) + " as model " + str(counter) hwm += 1 if fromN >= 0 and toN != None and hwm < fromN: continue elif fromN >= 0 and toN != None and hwm >= toN: break if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "ellg.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print 'There was an error in the linking of the files' print sys.exc_info() traceback.print_exc(file=sys.stdout) pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "ellg.mtz") 
cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") ensembles["ensemble" + str(counter + 1)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_ELLG" + "\n") f.write('HKLIN "ellg.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE eLLG computation" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("ELLG TARG " + str(ellg_target) + "\n") f.write("ELLG POLYALANINE RMSD " + str(RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " DISABLE CHECK ON" + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) elif frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile, list): # N fragment case print "Sorry, multiple fragments case is not yet implemented" sys.exit(0) def startMR_ELLGJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".rlist")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
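# The phaser child process below reads the keyword script on stdin and its log goes to <op>.out;
# one batch entry run by hand would look like (illustrative paths and nice level):
#     nice -n 10 /path/to/phaser < 0/0.sh > 0/0.out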
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): # if frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile,list): # listaDirec.append((cm.get_remote_pwd(),numero)) # else: listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "MR_ELLG of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startMR_ELLGJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("ellg.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return (nq, ensembles) def startFRF(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, ensembles={}, tops=None, LIMIT_CLUSTER=None, justpdb=None, fromN=0, toN=None, VRMS=False, BFAC=True, BULK_FSOL=-1, BULK_BSOL=-1, formfactors='FORMFACTORS XRAY'): """ Performs phaser's fast rotation function :param DicParameters: dictionary with key-value pairs required at different steps. 
Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param dir_o_liFile: :type dir_o_liFile: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param final_rot: :type final_rot: :param save_rot: :type save_rot: :param frag_fixed: :type frag_fixed: :param spaceGroup: :type spaceGroup: :param sampl: :type sampl: :param ensembles: :type ensembles: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param justpdb: :type justpdb: :param fromN: :type fromN: :param toN: :type toN: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :return: :rtype: """ # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile hwm = -1 for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) # print "Found the pdb named",pdbf if fileu.endswith(".pdb") and "inverted" not in fileu: if justpdb != None and pdbf != os.path.abspath(justpdb): continue # print "Preparing: "+str(pdbf)+" as model "+str(counter) hwm += 1 if fromN >= 0 and toN != None and hwm < fromN: continue elif fromN >= 0 and toN != None and hwm >= toN: break if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rot.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rot.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") ensembles["ensemble" + str(counter + 1)] = pdbf f = 
open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_FRF" + "\n") f.write('HKLIN "rot.mtz"' + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") f.write("TITLE Test rotation for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if not Aniso: # Then we read instead of recomputing f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT OFF" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " DISABLE CHECK ON" + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(counter + 1) + "\n") f.write("SEARCH METHOD FAST" + "\n") f.write("PEAKS ROT DOWN 0" + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) elif frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile, list): # even if frag_fixed is 2 we have not yet computed anything for the second fragment search, so we have to recollect the data # as they were at the first fragment; then, when we evaluateFRF, we will change the content of the database and the FTF can # be computed treating the data as 2 fragments.
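# NOTE (added): for reference, each <counter>.sh written by the first-fragment
# loop above assembles into a Phaser MR_FRF input like the sketch below (the
# FP/SIGFP labels and all numeric values are hypothetical placeholders that in
# reality come from the function arguments); the branch that follows rebuilds
# the same kind of script but prepends SOLU SET / SOLU 6DIM entries for the
# fixed fragments:
#   #!/bin/tcsh
#   MODE MR_FRF
#   HKLIN "rot.mtz"
#   FORMFACTORS XRAY
#   RFACTOR USE OFF CUTOFF 35
#   LABIN F=FP SIGF=SIGFP
#   TITLE Test rotation for Grid
#   JOBS 1
#   COMPOSITION PROTEIN MW 24000 NUMBER 2
#   MACMR ROT ON TRA ON BFAC OFF VRMS OFF
#   MACANO PROTOCOL OFF
#   MACTNCS PROTOCOL OFF
#   TNCS EPSFAC READ anis.tncs
#   NORM EPSFAC READ anis.norm
#   RESOLUTION 99.0 2.0
#   XYZOUT OFF
#   ENSEMBLE ensemble1 PDBFILE 0.pdb RMS 1.2
#   ENSEMBLE ensemble1 DISABLE CHECK ON
#   SEARCH ENSEMBLE ensemble1
#   SEARCH METHOD FAST
#   PEAKS ROT DOWN 0
#   PEAKS ROT CLUSTER ON
#   SOLPARAMETERS BULK USE OFF
#   PEAKS ROT SELECT PERCENT
#   PEAKS ROT CUTOFF 75
#   ROOT "0"
#   END
#   EOF-phaser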
ensels = [] tovalu = frag_fixed if frag_fixed == 2: tovalu = 1 cou = 0 ClusAll = None if LIMIT_CLUSTER != None: ClusAll = dir_o_liFile[LIMIT_CLUSTER] else: ClusAll = {} nh = ADT.Heap() for clun in dir_o_liFile: for item in clun["heapSolutions"]: prio, rota = item nh.push(prio, rota) ClusAll["heapSolutions"] = nh for sol in ClusAll["heapSolutions"]: if tops != None and cou >= tops: break rota = sol[1] prio = sol[0] pdbf = "" # list_pdbs maps pdb path -> (ensemble name, file index, is_search_ensemble, rmsd) list_pdbs = {} ct = 0 enselines = "" if frag_fixed > 1 and "ensemble" + str(frag_fixed) in ensembles: pdbf = ensembles["ensemble" + str(frag_fixed)] if pdbf not in list_pdbs.keys(): list_pdbs[pdbf] = ("ensemble" + str(frag_fixed), ct, True, RMSD) # New search ensemble, so the current RMSD must be used ct += 1 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, False, rota["rmsd"] if "rmsd" in rota else RMSD) ct += 1 elif list_pdbs[ensembles[rota["name"]]][0] != rota["name"]: dfr = list_pdbs[ensembles[rota["name"]]][1] snc = "ENSEMBLE " + rota["name"] + " PDBFILE " + str(dfr) + "_" + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n" snc += "ENSEMBLE " + rota["name"] + " DISABLE CHECK ON" + "\n" if snc not in ensels: enselines += snc ensels.append(snc) else: pdbf = ensembles[rota["name"]] # ensembles["ensemble"+str(frag_fixed)] = pdbf if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True, RMSD) # New search ensemble, so the current RMSD must be used ct += 1 # rota["name"] = "ensemble"+str(frag_fixed) if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False, rotafi["rmsd"] if "rmsd" in rotafi else RMSD) ct += 1 elif list_pdbs[ensembles[rotafi["name"]]][0] != rotafi["name"]: dfr = list_pdbs[ensembles[rotafi["name"]]][1] snc = "ENSEMBLE " + rotafi["name"] + " PDBFILE " + str(dfr) + "_" + str( counter) + ".pdb RMS " + str(rotafi["rmsd"] if "rmsd" in rotafi else RMSD) + "\n" snc += "ENSEMBLE " + rotafi["name"] + " DISABLE CHECK ON" + "\n" if snc not in ensels: enselines += snc ensels.append(snc) numall = ct if numall > max_num_all: max_num_all = numall # if usePDB != None: # pdbf = usePDB # print "Preparing job for model: "+str(pdbf)+" in cluster "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rot.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rot.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2)))
cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") headlines = "" taillines = "" headlines += "#!/bin/tcsh" + "\n" headlines += "MODE MR_FRF" + "\n" headlines += 'HKLIN "rot.mtz"' + "\n" if not Intensities: headlines += 'LABIN F=' + F + ' SIGF=' + SIGF + '\n' else: headlines += 'LABIN I=' + F + ' SIGI=' + SIGF + '\n' # NOTE CM: testing micro ED data headlines += formfactors + "\n" headlines += "RFACTOR USE OFF CUTOFF 35" + "\n" # NOTE CM: testing micro ED data headlines += "TITLE Test rotation for Grid" + "\n" headlines += "JOBS 1" + "\n" headlines += "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n" headlines += "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n" if not Aniso: headlines += "MACANO PROTOCOL OFF" + "\n" headlines += "MACTNCS PROTOCOL OFF" + "\n" headlines += "TNCS EPSFAC READ anis.tncs" + "\n" headlines += "NORM EPSFAC READ anis.norm" + "\n" headlines += "RESOLUTION " + str(lowR) + " " + str(highR) + "\n" headlines += "XYZOUT OFF" + "\n" for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] snc = "ENSEMBLE " + val[0] + " PDBFILE " + str(val[1]) + "_" + str(counter) + ".pdb RMS " + str(val[3]) + "\n" snc += "ENSEMBLE " + val[0] + " DISABLE CHECK ON" + "\n" if snc not in ensels: enselines += snc ensels.append(snc) if val[2]: enselines += "SEARCH ENSEMBLE " + val[0] + "\n" if frag_fixed > 1: taillines += "SOLU SET" + "\n" taillines += "SOLU SPAC " + str(spaceGroup) + "\n" if frag_fixed > 2: for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) taillines += "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n" taillines += "SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(rota["llg"]) + " ZSCORE: " + str( rota["zscore"]) + "\n" taillines += "SEARCH METHOD FAST" + "\n" taillines += "PEAKS ROT DOWN 0" + "\n" taillines += "PEAKS ROT CLUSTER ON" + "\n" if BULK_BSOL >= 0 and BULK_FSOL >= 0: taillines += "SOLPARAMETERS BULK USE ON" + "\n" taillines += "SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n" else: taillines += "SOLPARAMETERS BULK USE OFF" + "\n" if sampl != -1: taillines += "SAMPLING ROT " + str(sampl) + "\n" taillines += "PEAKS ROT SELECT PERCENT" + "\n" taillines += "PEAKS ROT CUTOFF " + str(save_rot) + "\n" taillines += 'ROOT "' + str(counter) + '"\n' taillines += "END\n" taillines += "EOF-phaser" f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write(headlines) f.write(enselines) f.write(taillines) f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),"./"+str(counter)+".sh") counter += 1 # print 
"checking",counter,counter%NUMBER_OF_FILES_PER_DIRECTORY,ClusAll["heapSolutions"].len(),counter >= 1 and ClusAll["heapSolutions"].len() == 0 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (counter >= 1 and ClusAll["heapSolutions"].len() == 0): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) cou += 1 if counter <= 0: print "WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE ROTATION SEARCH. " \ "MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFAULT IS POSITIVE VALUES)...ENDING NOW..." # sys.exit(0) return counter def startFRFJob(outputDirectory, op): """ :param outputDirectory: path to folder for the output :type outputDirectory: str :param op: :type op: :return: :rtype: """ if os.path.exists(os.path.join(outputDirectory, str(op) + ".rlist")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") print " *** RF Job %s / %s***\n" % ( op + 1, counter) # NS ADD inform the user about how many jobs are remaining p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): # if frag_fixed > 1 or len(ensembles.keys()) > 0 and isinstance(dir_o_liFile,list): # listaDirec.append((cm.get_remote_pwd(),numero)) # else: listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "FRF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) print "Picking all rotations with a peak >= " + str(final_rot) print "Saving all rotations with a peak >= " + str(save_rot) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startFRFJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." 
sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rot.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return (nq, ensembles) def startPlaneElongation(inputDirectory, outputDirectory, n_res, n_trials, n_copies): """ :param inputDirectory: :type inputDirectory: :param outputDirectory: path to folder for the output :type outputDirectory: str :param n_res: :type n_res: :param n_trials: :type n_trials: :param n_copies: :type n_copies: :return: :rtype: """ import Data if n_copies > 3: print "Cannot use more than 3 copies to expand the partial solution. Quitting..." sys.exit(1) if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) for root, subFolders, files in os.walk(inputDirectory): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): for q in range(n_trials): rmsd = None allAtoms = [] atoms_add = [] pdball = None f = open(pdbf, "r") allpdb = f.read() f.close() struc = Bioinformatics.getStructure(os.path.basename(pdbf), pdbf) list_res = [] for model in struc: for chain in model: list_res += chain.get_unpacked_list() for resi in list_res: for atm in resi: atoms_add.append(atm) charch = ["W", "Y", "Z"] sx = 0 for key in Data.frequent_folds.keys(): pdball = "" sx += 1 struttura = Data.frequent_folds[key] struc_c = Bioinformatics.getStructure(key, cStringIO.StringIO(struttura)) list_res_c = [] atoms_add_c = [] for model_c in struc_c: for chain_c in model_c: list_res_c += chain_c.get_unpacked_list() for resi_c in list_res_c: for atm_c in resi_c: atoms_add_c.append(atm_c) for nd in range(n_copies): indo = numpy.random.randint(len(list_res) - n_res) reference = [] for z in range(indo, indo + n_res): atomCA = list_res[z]["CA"] atomC = list_res[z]["C"] atomO = list_res[z]["O"] atomN = list_res[z]["N"] reference.append(atomCA.get_coord()) reference.append(atomC.get_coord()) reference.append(atomO.get_coord()) reference.append(atomN.get_coord()) best_rt = (None, None) best_rmsd = 100 rmsd = 100 times = 0 while times < 5000: times += 1 compare = [] indi = numpy.random.randint(len(list_res_c) - n_res) for z in range(indi, indi + n_res): atomCA = list_res_c[z]["CA"] atomC = list_res_c[z]["C"] atomO = list_res_c[z]["O"] atomN = list_res_c[z]["N"] compare.append(atomCA.get_coord()) compare.append(atomC.get_coord()) compare.append(atomO.get_coord()) compare.append(atomN.get_coord()) transf, rmsd_list, rmsd = Bioinformatics.fit_wellordered(numpy.array(reference), numpy.array(compare), n_iter=1, full_output=True) R, t = transf if rmsd < best_rmsd: best_rmsd = rmsd best_rt = (R, t) # print "Selected best rmsd for",key,"is",best_rmsd,times allAtoms = Bioinformatics.transform_atoms(atoms_add_c, best_rt[0], best_rt[1]) pdball += Bioinformatics.getPDBFromListOfAtom(allAtoms, renumber=True, uniqueChain=True, chainId=charch[nd])[0] + "\n" filename = os.path.join(outputDirectory, os.path.basename(pdbf)[:-4] + "_" + str(q) + "-" + str(sx) + ".pdb") print "File", os.path.basename(filename), "rmsd:", best_rmsd f = open(filename, "w") 
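# NOTE (added): the file written below concatenates the original model
# (allpdb) with the best-fitting rototranslated copies of the frequent folds
# (pdball; each copy is renumbered onto its own unique chain, ids W/Y/Z),
# under the name <model>_<trial>-<fold_index>.pdb.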
f.write(allpdb + "\n") f.write(pdball + "\n") f.close() def startRandomlyExpand(inputDirectory, outputDirectory, ray, n_ca_pick, n_trials): """ :param inputDirectory: :type inputDirectory: :param outputDirectory: path to folder for the output :type outputDirectory: str :param ray: :type ray: :param n_ca_pick: :type n_ca_pick: :param n_trials: :type n_trials: :return: :rtype: """ ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) for root, subFolders, files in os.walk(inputDirectory): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): for q in range(n_trials): atoms_add = [] f = open(pdbf, "r") allpdb = f.read() f.close() struc = Bioinformatics.getStructure(os.path.basename(pdbf), pdbf) for model in struc: for chain in model: list_res = chain.get_unpacked_list() indices = numpy.random.randint(len(list_res) - 1, size=n_ca_pick) print "Picking random indices", indices for index in indices: atom_ca = list_res[index]["CA"] nx, ny, nz = atom_ca.get_coord() cz = numpy.random.randint(-1 * ray, high=ray) phi = numpy.random.uniform(low=0.0, high=2.0) nx += numpy.sqrt(ray ** 2 - cz ** 2) * numpy.cos(phi) ny += numpy.sqrt(ray ** 2 - cz ** 2) * numpy.sin(phi) nz += cz atoms_add.append([nx, ny, nz, atom_ca.get_parent().get_segid(), atom_ca.get_name(), atom_ca.get_fullname(), atom_ca.get_altloc()]) filename = os.path.join(outputDirectory, os.path.basename(pdbf)[:-4] + "_" + str(q) + ".pdb") f = open(filename, "w") f.write(allpdb + "\n") atom_number = 1 resseq = 1 line = "" for atom in atoms_add: args = ( "ATOM ", atom_number, atom[5], atom[6], 'ALA', 'Z', resseq, ' ', atom[0], atom[1], atom[2], 1.0, 25.0, atom[3], 'C', ' ') line = ATOM_FORMAT_STRING % args resseq += 1 atom_number += 1 f.write(line) f.close() def startPREPARE(cm, sym, nameJob, CC_Val, outputDirectory, cell_dim, spaceGroup, nTop, topNext=None, always_include_names_with_key=None): """ Prepares the set of pdbs that will be sent for expansion with SHELXE # NOTE CM: This three variables are not used at all in the function, we should remove them :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job :type nameJob: str # NOTE CM: This three variables are not used at all in the function, we should remove them :param CC_Val: list of dictionaries. 
Each one is a solution, and contains the following keys: {"model": model, "corresp": corresp, "natoms": nAtoms, "ner": nER, "initcc": initCC, "finalcc": finalCC, "cluster": cluster, "wMPE_init": [wMPEa, wMPEb], "wMPE_end": [wMPEc, wMPEd], "shift_origin": [shx, shy, shz], "contrast": contrast, "connect": connect, "mfom": mfom, "sfom": sfom} :type CC_Val: list :param outputDirectory: path to folder for the output :type outputDirectory: str :param cell_dim: list with the six unit cell parameters [a, b, c, alpha, beta, gamma] :type cell_dim: list :param spaceGroup: space group symbol :type spaceGroup: str :param nTop: maximum number of top solutions to prepare (None keeps them all) :type nTop: int :param topNext: :type topNext: :param always_include_names_with_key: substring; models whose path contains it are prepared even beyond nTop :type always_include_names_with_key: str :return: convNames dictionary :rtype: dict """ if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) counter = 0 convNames = {} if topNext != None: counter = 0 listaWork_key = map(lambda x: "ensembleIDxx" + os.path.basename(x["corresp"])[:-4].split("xx")[-1], CC_Val) listaWork_value = map(lambda x: x["initcc"], CC_Val) listaWork = dict(zip(listaWork_key, listaWork_value)) # print listaWork_key # print listaWork_value counter = 0 for clu in CC_Val: pdbf = clu["corresp"] if always_include_names_with_key is None and nTop != None and counter > nTop: break elif nTop != None and counter > nTop and always_include_names_with_key not in pdbf: continue # print clu print "Preparing: " + str(pdbf) + " as model " + str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if "suffix" in clu.keys(): nomino = os.path.basename(pdbf)[:-4] + clu["suffix"] + ".pdb" else: nomino = os.path.basename(pdbf) foc = open(outputDirectory + "/" + nomino, "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) foc.write(CRD.format(**data) + "\n") foc2 = open(pdbf, "r") data2 = foc2.readlines() datas = "" for lin in data2: ler = lin.split() if ler[0] != "CRYST1": datas += lin foc2.close() foc.write(datas) foc.close() convNames["ensemble" + str(counter)] = outputDirectory + "/" + nomino counter += 1 return convNames def getTheTOPNOfEachCluster(DicParameters, frag_fixed, dirout, mode, quate, ClusAll, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, modeTra="frac", LIMIT_CLUSTER=None, renameWithConvNames=False, sufixSolPos=False, appendToName="", where_to_write={}): """ Given a CluAll and a convNames object, returns a number of solutions from it, also writing the PDBs and applying rotations and/or translations to them.
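Note (added): besides the per-cluster lists of the best rotations, the function tracks the minimum and maximum llg and zscore seen among the returned solutions and returns them as well, in a foms dictionary of the form {"llg": [min, max], "zscore": [min, max]}.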
:param DicParameters: :type DicParameters: dict :param frag_fixed: :type frag_fixed: int :param dirout: :type dirout: :param mode: :type mode: :param quate: :type quate: :param ClusAll: :type ClusAll: :param convNames: :type convNames: :param ntop: :type ntop: :param writePDB: :type writePDB: :param performTranslation: :type performTranslation: :param elongatingModel: :type elongatingModel: :param createSimmetry: :type createSimmetry: :param cell_dim: :type cell_dim: :param laue: :type laue: :param ncs: :type ncs: :param modeTra: :type modeTra: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param renameWithConvNames: :type renameWithConvNames: :param sufixSolPos: :type sufixSolPos: :param appendToName: :type appendToName: :return: :rtype: """ if dirout != "" and not os.path.exists(dirout): os.makedirs(dirout) bests = [] foms = {"llg": [numpy.inf, 0.0], "zscore": [numpy.inf, 0.0]} for ci in range(len(ClusAll)): clu = ClusAll[ci] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != ci: continue s = 0 cou = 0 hp = ADT.Heap() liBest = [] i = 0 for sol in clu["heapSolutions"]: if ntop != None and i >= ntop: break bestRota = sol[1] prio = (bestRota["llg"], bestRota["zscore"]) i += 1 # print bestRota["name"],convNames[bestRota["name"]] liBest.append(bestRota) llg = bestRota["llg"] zscore = bestRota["zscore"] cluster_fin = bestRota["original_rotcluster"] if llg < (foms["llg"])[0]: (foms["llg"])[0] = llg if llg > (foms["llg"])[1]: (foms["llg"])[1] = llg if zscore < (foms["zscore"])[0]: (foms["zscore"])[0] = zscore if zscore > (foms["zscore"])[1]: (foms["zscore"])[1] = zscore if writePDB: pdbSt = [[]] if "fixed_frags" in bestRota: for frifr in bestRota["fixed_frags"]: allpdbli = __getStringOfRototranslatedStructure(DicParameters, frag_fixed, dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, frifr, ci, i, modeTra=modeTra, LIMIT_CLUSTER=LIMIT_CLUSTER, renameWithConvNames=renameWithConvNames, sufixSolPos=sufixSolPos, where_to_write=where_to_write) for pdbname in allpdbli: f = open(pdbname, "r") allpdb = f.read() f.close() os.remove(pdbname) pdbSt[-1].append(allpdb) pdbSt.append([]) allpdbli = __getStringOfRototranslatedStructure(DicParameters, frag_fixed, dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, bestRota, ci, i, modeTra=modeTra, LIMIT_CLUSTER=LIMIT_CLUSTER, renameWithConvNames=renameWithConvNames, sufixSolPos=sufixSolPos, where_to_write=where_to_write) for pdbname in allpdbli: f = open(pdbname, "r") allpdb = f.read() f.close() os.remove(pdbname) pdbSt[-1].append(allpdb) wow = numpy.array(pdbSt) for fileC in wow.transpose(): path_c, new_name_cond = os.path.split(os.path.normpath(allpdbli.pop(0))) new_name_cond = new_name_cond.split("_")[0] + appendToName + "_" + new_name_cond.split("_")[ 1] + "_" + new_name_cond.split("_")[2] f = open(os.path.join(path_c, new_name_cond), "w") druppo = str(cluster_fin) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo) + "\n") f.write(Bioinformatics.sequentialRenumberListOfPdbs(fileC)) f.close() bests.append(liBest) # for bestRota in liBest: # priority = (-1*bestRota["llg"], -1*bestRota["zscore"]) # clu["heapSolutions"].push(priority,bestRota) return bests, foms def filterClustersAndSolutionByCores(CluAll, sym): """ Reduces the number of rotation clusters to pursue as a function 
of the hardware available Args: CluAll (list with ADT.Heaps, one per rotation cluster): sym (SystemUtility object): Returns: Clu (list): same type as input, but now filtered accordingly """ numeroClus = 2 * sym.PROCESSES numeroRot = 100 * sym.PROCESSES # 64 CluAll = sorted(CluAll, __cmp_cluster, reverse=True) print "Original clusters", len(CluAll) n = 0 Clu = [] for clu in CluAll: if n >= numeroClus: # print "Break clusters because", n, numeroClus break s = 0 clun = {"heapSolutions": ADT.Heap()} for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): rota = item[1] prio = item[0] # print "CLUSTER", rota["n_prev_cluster"],rota["llg"],rota["zscore"] if s >= numeroRot: # print "Break rotations because", s, numeroRot break clun["heapSolutions"].push(prio, rota) s += 1 Clu.append(clun) n += 1 print "Final Clusters", len(Clu) return Clu def filterAndCountClusters(ClusAll, ensembles, mode, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg, unify=False): """ :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param mode: criterion to keep the best rotation per pdb, either 'llg' or 'zscore' :type mode: str :param quate: :type quate: :param laue: :type laue: :param listNCS: :type listNCS: :param cell_dim: :type cell_dim: :param clusteringAlg: :type clusteringAlg: :param threshold_alg: :type threshold_alg: :param unify: whether to merge equivalent rotation clusters after filtering :type unify: bool :return: :rtype: """ condition = True while condition: nea = '' neb = '' numc = 0 for inde in range(len(ClusAll)): clu = ClusAll[inde] nrpdbs = {} for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): rota = item[1] prio = item[0] numc = rota["n_prev_cluster"] pdbname = ensembles[rota['name']] if ensembles[rota['name']] not in nrpdbs: nrpdbs[ensembles[rota['name']]] = rota else: r = nrpdbs[ensembles[rota['name']]] if mode == 'llg' and rota['llg'] > r['llg']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'llg' and rota['llg'] == r['llg'] and rota['zscore'] > r['zscore']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'zscore' and rota['zscore'] > r['zscore']: nrpdbs[ensembles[rota['name']]] = rota elif mode == 'zscore' and rota['zscore'] == r['zscore'] and rota['llg'] > r['llg']: nrpdbs[ensembles[rota['name']]] = rota newDic = {"heapSolutions": ADT.Heap()} for key in nrpdbs.keys(): rota = nrpdbs[key] newDic["heapSolutions"].push((-1 * rota["llg"], -1 * rota["zscore"]), rota) ClusAll[inde] = newDic if unify: condition, ClusAll = unifyClustersEquivalent(ClusAll, ensembles, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg) else: condition = False return ClusAll def applyFilterName(CluAll, ensembles): """ :param CluAll: :type CluAll: :param ensembles: :type ensembles: :return: :rtype: """ convNames = {} for clu in CluAll: rotazioni = map(lambda x: x[1], clu["heapSolutions"].asList()) pdb_done = [] hp = ADT.Heap() for rotaz in sorted(rotazioni, __cmp_rota, reverse=True): if (rotaz["name"], rotaz["n_prev_cluster"]) in pdb_done: continue pdb_done.append((rotaz["name"], rotaz["n_prev_cluster"])) priority = (-1 * rotaz['llg'], -1 * rotaz['zscore']) hp.push(priority, rotaz) convNames[rotaz["name"]] = ensembles[rotaz["name"]] clu["heapSolutions"] = hp return CluAll, convNames # NOTE: this method is supported just for ARCIMBOLDO-BORGES.py and NOT!!!!
for ARCIMBOLDO def unifyClustersEquivalent(ClusAll, ensembles, quate, laue, listNCS, cell_dim, clusteringAlg, threshold_alg): """ :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param quate: :type quate: :param laue: :type laue: :param listNCS: :type listNCS: :param cell_dim: :type cell_dim: :param clusteringAlg: algorithm used to compare rotations (e.g. 'quaternion' or 'distributionCV') :type clusteringAlg: str :param threshold_alg: threshold for the chosen clustering algorithm :type threshold_alg: float :return: :rtype: """ performed = False visited = [] numc = 0 for a in range(len(ClusAll)): clu1 = ClusAll[a] if a in visited: continue visited.append(a) if len(clu1["heapSolutions"].asList()) == 0: continue item1 = clu1["heapSolutions"].pop() clu1q = item1[1] clu1["heapSolutions"].push(item1[0], item1[1]) numc = clu1q["n_prev_cluster"] for b in range(a + 1, len(ClusAll)): if b in visited: continue clu2 = ClusAll[b] if len(clu2["heapSolutions"].asList()) == 0: continue item2 = clu2["heapSolutions"].pop() clu2q = item2[1] clu2["heapSolutions"].push(item2[0], item2[1]) # threshold_alg = -1 # if clusteringAlg == 'quaternion': # threshold_alg = 0.06 # elif clusteringAlg == 'distributionCV': # threshold_alg = 10 # else: # return re, elo = compareRotation(clu1q, clu2q, threshold_alg, clusteringAlg, quate, laue, listNCS, ensembles, cell_dim, True) if re: performed = True visited.append(b) for ite in clu2["heapSolutions"]: prio2 = ite[0] rota2 = ite[1] rota2['n_prev_cluster'] = numc # NOTE: the following instruction implies we are always working with 1 fixed frag. This method is thus # not supported for ARCIMBOLDO, just for ARCIMBOLDO-BORGES rota2['original_rotcluster'] = numc priority = (-1 * rota2['llg'], -1 * rota2['zscore']) clu1["heapSolutions"].push(priority, rota2) return performed, ClusAll def filterOutImprobableSols(ClusAll, minLLG): """ :param ClusAll: :type ClusAll: :param minLLG: minimum LLG a solution must have to be kept :type minLLG: float :return: :rtype: """ llg_all = [] zscore_all = [] for clu in ClusAll: for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): prio, rota = item llg_all.append(rota["llg"]) zscore_all.append(rota["zscore"]) llg_all = sorted(llg_all, reverse=True) zscore_all = sorted(zscore_all, reverse=True) zscore_thresh = 0 llg_thresh = None if zscore_all[0] >= 7.50: zscore_thresh = 7.50 print "Found promising solutions with Zscore at:", zscore_all[0] print "Pruning solutions with lower zscore." ClurAll = [] nall = 0 for clu in ClusAll: dic = {"heapSolutions": ADT.Heap()} for item in clu["heapSolutions"]: prio, rota = item if rota["zscore"] >= zscore_thresh and rota["llg"] >= minLLG: dic["heapSolutions"].push(prio, rota) nall += 1 ClurAll.append(dic) return ClurAll def filterAllSolsByTop(ClusAll, frag_fixed, percentage): """ :param ClusAll: :type ClusAll: :param frag_fixed: :type frag_fixed: :param percentage: intended pruning percentage (only used by the commented-out threshold; the active code prunes at the mean LLG) :type percentage: float :return: :rtype: """ llg_all = [] for clu in ClusAll: for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): prio, rota = item llg_all.append(rota["llg"]) llg_avg = float(numpy.mean(numpy.array(llg_all))) # limiteLLG = (llg_avg*(100-percentage))/50.0 # if llg_avg < 0: # limiteLLG += llg_avg # if llg_top < 0: # limiteLLG += llg_top print "Pruning solutions out of the mean.
AVG LLG:", llg_avg ClurAll = [] nall = 0 for clu in ClusAll: dic = {"heapSolutions": ADT.Heap()} for item in clu["heapSolutions"]: prio, rota = item if rota["llg"] >= llg_avg: dic["heapSolutions"].push(prio, rota) nall += 1 ClurAll.append(dic) return ClurAll def startExpansion(cm, sym, nameJob, outputDire, hkl, ent, nice, cell_dim, spaceGroup, shlxLine, dirBase, limit_CC_data=None, treshold_CC=None, fragdomain=False, single=False, insfile=None): """ :param cm: :param sym: :param nameJob: :param outputDire: :param hkl: :param ent: :param nice: :param cell_dim: :param spaceGroup: :param shlxLine: :param dirBase: :param limit_CC_data: :param treshold_CC: :param fragdomain: :param single: :param insfile: :return: """ if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" convNames = {} rootdir = dirBase searchfor = ".pdb" startfile = ".pda" if insfile != None: limit_CC_data = None treshold_CC = None searchfor = ".phi" startfile = ".phi" for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(searchfor): if limit_CC_data != None and treshold_CC != None: toContinue = False for m in limit_CC_data: if m["corresp"] == pdbf: if float(m["initcc"]) < treshold_CC: toContinue = True else: toContinue = False break if toContinue: continue # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 if insfile == None: # shutil.copyfile(pdbf, outputDirectory+"/"+str(counter)+".pda") foc = open(outputDirectory + "/" + str(counter) + ".pda", "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) foc.write(CRD.format(**data) + "\n") foc2 = open(pdbf, "r") alls = foc2.readlines() foc2.close() countt = 0 scrivi = True previous = -1 lineaDascri = None if fragdomain: for linea in alls: if linea.startswith("REMARK"): foc.write(linea) elif linea.startswith("ATOM") or linea.startswith("HETATM"): if scrivi: foc.write("REMARK DOMAIN " + str(countt + 1) + "\n") if lineaDascri != None: foc.write(lineaDascri) lineaDascri = None countt += 1 scrivi = False residuo = int((linea[22:26]).strip()) if previous > 0 and (residuo == (previous + 1)) or (residuo == previous): foc.write(linea) previous = residuo elif previous > 0: scrivi = True lineaDascri = linea previous = residuo else: foc.write(linea) previous = residuo else: for linea in alls: foc.write(linea) foc.close() else: try: # os.symlink(pdbf,os.path.join(outputDirectory,str(counter)+".phi")) shutil.copyfile(pdbf, os.path.join(outputDirectory, str(counter) + ".phi")) except: print "", os.path.join(outputDirectory, str(counter) + ".phi"), "already exists!" 
try: os.symlink(hkl, os.path.join(outputDirectory, str(counter) + ".hkl")) except: print "", os.path.join(outputDirectory, str(counter) + ".hkl"), "already exists!" if insfile != None: if hasattr(cm, "channel"): shutil.copyfile(insfile, os.path.join(outputDirectory, str(counter) + ".ins")) else: try: os.symlink(insfile, os.path.join(outputDirectory, str(counter) + ".ins")) except: print "", os.path.join(outputDirectory, str(counter) + ".ins"), "already exists!" try: if os.path.exists(ent): os.symlink(ent, os.path.join(outputDirectory, str(counter) + ".ent")) except: print "", os.path.join(outputDirectory, str(counter) + ".ent"), "already exists!" if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) if insfile == None: cm.copy_local_file(outputDirectory + "/" + str(counter) + ".pda", "./" + str(ndir - 1) + "/" + str(counter) + ".pda") else: cm.copy_local_file(outputDirectory + "/" + str(counter) + ".phi", "./" + str(ndir - 1) + "/" + str(counter) + ".phi") cm.copy_local_file(outputDirectory + "/" + str(counter) + ".ins", "./" + str(ndir - 1) + "/" + str(counter) + ".ins") cm.create_remote_link(cm.remote_hkl_path, str(counter) + ".hkl") if os.path.exists(ent): cm.create_remote_link(cm.remote_ent_path, str(counter) + ".ent") convNames[str(counter) + startfile] = pdbf # conv2[rota["name"]] = str(counter)+".pda" counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startExpJob(outputDirectory, op, lineargs): """ :param outputDirectory: path to folder for the output :type outputDirectory: str :param op: :type op: :param lineargs: :type lineargs: :return: :rtype: """ if os.path.exists(os.path.join(outputDirectory, str(op) + ".phs")): return print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_SHELXE, os.path.join(outputDirectory, str(op) + startfile), " ".join( lineargs) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_SHELXE, str(op) + startfile] + lineargs, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter lia = shlxLine.split() print "Expansion of " + str(nq) + " models with:" print "hkl: " + str(hkl) print "Arguments: " + str(shlxLine) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startExpJob, outputDirectory, op, lia) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_SHELXE + " " + os.path.join(outputDirectory, str(op) + startfile) + " " + " ".join( lia) + " > /dev/null" SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST, single=single) break except KeyboardInterrupt: print "The user requested to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) if hasattr(cm, "channel"): job.setExecutable(os.path.join(cm.get_remote_pwd(), PATH_NEW_SHELXE)) else: job.setExecutable(PATH_NEW_SHELXE) job.setInitialDir(listaDirec) job.addInputFile(".hkl", True) job.addInputFile(startfile, True) if os.path.exists(ent): job.addInputFile(".ent", True) if insfile != None and os.path.exists(insfile): job.addInputFile(".ins", True) # job.addOutputFile(".out",True) # job.setMaxRuntime(172800) # job.setPeriodicRemove("JobStatus == 2 &&((CurrentTime - EnteredCurrentStatus) + RemoteWallClockTime - CumulativeSuspensionTime > $(maxRunTime))") lia = shlxLine.split() lio = [startfile] lei = lio + lia job.setArguments(lei) cm.setRequirements(SHELXE_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, convNames def startPACK(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, cutoff, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, formfactors='FORMFACTORS XRAY'): """ Performs phaser's packing function for ARCIMBOLDO_BORGES runs. :param DicParameters: dictionary with key-value pairs required at different steps. Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param ClusAll: list with ADT.Heaps, one per rotation cluster :type ClusAll: list :param ensembles: dictionary mapping ensemble names to the paths of their pdb solution files :type ensembles: dict :param outputDire: folder where to compute and output :type outputDire: str :param mtz: path to the mtz :type mtz: str :param MW: molecular weight :type MW: float :param NC: number of components :type NC: int :param F: label for the intensities or the amplitudes :type F: str :param SIGF: label for the sigmas of the intensities or the amplitudes :type SIGF: str :param Intensities: boolean that indicates whether data are intensities or not :type Intensities: bool :param Aniso: boolean to indicate whether anisotropy and tncs corrections should be computed or read :type Aniso: bool :param normfactors: path to the binary file with the anisotropy corrections :type normfactors: str :param tncsfactors: path to the binary file with the tNCS corrections :type tncsfactors: str :param nice: indicates priority for linux processes, ranging from -20 (highest priority) to 19 (lowest) :type nice: int :param RMSD: estimated rmsd deviation to target for phaser job :type RMSD: float :param lowR: low resolution limit for phaser job :type lowR: float :param highR: high resolution limit for phaser job :type highR: float :param cutoff: Limit on percent of clashes :type cutoff: int :param spaceGroup: space group symbol :type spaceGroup: str :param frag_fixed: number of fixed fragments :type
frag_fixed: int :param tops: limit to the number of solutions to test :type tops: int :param LIMIT_CLUSTER: rotation cluster id to process :type LIMIT_CLUSTER: int :param VRMS: boolean to indicate whether to perform variance rms refinement :type VRMS: bool :param BFAC: boolean to indicate whether to perform bfactor refinement :type BFAC: bool :param formfactors: either 'FORMFACTORS XRAY' or 'FORMFACTORS ELECTRON' :type formfactors: str :return: :rtype: """ if not (os.path.exists(outputDire)): os.makedirs(outputDire) # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and counter >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) list_pdbs = {} ct = 0 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct if numall > max_num_all: max_num_all = numall # if usePDB != None: # pdbf = usePDB if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "pack.mtz")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" print traceback.print_exc() pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "pack.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_PAK" + "\n") f.write('HKLIN "pack.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN 
F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE Test packing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT OFF" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("PACK QUICK ON"+"\n") # Packing check stops when MAX_CLASHES is reached f.write("PACK CUTOFF " + str(cutoff) + "\n") # NOTE: If PACK COMPACT ON, Pack ensembles into a compact association # (minimize distances between centres of mass for the addition of each component in a solution) f.write("PACK COMPACT OFF" + "\n") # OFF at the moment, we should change some steps after f.write("XYZOUT OFF" + "\n") f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n") f.write("ENSEMBLE " + rota["name"] + " DISABLE CHECK ON"+"\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if frifr["name"] != rota["name"]: f.write("ENSEMBLE " + frifr["name"] + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(frifr["rmsd"] if "rmsd" in frifr else RMSD) + "\n") f.write("ENSEMBLE " + frifr["name"] + " DISABLE CHECK ON" + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prio[0]) + " ZSCORE: " + str( prio[1]) + "\n") else: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: NONE LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + "\n") cou += 1 f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if 
counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startPACKJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "PACK of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startPACKJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("pack.mtz", False) job.addInputFile("anis.tncs", False) job.addInputFile("anis.norm", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def startPACKOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, cutoff, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, usePDO=False, VRMS=False, BFAC=False, randomize_trans_per_rot=0, consider_inverted_helix=False, formfactors='FORMFACTORS XRAY'): """ Performs phaser's packing function for ARCIMBOLDO_LITE runs. :param DicParameters: dictionary with key-value pairs required at different steps. 
Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param cutoff: :type cutoff: :param spaceGroup: :type spaceGroup: :param frag_fixed: :type frag_fixed: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param usePDO: :type usePDO: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param randomize_trans_per_rot: :type randomize_trans_per_rot: :param consider_inverted_helix: :type consider_inverted_helix: :param formfactors: :type formfactors: :return: :rtype: """ if not (os.path.exists(outputDire)): os.makedirs(outputDire) # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall for cds in range(len(ClusAll)): clu = ClusAll[cds] nrts = len(clu["heapSolutions"].asList()) if nrts == 0: if current_dir2 != "" and cds == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue sol = clu["heapSolutions"].asList()[0] rota = sol[1] prio = sol[0] cou = 0 # while sol != None: # if tops != None and cou >= tops: # break list_pdbs = {} ct = 0 if usePDO or consider_inverted_helix: for key in ensembles: try: franumero = int(key.split("FR")[-1].split("_")[0]) except: continue if franumero == frag_fixed - 2 or consider_inverted_helix: if ensembles[key] not in list_pdbs.keys(): list_pdbs[ensembles[key]] = (key, ct, False, rota["rmsd"] if "rmsd" in rota else RMSD) ct += 1 # print "===================================TEST RAPIDO=================" # print rota["name"],ensembles[rota["name"]] # print rota["fixed_frags"][0]["name"],ensembles[rota["fixed_frags"][0]["name"]] # print "===================================TEST RAPIDO=================" if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True, RMSD) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False, rotafi["rmsd"] if "rmsd" in rotafi else RMSD) ct += 1 numall = ct if numall > max_num_all: max_num_all = numall # if usePDB != None: # pdbf = usePDB """" nameRota = "" if rota["name"].split("xx") > 1: #if rota["name"] contains "xx" it means is an ARCIMBOLDO nameRota = "ensarci0" else: nameRota = rota["name"] """ if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): 
os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "pack.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "Advisory: There was some problem generating the links to the mtz, tncs and norm files" traceback.print_exc(file=sys.stdout) pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "pack.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") # print "B remote cwd",cm.get_remote_pwd() if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_PAK" + "\n") f.write('HKLIN "pack.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35"+"\n") # NOTE CM: testing micro ED data f.write("TITLE Test packing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT OFF" + "\n") f.write("PACK CUTOFF " + str(cutoff) + "\n") f.write("PACK COMPACT OFF" + "\n") art = list_pdbs[ensembles[rota["name"]]][1] if not usePDO and not consider_inverted_helix: f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n") f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " DISABLE CHECK ON" + "\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if list_pdbs[ensembles[frifr["name"]]][1] != art: f.write("ENSEMBLE ensarci" + str( list_pdbs[ensembles[frifr["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(frifr["rmsd"] if "rmsd" in frifr else RMSD) + "\n") f.write("ENSEMBLE ensarci" + str( list_pdbs[ensembles[frifr["name"]]][1]) + " DISABLE CHECK ON" + "\n") else: for valok in list_pdbs: valo = list_pdbs[valok] f.write("ENSEMBLE ensarci" + str(valo[1]) + " PDBFILE " + str(valo[1]) + "_" + str( counter) + ".pdb RMS " + str(valo[3]) + "\n") f.write("ENSEMBLE ensarci" + str(valo[1]) + " DISABLE CHECK ON" + "\n") s = 0 for sol in clu["heapSolutions"]: if tops != None and s >= tops: break rota = sol[1] prio = 
(rota["llg"], rota["zscore"]) lip = [] ros = [] ####lip.append(int(rota["original_rotcluster"].split("_")[-1])) lip.append(int(rota["name"].split("FR")[-1].split("_")[0])) if "fixed_frags" not in rota: rota["fixed_frags"] = [] for r in rota["fixed_frags"]: #NOTE: Important change #####lip.append(int(r["original_rotcluster"].split("_")[-1])) lip.append(int(r["name"].split("FR")[-1].split("_")[0])) ros.append(r) rota["fixed_frags"] = [] ros.append(rota) lip = sorted(lip) fixd = [] for ep in range(len(lip) - 1): # the last one would be excluded because is not the fixed but actual rotation ced = str(lip[ep]) initial = None while True: rep = ros.pop(0) if rep["name"].split("FR")[-1].split("_")[0] == ced: if len(fixd) > 0: rep["n_prev_cluster"] = __getIDClusterFromDescription(rep["original_rotcluster"]) rep["fixed_frags"] = [] fixd.append(copy.deepcopy(rep)) break else: ros.append(rep) rota = ros.pop() if len(fixd) > 0: rota["fixed_frags"] = fixd rota["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] rota["n_prev_cluster"] = __getIDClusterFromDescription(rota["original_rotcluster"]) else: rota["fixed_frags"] = [] if randomize_trans_per_rot <= 0: if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) # for keee in ensembles: # print keee,";;;;;;",ensembles[keee] # print rotafi["name"] # print "=================================",ensembles[rotafi["name"]] # print list_pdbs[ensembles[rotafi["name"]]] f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " " + rota["name"] + "\n") # cou += 1 else: for rnds in numpy.random.uniform(low=0.000, high=0.999, size=(randomize_trans_per_rot, 3)): if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) # for keee in ensembles: # print keee,";;;;;;",ensembles[keee] # print rotafi["name"] # print "=================================",ensembles[rotafi["name"]] # print 
list_pdbs[ensembles[rotafi["name"]]] f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str(rnds[0]) + " " + str( rnds[1]) + " " + str(rnds[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str(rnds[0]) + " " + str( rnds[1]) + " " + str(rnds[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " " + rota[ "name"] + "\n") # cou += 1 s += 1 # nuovoPath = pdbf[:-4]+rota["name"].split("ensembleID")[1]+".pdb" # conv2[str(counter)+"."+str(s)+".pdb"] = (nuovoPath,rota["name"]) f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),str(counter)+".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startPACKJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
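# Phaser reads its keyword script from stdin: the <op>.sh file written above is piped in under nice, stdout is captured in <op>.out and stderr is collected separately.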
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "PACK of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startPACKJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("pack.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def startOCC(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, ellg=None, nres=None, rangeocc=None, merge=None, occfrac=None, occoffset=None, ncycles=None, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1,formfactors='FORMFACTORS XRAY'): """ Performs phaser's occupancy refinement for ARCIMBOLDO_BORGES runs. :param DicParameters: dictionary with key-value pairs required at different steps. 
Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param dir_o_liFile: :param outputDire: :param mtz: :param MW: :param NC: :param F: :param SIGF: :param Intensities: :param Aniso: :param normfactors: :param tncsfactors: :param nice: :param RMSD: :param lowR: :param highR: :param final_rot: :param save_rot: :param frag_fixed: :param spaceGroup: :param sampl: :param ellg: :param nres: :param rangeocc: :param merge: :param occfrac: :param occoffset: :param ncycles: :param tops: :param LIMIT_CLUSTER: :param VRMS: :param BFAC: :param BULK_FSOL: :param BULK_BSOL: :return: """ # """ Performs Phaser's occupancy refinement. # # Args: # DicParameters (dict): # cm (Grid object): # sym (SystemUtility object): # nameJob (str): # dir_o_liFile (str): path where the input pdb files are found # outputDire (str): path where to put the output # mtz (str): path to the mtz file # MW (float): molecular weight # NC (int): number of component # F (str): label for the intensities or the amplitudes in the mtz file # SIGF (str): label for the standard deviation of the intensities or the amplitudes in the mtz file # Intensities (bool): # Aniso (bool): # normfactors (str): path to the binary file with the anisotropy correction for the data # tncsfactors (str): path to the binary file with the translational non crystallographic symmetry correction for the data # nice (int): # RMSD (float): rmsd to be used in the computation # lowR (float): low resolution limit to be used in phaser # highR (float): high resolution limit to be used in phaser # final_rot (): # save_rot (): # frag_fixed (): # spaceGroup (): # sampl (): # ellg (float): Target eLLG for determining number of residues in window # nres (int): Number of residues in window, must be an odd number # rangeocc (list): Minimum and maximum values of occupancy during refinement # merge (bool): Merge refined occupancies from different window offsets to give final occupancies per residue. 
# If OFF, occupancies from a single window offset will be used, selected using occoffset # occfrac (float): Minimum fraction of the protein for which the occupancy may be set to zero # occoffset (int): If merge is false, then occoffset defines the single window offset from which final occupancies will be taken # ncycles (int): Number of cycles of occupancy refinement to be performed # tops (): # LIMIT_CLUSTER (): # VRMS (): # BFAC (): # BULK_FSOL (): # BULK_BSOL (): # # Returns: # nq (int): number of jobs sent # ensembles (dict): dictionary, keys are the ensemble names and values are the paths to the pdbs # # """ dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "occ.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: print "\nADVISORY: There was some problem generating the links to the mtz, tncs and norm files" pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) if hasattr(cm, "channel"): cm.create_remote_link(cm.remote_mtz_path, "occ.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") ensembles["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_OCC" + "\n") f.write('HKLIN "occ.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE Test OCC for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC 
READ anis.norm" + "\n") f.write( "OCCUPANCY WINDOW NRES 1" + "\n") # NOTE: Used to override defaults except if we set window ellg or window nres f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " DISABLE CHECK ON" + "\n") f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") f.write("SOLU 6DIM ENSE ensemble" + str(counter + 1) + " EULER 0.0 0.0 0.0 FRAC 0.0 0.0 0.0\n") # SET THE NON-DEFAULT PARAMETERS # Set the windows for the refinement if they have been given. OR eLLG based or directly number of residues if ellg != None and isinstance(ellg, float): f.write("OCCUPANCY WINDOW ELLG " + str(ellg) + "\n") if nres != None and isinstance(nres, int): f.write("OCCUPANCY WINDOW NRES " + str(nres) + "\n") # Set the valid range for occupancies and configure the refinement strategy if rangeocc != None and isinstance(rangeocc, list) and len(rangeocc) == 2: f.write("OCCUPANCY MIN " + str(rangeocc[0]) + " MAX " + str(rangeocc[1]) + "\n") if merge != None and isinstance(merge, bool): if merge: f.write("OCCUPANCY MERGE ON" + "\n") else: f.write("OCCUPANCY MERGE OFF" + "\n") if occfrac != None and isinstance(occfrac, float): f.write("OCCUPANCY FRAC " + str(occfrac) + "\n") if occoffset != None and isinstance(occoffset, int): f.write("OCCUPANCY OFFSET " + str(occoffset) + "\n") if ncycles != None and isinstance(ncycles, int): f.write("OCCUPANCY NCYCLES " + str(ncycles) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startOCCJob(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
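# Same stdin-driven execution pattern as the packing jobs, here for the MR_OCC occupancy-refinement scripts written above.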
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "OCC of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) if cm is None: nl = 0 for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startOCCJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("occ.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, ensembles def startNMA(DicParameters, cm, sym, nameJob, dir_o_liFile, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, formfactors='FORMFACTORS XRAY'): """ Performs phaser's normal mode analysis for ARCIMBOLDO_BORGES runs. :param DicParameters: dictionary with key-value pairs required at different steps. 
Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param dir_o_liFile: :type dir_o_liFile: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param final_rot: :type final_rot: :param save_rot: :type save_rot: :param frag_fixed: :type frag_fixed: :param spaceGroup: :type spaceGroup: :param sampl: :type sampl: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :return: :rtype: """ dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall if frag_fixed == 1 and isinstance(dir_o_liFile, basestring): # is a directory ensembles = {} rootdir = dir_o_liFile for root, subFolders, files in os.walk(rootdir): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "nma.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) if hasattr(cm, "channel"): cm.create_remote_link(cm.remote_mtz_path, "nma.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") ensembles["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NMA" + "\n") f.write('HKLIN "nma.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing 
micro ED data f.write("TITLE Test NMA for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES 2" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " DISABLE CHECK ON" + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startNMAJob(outputDirectory, op): """ :param outputDirectory: :type outputDirectory: :param op: :type op: :return: :rtype: """ f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "NMA of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) if cm is None: nl = 0 for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startNMAJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
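# CPU information is unavailable: flag the SystemUtility instance for shutdown and abort the run.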
sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("nma.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) print "OBTAINED",nq,ensembles return nq, ensembles def startNMAFromClusters(DicParameters, cm, sym, ClusAll, ensembles, nameJob, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_rot, save_rot, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, formfactors='FORMFACTORS XRAY'): """ Performs phaser's normal mode analysis on the solutions stored in each rotation cluster. """ dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or (LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde): if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 conv2 = {} for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and cou >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) pdbf = ensembles[rota["name"]] # if usePDB != None: # pdbf = usePDB if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "nma.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "nma.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, 
os.path.basename(dirente2))) # cm.create_remote_link(os.path.join(cm.remote_library_path,os.path.basename(pdbf)),str(counter)+".pdb") cm.copy_local_file(pdbf, "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") conv2["ensemble" + str(counter)] = pdbf f = open(os.path.join(outputDirectory, str(counter) + ".sh"), "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE NMA" + "\n") f.write('HKLIN "nma.mtz"' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE Test NMA for Grid" + "\n") f.write("JOBS 1" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES 2" + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " PDBFILE " + str(counter) + ".pdb RMS " + str( RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(counter + 1) + " DISABLE CHECK ON" + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startNMAP1Job(outputDirectory, op): f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
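# Local worker for the NMA jobs prepared by startNMAFromClusters: one niced Phaser process per <op>.sh script, log captured in <op>.out.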
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "NMA of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startNMAP1Job, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." 
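# Ctrl-C during local dispatch: mark the run as closeable and exit immediately.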
sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("nma.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def shredFromEnt(ent, length): """ Cuts the structure in the given ent/pdb file into overlapping backbone fragments. :param ent: path to the ent (pdb) file to shred :type ent: str :param length: number of residues in each sliding window :type length: int :return: list of PDB-format strings, one per window :rtype: list """ listAll = Bioinformatics.getListCA("ent", ent, "PDB", backbone=True, allInList=True) fullength = len(listAll[0][0]) trozos = fullength - length piecesPDBs = [] for t in range(trozos): piece = listAll[1][t:t + length] # window of length residues starting at residue t allAtm = [] for resi in piece: for atm in resi: allAtm.append(atm) piecesPDBs.append(Bioinformatics.getPDBFromListOfAtom(allAtm)[0]) return piecesPDBs def startBRF(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, save_rot, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True, isOMIT=False, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, formfactors='FORMFACTORS XRAY'): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param nameJob: :type nameJob: :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param frag_fixed: :type frag_fixed: :param spaceGroup: :type spaceGroup: :param save_rot: :type save_rot: :param sampl: :type sampl: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param USE_TNCS: :type USE_TNCS: :param isOMIT: :type isOMIT: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :return: :rtype: """ global listaEva # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or (LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde): # print "Checking2 is",inde,len(ClusAll)-1,len(ClusAll) if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "brf.mtz")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = 
dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "brf.mtz") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] # print "Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: stru = Bioinformatics.getStructure("ref", pdbf) pdbsearch = "" for model in stru.get_list(): reference = [] for chain in model.get_list(): for residue in chain.get_list(): reference += residue.get_unpacked_list() pdbmod, cnv = Bioinformatics.getPDBFromListOfAtom(reference, renumber=isOMIT, uniqueChain=isOMIT, chainFragment=not isOMIT) pdbmod = "MODEL " + str(model.get_id()) + "\n" + pdbmod + "\n\n" pdbsearch += pdbmod pdbsearch += "ENDMDL\n\n" fds = open(os.path.join(outputDirectory, str(counter) + ".pdb"), "w") fds.write(pdbsearch) fds.close() if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".pdb"), "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_ROT" + "\n") f.write("TARGET ROT BRUTE" + "\n") f.write('HKLIN "brf.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE refinement vs Rotation function " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT OFF" + "\n") f.write("#SPACEGROUP " + spaceGroup + "\n") # f.write("XYZOUT ON"+"\n") # f.write("TOPFILES "+str(1)+"\n") # if not USE_TNCS: f.write("TNCS USE OFF\n") # else: # f.write("TNCS USE ON\n") f.write("ENSEMBLE ensemble" + str(1) + " PDBFILE " + str(counter) + ".pdb RMS " + str(RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(1) + " DISABLE CHECK ON" + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") """ else: f.write("SOLU SET"+"\n") 
f.write("SOLU SPAC "+spaceGroup+"\n") if "n_prev_cluster" in rota: f.write("SOLU TRIAL ENSEMBLE ensemble"+str(1)+" EULER \t"+str((rota["euler"])[0])+" "+str((rota["euler"])[1])+" "+str((rota["euler"])[2])+"\t"+"RFZ "+str(rota["zscore"])+" #CLUSTER: "+str(rota["original_rotcluster"].split("_")[-1])+"\n") else: f.write("SOLU TRIAL ENSEMBLE ensemble"+str(1)+" EULER \t"+str((rota["euler"])[0])+" "+str((rota["euler"])[1])+" "+str((rota["euler"])[2])+"\t"+"RFZ "+str(rota["zscore"])+"\n") """ if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(1) + "\n") f.write("SEARCH METHOD FULL" + "\n") f.write("PEAKS ROT DOWN 0" + "\n") f.write( "ROTATE VOLUME AROUND EULER " + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + " RANGE " + str(5) + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, 0, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRGRJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "BRF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRGRJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("brf.mtz", False) job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) # job.addOutputFile("re_.pdb",True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") # if sol == None: # print "ho raggiunto ultimo" # (nc,nq) = cm.submitJob(job,isthelast=True) # else: (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def startRGR(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, save_rot, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True, isOMIT=False, VRMS=False, BFAC=False, sigr=0.0, sigt=0.0, preserveChains=False, BULK_FSOL=-1, BULK_BSOL=-1, formfactors='FORMFACTORS XRAY'): """ Performs phaser's gyre refinement. :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param nameJob: :type nameJob: :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param frag_fixed: :type frag_fixed: :param spaceGroup: :type spaceGroup: :param save_rot: :type save_rot: :param sampl: :type sampl: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param USE_TNCS: :type USE_TNCS: :param isOMIT: :type isOMIT: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param sigr: :type sigr: :param sigt: :type sigt: :param preserveChains: :type preserveChains: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :return: :rtype: """ global listaEva # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" # NOTE: B-factors can not be refined within Gyre apparently if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or (LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde): # print "Checking2 is",inde,len(ClusAll)-1,len(ClusAll) if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = 
os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rgr.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rgr.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] #print "SHERLOCK Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: stru = Bioinformatics.getStructure("ref", pdbf) pdbsearch = "" for model in stru.get_list(): reference = [] for chain in model.get_list(): for residue in chain.get_list(): reference += residue.get_unpacked_list() pdbmod, cnv = Bioinformatics3.get_pdb_from_list_of_atoms(reference, renumber=isOMIT, uniqueChain=isOMIT, chainFragment=(not isOMIT and not preserveChains)) pdbmod = "MODEL " + str(model.get_id()) + "\n" + pdbmod + "\n\n" pdbsearch += pdbmod pdbsearch += "ENDMDL\n\n" fds = open(os.path.join(outputDirectory, str(counter) + ".pdb"), "w") fds.write(pdbsearch) fds.close() if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".pdb"), "./" + str(ndir - 1) + "/" + str(counter) + ".pdb") f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_GYRE" + "\n") f.write('HKLIN "rgr.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE refinement vs Rotation function " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACG ROT ON TRA ON VRMS " + VRMS + " SIGR " + str(sigr) + " SIGT " + str(sigt) + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES " + str(1) + "\n") f.write("TNCS USE OFF\n") if not Aniso: #Then you read the corrections and do not compute them f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("ENSEMBLE ensemble" + str(1) + " PDBFILE " + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(1) + " 
DISABLE CHECK ON" + "\n") f.write("SORT ON" + "\n") if frag_fixed > 1: # add information about fixed fragments f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + spaceGroup + "\n") # NOTE CM: I am now including the LLG as RF parameter in the solu trial if "n_prev_cluster" in rota: f.write("SOLU TRIAL ENSEMBLE ensemble" + str(1) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RF "+ str(rota["llg"]) + "\t" + "RFZ " + str(rota["zscore"]) + " #CLUSTER: " + str(rota["original_rotcluster"].split("_")[-1]) + "\n") else: f.write("SOLU TRIAL ENSEMBLE ensemble" + str(1) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t"+ "RF "+ str(rota["llg"]) + "\t" + "RFZ " + str(rota["zscore"]) + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SEARCH ENSEMBLE ensemble" + str(1) + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") f.write("PEAKS ROT SELECT PERCENT" + "\n") f.write("PEAKS ROT CUTOFF " + str(save_rot) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, 0, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRGRJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
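# NOTE: startRGRJob is effectively idempotent: it returns early when <op>.sol
# already exists, so an interrupted run can be relaunched without repeating
# finished gyre jobs. The subprocess call below is the programmatic form of
#   nice -n <nice> $PATH_NEW_PHASER < <op>.sh > <op>.out
# i.e. Phaser reads its keyword script from stdin and logs to the .out file.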
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RGR of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRGRJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rgr.mtz", False) job.addInputFile("anis.tncs", False) job.addInputFile("anis.norm", False) job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) # job.addOutputFile("re_.pdb",True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") # if sol == None: # print "ho raggiunto ultimo" # (nc,nq) = cm.submitJob(job,isthelast=True) # else: (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 # NOTE CM: I think we should deprecate this function and substitute it completely with gimble. # it is called, so we need to change the programs accordingly if we don't want that option anymore. 
# ISSUE 178 in the repo def startRBRP1(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, RMSD, lowR, highR, frag_fixed, spaceGroup, sampl=-1, tops=None, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False, formfactors='FORMFACTORS XRAY'): global listaEva # TODO: This function should use the files with the norm and tncs factors, # but those of the P1 data, that should be generated at the beginning # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" RNP_GYRE = "\n MACMR CHAINS ON" if RNP_GYRE else "" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "refP1.mtz")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtzP1_path, "refP1.mtz") ndir += 1 rota = sol[1] prio = (rota["llg"], rota["zscore"]) cou += 1 if frag_fixed == 1 and tops != None and cou >= tops: break pdbf = ensembles[rota["name"]] # print "Preparing: "+str(pdbf)+" as model "+str(counter) flon = open(pdbf, "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break if True: try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") # corresponding = (pdbf.split("/"))[-1] # listona = corresponding.split("_") # pdbid = listona[0] # model = listona[1] # idSolution = listona[2] # idSolution,ext = idSolution.split(".") lop = open(outputDirectory + "/" + str(counter) + ".pdb", "r") alls = lop.readlines() lop.close() countt = 0 scrivi = True lai = None previous = -1 lineaDascri = None for linea in alls: if linea.startswith("ATOM") or linea.startswith("HETATM"): if scrivi: if lai != None: lai.close() if hasattr(cm, "channel"): cm.copy_local_file(lai.name, "./" + str(ndir - 1) + "/" + os.path.basename(lai.name)) lai = open(outputDirectory + "/" + str(countt) + "_" + str(counter) + ".pdb", "w") if lineaDascri != None: lai.write(lineaDascri) lineaDascri = None countt += 1 scrivi = False residuo = int((linea[22:26]).strip()) if previous > 0 and (residuo == (previous + 1)) or (residuo == previous): lai.write(linea) previous = 
residuo elif previous > 0: scrivi = True lineaDascri = linea previous = residuo else: lai.write(linea) previous = residuo if lai != None: lai.close() if hasattr(cm, "channel"): cm.copy_local_file(lai.name, "./" + str(ndir - 1) + "/" + os.path.basename(lai.name)) numFrag = countt f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_RNP" + "\n") f.write('HKLIN "refP1.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE refinement in P1 " + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES " + str(1) + "\n") for i in range(countt): f.write( "ENSEMBLE ensemble" + str(i) + " PDBFILE " + str(i) + "_" + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n") f.write("ENSEMBLE ensemble" + str(1) + " DISABLE CHECK ON" + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC P1\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write( "SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC P1\n") for i in range(countt): if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE ensemble" + str(i) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + "\n") else: f.write("SOLU 6DIM ENSE ensemble" + str(i) + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + "\n") if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) listaEva[counter] = (0, 0, numFrag, rota) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def 
startRNPP1Job(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RNP in P1 of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRNPP1Job, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("refP1.mtz", False) for i in range(numFrag): job.addInputFile(str(i) + "_" + ".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) # job.addOutputFile("re_.pdb",True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") # if sol == None: # print "ho raggiunto ultimo" # (nc,nq) = cm.submitJob(job,isthelast=True) # else: (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def readMR_ELLGout(path_out_file=None, out_text=None, pdb_filepath=None, nfrag=1): """Process an out file from MR_ELLG mode from phaser and returns the information. 
    :param path_out_file: path to the Phaser MR_ELLG .out file (alternative to out_text)
    :type path_out_file: str
    :param out_text: full text of the MR_ELLG output (alternative to path_out_file)
    :type out_text: str
    :param pdb_filepath: path to the search model, used to count its residues
    :type pdb_filepath: str
    :param nfrag: number of fragments (currently only 1 is supported)
    :type nfrag: int
    :return: dictionary with the eLLG statistics for the model
    :rtype: dict
    """
    if nfrag != 1:  # TODO: support more fragments
        print "Currently only a single fragment is supported"
        sys.exit(0)
    if path_out_file != None and out_text == None:
        out_file = open(path_out_file, 'r')
        out_lines = out_file.readlines()
        del out_file
    elif out_text != None:
        out_lines = out_text.splitlines()
    pdb_file = open(pdb_filepath, 'r')
    list_ca = Bioinformatics.getListCA(name=pdb_filepath[:-4], pdbf=pdb_file, mode='PDB')
    nres = len(list_ca[0][0])
    results = {}
    regex_table1 = re.compile("eLLG: eLLG of ensemble alone")
    # eLLG: eLLG of ensemble alone
    #  eLLG   RMSD  frac-scat  Ensemble
    #  216.6  0.600  0.18438   ensemble1
    regex_table2 = re.compile("eLLG-reso: Resolution to achieve target eLLG")  # this can be found three times
    # eLLG-reso: Resolution to achieve target eLLG (60)
    #  eLLG-reso  Ensemble
    #      4.180  ensemble1
    regex_table3 = re.compile("Alanine residues for eLLG target =")
    # Alanine residues for eLLG target = 88
    read = 0
    for i in range(len(out_lines)):
        if bool(regex_table3.findall(out_lines[i])):
            nres_for_target = int(out_lines[i].split('=')[1])
            read = read + 1
        if bool(regex_table1.findall(out_lines[i])):
            list_val = out_lines[i + 2].split()
            ensemble_name = str(list_val[3])
            frac_scat = float(list_val[2])
            current_ellg = float(list_val[0])
            read = read + 1
        if bool(regex_table2.findall(out_lines[i])):
            try:
                res_for_target = float(out_lines[i + 2].split()[0])
            except ValueError:
                res_for_target = float(out_lines[i + 2].split()[1].split('(')[0])
            read = read + 1
        if read == 3:  # all the information we needed has been read, we can stop
            break
    results = {'number_of_residues': nres, 'fraction_scattering': frac_scat,
               'ellg_current_ensemble': current_ellg, 'nres_for_target_ellg': nres_for_target,
               'resolution_for_target': res_for_target, 'fullpath': pdb_filepath}
    return results


def readMR_ELLGsum(path_sum_file, nfrag=1):
    """ Read back an ellg_computation.sum file written by evaluateMR_ELLG.

    :param path_sum_file: path to the .sum file
    :type path_sum_file: str
    :param nfrag: number of fragments (currently unused)
    :type nfrag: int
    :return: dictionary mapping each ensemble path to its eLLG statistics
    :rtype: dict
    """
    dict_result = {}
    file_sum = open(path_sum_file, 'r')
    lines = file_sum.readlines()
    del file_sum
    for i in range(0, len(lines), 5):
        ensemble = getNewPathFromMerging(path_sum_file, ((lines[i + 1]).split())[1])
        dict_result[ensemble] = {}
        dict_result[ensemble]['number_of_residues'] = int(((lines[i + 2]).split())[1])
        dict_result[ensemble]['fraction_scattering'] = float(((lines[i + 2]).split())[3])
        if ((lines[i + 2]).split())[8] != 'None':  # the nres was available
            dict_result[ensemble]['nres_for_target_ellg'] = int(((lines[i + 2]).split())[8])
        else:  # no nres was computed
            dict_result[ensemble]['nres_for_target_ellg'] = None
        dict_result[ensemble]['ellg_current_ensemble'] = float((lines[i + 3].split())[2])
        dict_result[ensemble]['resolution_for_target'] = float((lines[i + 3].split())[7])
    return dict_result


def evaluateMR_ELLG(DicParameters, cm, DicGridConn, nameJob, outputDicr, nqueue, ensembles, isArcimboldo=False,
                    nfrag=1):
    '''This function reads the output from the startMR_ELLG function and evaluates it, producing a sum file.
    Args:
        DicParameters (dict)
        cm (Grid object)
        DicGridConn
        nameJob
        outputDicr
        nqueue
        ensembles
        isArcimboldo (bool): to indicate whether it is an Arcimboldo or a Borges run
        nfrag (int): number of fixed fragments

    Returns:
        dict_result (dict): structure is dict_result[model_path]={'fraction_scattering':float,'number_of_residues':int,'nres_for_target_ellg':int,'fullpath':str,'resolution_for_target':float}

    Notes:
        - It writes a sum file
        - If open, it closes the connection
        - isArcimboldo and nfrag are currently placeholders; the eLLG computation in ARCIMBOLDO is yet to be implemented
    '''
    # Prepare the directories
    dirente = outputDicr
    if dirente[-1] == '/':
        dirente = dirente[:-1]
    if hasattr(cm, 'channel'):  # Only in remote grid case
        current_dir = cm.get_remote_pwd()
        print cm.change_remote_dir(os.path.basename(dirente))
    # Prepare variables to read
    fromIn = 0
    toIn = nqueue - 1
    ndir = 0
    dict_result = {}
    dirente2 = ""
    current_dir2 = ""
    # Read
    while fromIn <= toIn:
        yetEvaluated = True
        if yetEvaluated:
            print 'Evaluating ' + str(fromIn) + ' mr_ellg file'
            if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                outputDic = os.path.join(outputDicr, './' + str(ndir) + '/')
                dirente2 = outputDic
                if dirente2[-1] == '/':
                    dirente2 = dirente2[:-1]
                if hasattr(cm, 'channel'):
                    SystemUtility.remote_reconnection(cm.get_remote_pwd())
                    current_dir2 = cm.get_remote_pwd()
                    print cm.change_remote_dir(os.path.basename(dirente2))
                ndir = ndir + 1
            while 1:
                outname = str(fromIn) + ".out"
                local_path = os.path.join(outputDic, outname)
                if hasattr(cm, 'channel'):
                    file_ended = cm.get_remote_file(remotefile=outname, localfile=local_path,
                                                    conditioEND=PHASER_OUT_END_CONDITION,
                                                    testEND=PHASER_OUT_END_TEST)
                    if isinstance(file_ended, bool) and not file_ended:
                        print "File " + outname + " not ready sleeping 3 seconds"
                        time.sleep(3)
                        continue
                    break
                elif os.path.exists(local_path):
                    checkYOURoutput(myfile=local_path, conditioEND=PHASER_OUT_END_CONDITION_LOCAL,
                                    testEND=PHASER_OUT_END_TEST)
                    break
                else:
                    time.sleep(3)
                    continue
            # Read the information from the out
            out_file = open(local_path, 'r')
            out_content = out_file.read()
            print out_content
            del out_file
            pdb_filepath = outputDic + str(fromIn) + ".pdb"
            results = readMR_ELLGout(out_text=out_content, pdb_filepath=pdb_filepath)
            name_ensemble = ensembles['ensemble' + str(fromIn + 1)]
            dict_result[name_ensemble] = results
            lastFile = False
            if fromIn == toIn:
                lastFile = True
            candelete = True
            if candelete:
                try:
                    numb = fromIn
                    os.remove(outputDic + str(numb) + ".pdb")
                    os.remove(outputDic + str(numb) + ".sh")
                    os.remove(outputDic + str(numb) + ".out")
                except Exception:  # Then we are not in local
                    pass
            fromIn = fromIn + 1
            if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                if hasattr(cm, "channel"):
                    cm.change_remote_dir(current_dir2)
            now = datetime.datetime.now()
            ti = now.strftime("%Y-%m-%d %H:%M")
            print "Not all models are yet performed..." + ti
        if not yetEvaluated:
            time.sleep(60)
    print "Now all models are performed."
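    # The SUM file written further below is the one parsed back by readMR_ELLGsum
    # (five lines per ensemble, hence its stride-5 loop). A record looks like this,
    # with whitespace-separated fields; the values are the illustrative ones from
    # the phaser output excerpts quoted in readMR_ELLGout, and the path is a
    # placeholder:
    #   ===========
    #   ENSEMBLE: /path/to/model.pdb
    #   NRESIDUES: 88	 FRACTION_SCATTERING: 0.18438	 NRESIDUES FOR TARGET ELLG: 88
    #   CURRENT ELLG: 216.6	 RESOLUTION FOR TARGET ELLG: 4.180
    #   ===========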
    if hasattr(cm, "channel"):
        # print 'current_dir',current_dir
        # print "dirente",dirente
        cm.change_remote_dir(current_dir)
        # print cm.remove_remote_dir(os.path.basename(dirente))
        # print "cm.get_remote_pwd()",cm.get_remote_pwd()
        SystemUtility.close_connection(DicGridConn, DicParameters, cm)

    # Write the SUM file
    filepath_sum = os.path.join(dirente, 'ellg_computation.sum')
    file_sum = open(filepath_sum, 'w')
    filepath_table = os.path.join(dirente, 'table_ellg.sum')
    file_table = open(filepath_table, 'w')
    file_table.write("Size\teLLG\tName\n")
    for ensemble in dict_result.keys():
        file_sum.write("===========\n")
        file_sum.write("ENSEMBLE: " + ensemble + "\n")
        file_sum.write("NRESIDUES: " + str(dict_result[ensemble]['number_of_residues']) +
                       "\t FRACTION_SCATTERING: " + str(dict_result[ensemble]['fraction_scattering']) +
                       "\t NRESIDUES FOR TARGET ELLG: " + str(dict_result[ensemble]['nres_for_target_ellg']) + "\n")
        file_sum.write("CURRENT ELLG: " + str(dict_result[ensemble]['ellg_current_ensemble']) +
                       "\t RESOLUTION FOR TARGET ELLG: " + str(dict_result[ensemble]['resolution_for_target']) + "\n")
        file_sum.write("===========\n")
        # TEMPORARY? TABLE TO ANALYZE ELLG
        file_table.write(str(dict_result[ensemble]['number_of_residues']) + '\t' +
                         str(dict_result[ensemble]['ellg_current_ensemble']) + '\t' + ensemble + '\n')
    del file_table
    del file_sum
    return dict_result


def evaluateOCC(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, ensembles):
    """ Function to evaluate the results of the LLG-guided pruning with phaser.

    :param DicParameters:
    :type DicParameters:
    :param cm:
    :type cm:
    :param sym:
    :type sym:
    :param DicGridConn:
    :type DicGridConn:
    :param nameJob:
    :type nameJob:
    :param outputDicr:
    :type outputDicr:
    :param nqueue:
    :type nqueue:
    :param ensembles:
    :type ensembles:
    :return: Does not return anything, but closes the connection with SystemUtility
    :rtype: None
    """
    dirente = outputDicr
    if dirente[-1] == '/':
        dirente = dirente[:-1]
    if hasattr(cm, 'channel'):
        current_dir = cm.get_remote_pwd()
        print cm.change_remote_dir(os.path.basename(dirente))
    status = '--'
    fromIn = 0
    toIn = nqueue - 1
    poolrota = []
    foms = {'llg': [numpy.inf, 0.0], 'zscore': [numpy.inf, 0.0]}
    candelete = False
    ndir = 0
    dirente2 = ''
    current_dir2 = ''
    rnp_sol = {}
    while fromIn <= toIn:
        yetEvaluated = True
        if yetEvaluated:
            print 'Evaluating ' + str(fromIn) + ' translation file'
            if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0:
                outputDic = os.path.join(outputDicr, './' + str(ndir) + '/')
                dirente2 = outputDic
                if dirente2[-1] == '/':
                    dirente2 = dirente2[:-1]
                if hasattr(cm, 'channel'):
                    SystemUtility.remote_reconnection(cm.get_remote_pwd())
                    current_dir2 = cm.get_remote_pwd()
                    print cm.change_remote_dir(os.path.basename(dirente2))
                ndir += 1
            while 1:
                if hasattr(cm, 'channel'):
                    wse = cm.get_remote_file(str(fromIn) + '.out', os.path.join(outputDic, str(fromIn) + '.out'),
                                             conditioEND=PHASER_NMA_END_CONDITION, testEND=PHASER_NMA_END_TEST)
                    if isinstance(wse, bool) and not wse:
                        print 'File ' + str(fromIn) + '.out not ready sleeping 3 seconds...'
                        time.sleep(3)
                        continue
                    f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r')
                    checkrnp = f.read()  # read the whole .out as one string: re.findall needs a string, not a list of lines
                    f.close()
                    listas = re.findall(r'Map coefficient calculated for top solution', checkrnp)
                    listas = [str(fromIn) + "."
+ str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...' time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + '.out')): checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_NMA_END_CONDITION_LOCAL, PHASER_NMA_END_TEST) while True: btest = False f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) btest = os.path.exists(os.path.join(outputDic, ler)) if not btest: break if btest: break break else: time.sleep(3) continue name = str(fromIn) fe = open(outputDic + str(fromIn) + ".pdb", "r") fer = fe.readlines() fe.close() clus = None for luc in fer: if luc.startswith("REMARK CLUSTER"): clus = luc.split()[2] break if clus is None: clus = "-1" for ler in rnp_sol.keys(): cud = int((ler.split("."))[1]) # 8.1.pdb here it takes 1 tud = int((ler.split("."))[0]) # 8.1.pdb here it takes 8 if tud == fromIn: fut_name = outputDic + os.path.basename(ensembles["ensemble" + str(tud)][:-4] + ".pdb") fg = open(os.path.join(outputDic, ler), "r") fgall = fg.read() fg.close() # TESTING ************************************************************************************** # Determine the number of residues set to an occupancy of zero # NOTE CM: This approach is not general, if we would change the occupancy min and max # we would need something different p = subprocess.Popen('grep ATOM ' + os.path.join(outputDic, ler) + ' | grep CA | grep -c " 0.00" ', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() # print 'outp',outp res_removed = int(outp) # print 'We have removed ',res_removed,' residues from ',os.path.join(outputDic, ler), # sys.exit(0) # TESTING ************************************************************************************** # Removing the output file n.1.pdb os.remove(os.path.join(outputDic, ler)) # Removing the file with the partial occupancies os.remove(os.path.join(outputDic,str(tud)+'.'+str(cud)+'.'+'refined_occupancy.pdb')) # Writing the output file with the correct name and remark fg = open(fut_name, "w") fg.write("REMARK CLUSTER " + str(clus) + "\n") fg.write("REMARK NRES OCC SET TO ZERO " + str(res_removed) + "\n") fg.write(fgall) fg.close() candelete = False try: nb = fromIn os.remove(outputDic + str(nb) + '.pdb') os.remove(outputDic + str(nb) + '.sol') os.remove(outputDic + str(nb) + '.out') os.remove(outputDic + str(nb) + '.sh') try: os.remove(outputDic + str(nb) + ".1.mtz") except: pass if os.path.exists(outputDic + str(nb) + '.mtz'): os.remove(outputDic + str(nb) + '.mtz') if os.path.exists(outputDic + str(nb) + '.sum'): os.remove(outputDic + str(nb) + '.sum') if os.path.exists(outputDic + str(nb) + '.mat'): os.remove(outputDic + str(nb) + '.mat') if os.path.exists(outputDic + str(nb)): os.remove(outputDic + str(nb)) except: print 'Cannot find the file to delete.' 
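# NOTE: the grep pipeline above counts the CA atoms written out with zero
# occupancy. A minimal pure-Python sketch of the same count (hypothetical
# helper, assuming standard PDB fixed columns: atom name in columns 13-16,
# occupancy in columns 55-60):
#   def count_zero_occ_ca(pdb_path):
#       n = 0
#       for line in open(pdb_path):
#           if line.startswith("ATOM") and line[12:16].strip() == "CA" and float(line[54:60]) == 0.0:
#               n += 1
#       return n
# Like the shell version, this is tied to the current occupancy bounds
# (see the NOTE CM above about it not being general).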
# print sys.exc_info() # traceback.print_exc(file=sys.stdout) fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime('%Y-%m-%d %H:%M') print 'Not all models are yet performed...' + ti if not yetEvaluated: print 'Long sleep, queue not ready' time.sleep(60) if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print 'Now all models are performed.' SystemUtility.close_connection(DicGridConn, DicParameters, cm) def evaluateNMA(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, rotclusid, nqueue, ensembles): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: :type DicGridConn: :param nameJob: :type nameJob: :param outputDicr: :type outputDicr: :param rotclusid: :type rotclusid: :param nqueue: :type nqueue: :param ensembles: :type ensembles: :return: :rtype: """ dirente = outputDicr if dirente[-1] == '/': dirente = dirente[:-1] if hasattr(cm, 'channel'): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) status = '--' fromIn = 0 toIn = nqueue - 1 poolrota = [] foms = {'llg': [numpy.inf, 0.0], 'zscore': [numpy.inf, 0.0]} candelete = False ndir = 0 dirente2 = '' current_dir2 = '' rnp_sol = {} while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print 'Evaluating ' + str(fromIn) + ' translation file' if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, './' + str(ndir) + '/') dirente2 = outputDic if dirente2[-1] == '/': dirente2 = dirente2[:-1] if hasattr(cm, 'channel'): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, 'channel'): wse = cm.get_remote_file(str(fromIn) + '.out', os.path.join(outputDic, str(fromIn) + '.out'), conditioEND=PHASER_NMA_END_CONDITION, testEND=PHASER_NMA_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.out not ready sleeping 3 seconds...' time.sleep(3) continue f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...' time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + '.out')): checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_NMA_END_CONDITION_LOCAL, PHASER_NMA_END_TEST) while True: btest = False f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) listas = [str(fromIn) + "." 
+ str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) btest = os.path.exists(os.path.join(outputDic, ler)) if not btest: break if btest: break break else: time.sleep(3) continue name = str(fromIn) for ler in rnp_sol.keys(): cud = int((ler.split("."))[1]) # 8.1.pdb here it takes 1 tud = int((ler.split("."))[0]) # 8.1.pdb here it takes 8 if tud == fromIn: fut_name = outputDic + os.path.basename( ensembles["ensemble" + str(tud)][:-4] + "_" + str(cud) + ".pdb") fg = open(os.path.join(outputDic, ler), "r") fgall = fg.read() fg.close() os.remove(os.path.join(outputDic, ler)) fg = open(fut_name, "w") fg.write("REMARK CLUSTER " + str(rotclusid) + "\n") fg.write(fgall) fg.close() candelete = False try: nb = fromIn os.remove(outputDic + str(nb) + '.pdb') os.remove(outputDic + str(nb) + '.sol') os.remove(outputDic + str(nb) + '.out') os.remove(outputDic + str(nb) + '.sh') if os.path.exists(outputDic + str(nb) + '.mtz'): os.remove(outputDic + str(nb) + '.mtz') if os.path.exists(outputDic + str(nb) + '.sum'): os.remove(outputDic + str(nb) + '.sum') if os.path.exists(outputDic + str(nb) + '.mat'): os.remove(outputDic + str(nb) + '.mat') if os.path.exists(outputDic + str(nb)): os.remove(outputDic + str(nb)) except: print 'Cannot find the file to delete.' # print sys.exc_info() # traceback.print_exc(file=sys.stdout) fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime('%Y-%m-%d %H:%M') print 'Not all models are yet performed...' + ti if not yetEvaluated: print 'Long sleep, queue not ready' time.sleep(60) if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print 'Now all models are performed.' 
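# NOTE: the loop above relies on phaser's <job>.<solution>.pdb naming: for
# "8.1.pdb", ler.split(".") yields ["8", "1", "pdb"], so tud is the job index
# (8) and cud the solution index (1). Each kept solution is then rewritten
# under its source ensemble name with a "REMARK CLUSTER <id>" header line.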
SystemUtility.close_connection(DicGridConn, DicParameters, cm) def evaluateBRF(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, cell_dim, quate, convNames, models_directory, ensembles, LIMIT_CLUSTER=None, model_file=None, isOMIT=False): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: :type DicGridConn: :param nameJob: :type nameJob: :param outputDicr: :type outputDicr: :param maintainOrigCoord: :type maintainOrigCoord: :param cell_dim: :type cell_dim: :param quate: :type quate: :param convNames: :type convNames: :param models_directory: :type models_directory: :param ensembles: :type ensembles: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param model_file: :type model_file: :param isOMIT: :type isOMIT: :return: :rtype: """ global listaEva if models_directory == None: models_directory = "" Clud = [] dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) if os.path.exists(outputDicr + "clustersTmp.sum"): Clusters, cloplo = readClustersFromSUM(outputDic + "clustersTmp.sum") recover = True fromIn = cluster status = "--" toIn = 1 # rotas = (CLUDATA["heapSolutions"]).asList() i = 0 ndir = 0 dirente2 = "" current_dir2 = "" CluWork = [] while len(listaEva.keys()) > 0: i = (sorted(listaEva.keys()))[0] nc, nq, numFrag, ro = listaEva[i] # nameJobi = nameJob+"_"+str(i) yetEvaluated = True if yetEvaluated: print "Evaluating " + str(i) + " model" if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue # wse = cm.get_remote_file(str(i)+".1.pdb",os.path.join(outputDic,str(i)+".1.pdb"),lenght_ext=6,conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION,testEND=PHASER_RLIST_SOL_PDB_END_TEST) # if isinstance(wse,bool) and not wse: # print "File "+str(i)+".1.pdb not ready sleeping 3 seconds..." # time.sleep(3) # continue wse = cm.get_remote_file(str(i) + ".rlist", os.path.join(outputDic, str(i) + ".rlist"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".rlist not ready sleeping 3 seconds..." 
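# NOTE: the retrieval contract in this polling loop appears to be that
# cm.get_remote_file() returns False while the remote job has not yet
# satisfied conditioEND (checked against its testEND value), and the fetched
# file otherwise; the loop therefore retries every 3 seconds until both the
# .out and the .rlist of model i are complete.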
time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: # atest = os.path.exists(os.path.join(outputDic,str(i)+".1.pdb")) atest = True btest = os.path.exists(os.path.join(outputDic, str(i) + ".rlist")) if atest and btest: break break else: time.sleep(3) continue riprova = True PDBCREATED = "" PDBORIGINAL = "" new_euler = [] old_euler = [] llg = 0.0 zscore = 0.0 while riprova: try: try: fer = open(outputDic + str(i) + ".rlist", "r") ferli = fer.readlines() fer.close() llg = 0 zscore = 0 data_chain = {} for ferlinea in ferli: if ferlinea.startswith("SOLU TRIAL"): listy = ferlinea.split() llg = 0.0 zscore = 0.0 try: zscore = float(listy[9]) except: zscore = 0.0 new_euler = [float(listy[5]), float(listy[6]), float(listy[7])] try: p = subprocess.Popen('grep -A1 loggraph ' + outputDic + str(i) + ".out", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() outp = outp.strip() outlist = outp.splitlines() llg = float(outlist[1].split()[1]) except: llg = 0.0 break # NOTE: TEMPORARY for test gyre structure = Bioinformatics.getStructure("full", os.path.join(outputDic, str(i) + ".pdb")) new_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0, 0, chain_atoms, [ matrixFromEulerAngles(new_euler[0], new_euler[1], new_euler[2])], cell_dim, return_atoms=True) new_list_atoms += chain_atoms new_list_atoms = sorted(new_list_atoms, __cmp_atom) fer = open(outputDic + str(i) + ".sh", "r") ferli = fer.readlines() fer.close() for ferlinea in ferli: if ferlinea.startswith("#SPACEGROUP"): spaceGroup = ferlinea.split("#SPACEGROUP")[1] spaceGroup = spaceGroup.strip() if ferlinea.startswith("ROTATE"): lisr = ferlinea.split() old_euler = [float(lisr[4]), float(lisr[5]), float(lisr[6])] structure = Bioinformatics.getStructure("full", os.path.abspath( outputDic + os.path.basename(str(i) + ".pdb"))) old_list_atoms = [] resids = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): if resi.get_id() not in resids: resids.append(resi.get_id()) for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() chain_atoms = rotateListByMatrix(0, 0, chain_atoms, [ matrixFromEulerAngles(old_euler[0], old_euler[1], old_euler[2])], cell_dim, return_atoms=True) old_list_atoms += chain_atoms old_list_atoms = sorted(old_list_atoms, __cmp_atom) PDBORIGINAL = Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] f = open(os.path.abspath(outputDic + os.path.basename(str(i) + ".1_original.pdb")), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), 
beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data) + "\n") f.write(PDBORIGINAL) f.close() PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] f = open(os.path.abspath(outputDic + os.path.basename(str(i) + ".1_created.pdb")), "w") CRD = '{r:<6}{a:>9.3f}{b:>9.3f}{c:>9.3f}{alpha:7.2f}{beta:7.2f}{gamma:7.2f} {sp:<11}{z:>4}' data = dict(r='CRYST1', a=float(cell_dim[0]), b=float(cell_dim[1]), c=float(cell_dim[2]), alpha=float(cell_dim[3]), beta=float(cell_dim[4]), gamma=float(cell_dim[5]), sp=spaceGroup, z=1) f.write(CRD.format(**data) + "\n") f.write(PDBCREATED) f.close() # NOTE: TEST END cancel this block after test if os.path.exists(models_directory): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0])) if model_file != None and os.path.exists(model_file): ori_path = model_file if not os.path.exists(ori_path): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0]).split("-")[ 0] + ".pdb") structure = Bioinformatics.getStructure("full", ori_path) new_list_resi = [] structureCOMP = Bioinformatics.getStructure("full2", cStringIO.StringIO(PDBCREATED)) for model in structureCOMP.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): new_list_resi += [resi] old_list_resi = [] structureREF = Bioinformatics.getStructure("full3", cStringIO.StringIO(PDBORIGINAL)) for model in structureREF.get_list(): for chain in model.get_list(): chain_name = chain.get_id() for resi in chain.get_list(): old_list_resi += [resi] # print "+++++++",len(old_list_resi),len(new_list_resi) # CALCOLO DEL RMSD FRA IL MODELLO VECCHIO E IL MODELLO NUOVO (rmsd, nref, ncom, listCoord, structureFINE, pdball) = Bioinformatics.getSuperimp( [old_list_resi], [new_list_resi], "PRECOMPUTED", algorithm="biopython", allAtomsModel=PDBCREATED, backbone=True) # ,listmodel=listcomp) lineas = PDBCREATED.splitlines() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break rotateStructureByListCoord(i, 1, structureFINE, listCoord, outputDic, title) else: rmsd = -100 nref = 0 ncom = 0 except: print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsd = -100 nref = 0 ncom = 0 ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) if not isOMIT: f = open(outputDic + "../models.sum", "a") f.write("===========\n") f.write("MODEL: " + str(i) + "\tCORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + "\t RMSD: " + str( rmsd) + "\t" + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF: " + str( nref) + "\t" + "NCOM: " + str(ncom) + "\n") f.write("===========\n") f.close() else: if len(Clud) <= LIMIT_CLUSTER: for q in range(len(Clud), LIMIT_CLUSTER + 1): Clud.append({"heapSolutions": ADT.Heap()}) ro["llg"] = llg ro["zscore"] = zscore ro["euler"] = new_euler ro["frac"] = [0.0, 0.0, 0.0] Clud[LIMIT_CLUSTER]["heapSolutions"].push((-1 * ro["llg"], -1 * ro["zscore"]), ro) # PULIZIA FILES INUTILI if os.path.exists(models_directory): if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass f = open(outputDic + 
os.path.basename(convNames[str(i) + ".1.pdb"][0]), "w") f.write(PDBCREATED) f.close() ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) else: ensembles[convNames[str(i) + ".1.pdb"][1]] = convNames[str(i) + ".1.pdb"][0] # print "=======",str(i)+".1.pdb","......",convNames[str(i)+".1.pdb"],"oooooooooo",convNames[str(i)+".1.pdb"][1],"////",ensembles[convNames[str(i)+".1.pdb"][1]],"------",convNames[str(i)+".1.pdb"][0] # NOTE: TEMPORARY FOR TESTING GYRE os.remove(outputDic + str(i) + ".pdb") os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sum") except: # print "Trying to remove *sum files" # print sys.exc_info() pass # NOTE: ENDING test (decomment when finished) try: os.remove(outputDic + str(i) + ".1.mtz") except: # print "Trying to remove *1.mtz files" # print sys.exc_info() pass # for t in range(numFrag): # os.remove(outputDic+str(t)+"_"+str(i)+".pdb") del listaEva[i] i += 1 if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) riprova = False now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) except: print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) # os.remove(outputDic+"rgr.mtz") if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file("rgr.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print "Now all models are performed." listaEva = {} SystemUtility.close_connection(DicGridConn, DicParameters, cm) if not isOMIT: return ensembles, CluWork else: return ensembles, Clud def evaluateRGR(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, cell_dim, quate, convNames, models_directory, ensembles, LIMIT_CLUSTER=None, model_file=None, isOMIT=False, ent=None): """ Process the output from phaser's gyre mode. 
:param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: :type DicGridConn: :param nameJob: :type nameJob: :param outputDicr: :type outputDicr: :param maintainOrigCoord: :type maintainOrigCoord: :param cell_dim: :type cell_dim: :param quate: static reference to the Quaternions class :type quate: instance of Quaternions.Quaternions() :param convNames: :type convNames: :param models_directory: :type models_directory: :param ensembles: :type ensembles: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param model_file: :type model_file: :param isOMIT: :type isOMIT: :param ent: :type ent: :return: :rtype: """ global listaEva if models_directory == None: models_directory = "" Clud = [] dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) if os.path.exists(outputDicr + "clustersTmp.sum"): Clusters, cloplo = readClustersFromSUM(outputDic + "clustersTmp.sum") recover = True fromIn = cluster status = "--" toIn = 1 # rotas = (CLUDATA["heapSolutions"]).asList() i = 0 ndir = 0 dirente2 = "" current_dir2 = "" CluWork = [] while len(listaEva.keys()) > 0: i = (sorted(listaEva.keys()))[0] nc, nq, numFrag, ro = listaEva[i] # nameJobi = nameJob+"_"+str(i) #print 'SHERLOCK ro["name"]',ro["name"] yetEvaluated = True if yetEvaluated: print "Evaluating " + str(i) + " model" if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".1.pdb", os.path.join(outputDic, str(i) + ".1.pdb"), lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".1.pdb not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".sol", os.path.join(outputDic, str(i) + ".sol"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".sol not ready sleeping 3 seconds..." 
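# A worked example of the .sol parsing done below, using the PHASER 2.9 line
# quoted in the comments there:
#   SOLU SET RF/RFZ=24/2.9 GRF=(23:28)
# listy[3] is "GRF=(23:28)"; splitting on "=" and then ":" gives
# ["(23", "28)"], and float("28)".split(")")[0]) == 28.0, so the LLG kept is
# the value after the colon (the value before it appears to be the
# pre-refinement LLG).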
time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: atest = os.path.exists(os.path.join(outputDic, str(i) + ".1.pdb")) btest = os.path.exists(os.path.join(outputDic, str(i) + ".sol")) if atest and btest: break break else: time.sleep(3) continue riprova = True PDBCREATED = "" PDBORIGINAL = "" while riprova: try: try: fer = open(outputDic + str(i) + ".sol", "r") # PHASER 2.9 sol output from gyre # # refinement vs Rotation function # SOLU SET RF/RFZ=24/2.9 GRF=(23:28) # SOLU HISTORY GYRE(1:1) # SOLU SPAC P 1 21 1 # SOLU GYRE ENSEMBLE ensemble1[A] EULER 116.930 54.602 260.163 # ANGLE 2.48418 DISTANCE 0 # SOLU GYRE ENSEMBLE ensemble1[B] EULER 113.271 58.465 261.716 # ANGLE 2.98284 DISTANCE 1.16366 ferli = fer.readlines() fer.close() llg = 0.0 zscore = 0.0 data_chain = {} first = False for ferlinea in ferli: if ferlinea.startswith("SOLU SET"): if first: break else: listy = ferlinea.split() #print '**************************************' #print 'listy',listy #if not listy[2].startswith('RF++'): # otherwise we do not have info from prev rot # rot_infolist=((listy[2].split('='))[1]).split('/') # print 'rot_infolist',rot_infolist #llgrot=float(rot_infolist[0]) #zscorerot=float(rot_infolist[1]) gyre_infolist=((listy[3].split('='))[1]).split(':') #print 'gyre_infolist',gyre_infolist llg = float((gyre_infolist[1].split(')'))[0]) #print '**************************************' first = True #if ferlinea.startswith("SOLU GYRE"): #topl = ferlinea.split() # ['SOLU', 'GYRE', 'ENSEMBLE', 'ensemble1[A]', 'EULER', '116.930', '54.602', '260.163', # '#', 'ANGLE', '2.48418', 'DISTANCE', '0'] #print 'ferlinea',ferlinea #print 'topl',topl #chainid = topl[4][-2] #print 'chainid',chainid # NOTE CM: data_chain is not being used elsewhere I believe, and in any case information # related to each chain are found in different place in phaser 2.9 # OLD: #data_chain[chainid] = [float(topl[6]), float(topl[7]), float(topl[8]), float(topl[10]), # float(topl[11]), float(topl[12])] # NEW: # euler angles for the chain are in: #euler_gyre_chain=[float(topl[5]),float(topl[6]),float(topl[7])] #angle_gyre_chain=float(topl[10]) #distance_gyre_chain=float(topl[12]) #print 'euler_gyre_chain, angle_gyre_chain, distance_gyre_chain ' #print euler_gyre_chain, angle_gyre_chain, distance_gyre_chain if os.path.exists(models_directory): structure = Bioinformatics.getStructure("full", os.path.join(outputDic, str(i) + ".1.pdb")) new_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] #chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() new_list_atoms += chain_atoms # print "------",len(new_list_atoms),len(new_list_atoms) new_list_atoms = sorted(new_list_atoms, key=lambda x: x.get_parent().get_full_id()[3][1:]) ori_path = SystemUtility.findInSubdirectory( os.path.basename(convNames[str(i) + ".1.pdb"][0]), subdirectory=models_directory) # if models_directory in convNames[str(i)+".1.pdb"][0]: # ori_path = convNames[str(i)+".1.pdb"][0] # else: # ori_path 
= os.path.join(models_directory,os.path.basename(convNames[str(i)+".1.pdb"][0])) if model_file != None and os.path.exists(model_file): ori_path = model_file # new_list_atoms = sorted(new_list_atoms,__cmp_atom) if not os.path.exists(ori_path): ori_path = os.path.join(models_directory, os.path.basename(convNames[str(i) + ".1.pdb"][0]).split("-")[ 0] + ".pdb") structure = Bioinformatics3.get_structure("full", ori_path) old_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() old_list_atoms += chain_atoms # print "------",len(old_list_atoms),len(new_list_atoms) old_list_atoms = sorted(old_list_atoms, key=lambda x: x.get_parent().get_full_id()[3][ 1:]) # sort by residue number PDBCREATED = Bioinformatics3.get_pdb_from_list_of_atoms(new_list_atoms)[0] PDBORIGINAL = Bioinformatics3.get_pdb_from_list_of_atoms(old_list_atoms)[0] # file_a=open(str(i)+'_created.pdb','w') # file_a.write(PDBCREATED) # file_a.close() # file_b=open(str(i)+'_original.pdb','w') # file_b.write(PDBORIGINAL) # file_b.close() try: # HOW THE RMSDs WERE COMPUTED BEFORE WITH BIOINFORMATICS #(rmsT, nref, ncom, allAtoms, compStru, pda) = Bioinformatics.getSuperimp(PDBORIGINAL, # PDBCREATED, # "PDBSTRINGBM_RESIDUES_CONSERVED", # algorithm="nigels-core2", # backbone=True, # superpose_exclude=1, # n_iter=None, # onlyCA=True) # results = BORGES_MATRIX.perform_superposition(reference=io.StringIO(SystemUtility.py2_3_unicode(PDBORIGINAL)), # target=io.StringIO(SystemUtility.py2_3_unicode(PDBCREATED)), # sensitivity_ah=0.0000001, # sensitivity_bs=0.0000001, # peptide_length=3, write_graphml=False, # write_pdb=False, # use_signature=True, # restrictions_edges = None) # results dictio # {"rmsd": best_rmsd_global, "size": best_size_global, "associations": best_association_global, # "transf": best_transf_global, "graph_ref": best_g_a_global, # "grapf_targ": best_g_b_global, "match": best_match_global, "explored": best_explored_global, # "correlation": best_corr_global, "pdb_target": pdbmod, "pdb_core_target": pdbmod2, # "ca_target": [atom for atom in allAtoms if atom.get_name() == "CA"]} dictionary_dist = Bioinformatics3.get_CA_distance_dictionary(io.StringIO(SystemUtility.py2_3_unicode(PDBORIGINAL)), io.StringIO(SystemUtility.py2_3_unicode(PDBCREATED)), max_rmsd=1000, last_rmsd=1000, recompute_rmsd=True, cycles=1, cycle=1, before_apply="automatic", get_superposed_atoms=False) rmsT=Bioinformatics3.get_rmsd_from_distance_hash(dictionary_dist) nref=len(dictionary_dist) #original ncom=nref #created #allAtoms=results["rmsd"] #compStru=results["rmsd"] #pda=results["rmsd"] except: print "Error on file" print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsT = 100 nref = 0 ncom = 0 # file_c=open(str(i)+'_superposed.pdb','w') # file_c.write(pda[0]) # file_c.close() # print '\n' rmsd = rmsT if rmsd == 100: nref = 0 ncom = 0 except: print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsd = 100 nref = 0 ncom = 0 ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) nresc = -1 rms0 = -1 diffrms = -1 p = subprocess.Popen('grep -c " CA " ' + os.path.join(outputDic, str(i) + ".1.pdb"), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() outp = outp.strip() nresc = int(outp) nmin = nresc - 10 ndiff = -1 # if 
os.path.exists(ent): # # First find the best RT to superpose the template to the final structure # # (rmsent, nrefent, ncoment, allent, cment, dicent) = Bioinformatics.getSuperimp( # os.path.join(outputDic, str(i) + ".1.pdb"), ent, "PDB", algorithm="superpose", # backbone=True) # # # ref = os.path.join(outputDic, str(i) + ".1.pdb") # # target = ent # # # # dictionary_dist = Bioinformatics3.get_CA_distance_dictionary( # # io.StringIO(SystemUtility.py2_3_unicode(PDBORIGINAL)), # # io.StringIO(SystemUtility.py2_3_unicode(PDBCREATED)), # # max_rmsd=1000, # # last_rmsd=1000, # # recompute_rmsd=True, # # cycles=1, cycle=1, # # before_apply="automatic", # # get_superposed_atoms=False) # # # # rmsT = Bioinformatics3.get_rmsd_from_distance_hash(dictionary_dist) # # nref = len(dictionary_dist) # original # # ncom = nref # created # # print 'SHERLOCK results',results # # sys.exit(0) # # (rms0, nref0, ncom0, all0, cm0, dic0) = Bioinformatics.getSuperimp(ori_path, ent, "PDB", # algorithm="superpose", # backbone=True) # rmsd_init = rms0 # core_init = ncom0 # dicc_init = dic0 # rmsd_fin = rmsent # core_fin = ncoment # dicc_fin = dicent # # (rmsd_core_common_init, rmsd_core_common_fin, # core_common) = Bioinformatics.getRMSDfromCommonCore(dicc_init, dicc_fin) # # diffrmsd = rmsd_core_common_init - rmsd_core_common_fin # # print "Model:",os.path.join(outputDic,str(i)+".1.pdb"),"RMSD",rmsent,"NREF",nrefent,"NCOM",ncoment # f = open(outputDic + "../pm_gyre.sum", "a") # f.write("MODEL: " + convNames[str(i) + ".1.pdb"][0] + " SIZE: " + str( # nresc) + " RMSD_GYRE: " + str("%.2f" % rmsd) + " RMSD_INIT: " + str( # "%.2f" % rmsd_init) + " CORE_INIT: " + str(core_init) + " RMSD_FIN: " + str( # "%.2f" % rmsd_fin) + " CORE_FIN: " + str(core_fin) + " RMSD_INIT_COMMON: " + str( # "%.2f" % rmsd_core_common_init) + " RMSD_FIN_COMMON: " + str( # "%.2f" % rmsd_core_common_fin) + " RMSD_DIFF: " + str( # "%.2f" % diffrmsd) + " CORE_COMMON: " + str(len(core_common.keys())) + " LLG: " + str( # llg) + "\n") # f.close() # """ # dbdir = os.path.join(outputDicr,"pmdb") # stored = os.path.join(outputDicr,"stored_sup") # if os.path.exists(dbdir): # shutil.rmtree(dbdir) # # if not os.path.exists(stored): # os.makedirs(stored) # # os.makedirs(dbdir) # nameofent = os.path.basename(ent)[:5] # shutil.copyfile(ent, os.path.join(dbdir,nameofent+".pdb")) # # print "Starting BORGES to find",os.path.join(outputDic,str(i)+".1.pdb"),"into",ent,"..." 
# Parameters = {} # Parameters["model"] = ori_path #os.path.join(outputDic,str(i)+".1.pdb") # Parameters["dir"] = dbdir # Parameters["wdir"] = outputDicr # Parameters["continous"] = 70 # Parameters["jumps"] = 70 # Parameters["ncssearch"] = False # Parameters["multimer"] = True # Parameters["rmsd_min"] = 0.0 # Parameters["rmsd_max"] = 6.0 # Parameters["rmsd_clustering"] = 0.0 # Parameters["exclude_residues_superpose"] = 0 # Parameters["nilges"] = 10 # Parameters["enhance_fold"] = True # Parameters["remove_coil"] = True # pars,opt = BORGES_MATRIX.get_artificial_parser_option(Parameters) # BORGES_MATRIX.startBORGES_MATRIX(pars,opt,doCluster=False,superpose=False,sym=sym,process_join=True) # if os.path.exists(os.path.join(outputDicr,"./library/")): # # 2: Find the best superposition between the template and all the extracted models # f = open(os.path.join(outputDicr,"./library/"+"list_rmsd.txt"),"r") # allines = f.readlines() # rmsd_gyre_vs_ent = float(allines[0].split()[0]) # best_model = allines[0].split()[1] # shutil.copyfile(os.path.join(outputDicr,"./library/"+best_model),os.path.join(stored,str(i)+".1.sup.pdb")) # shutil.rmtree(dbdir) # os.remove(os.path.join(outputDicr,"input_search.pdb")) # if os.path.exists(os.path.join(outputDicr,"./library/")): # shutil.rmtree(os.path.join(outputDicr,"./library/")) # """ if not isOMIT: f = open(outputDic + "../models.sum", "a") f.write("===========\n") # f.write("MODEL: "+str(i)+" CORRESP.: "+os.path.abspath(outputDic+os.path.basename(convNames[str(i)+".1.pdb"][0]))+" RMSD_GYRE: "+str("%.2f" % rmsd)+" RMSD_INIT: "+str("%.2f" % rms0)+" RMSD_ENT: "+str("%.2f" % rmsd_gyre_vs_ent)+" DIFF: "+str("%.2f" % diffrms)+" GYRE_ALIGNED_RES: "+str(ncoment)+" RESDIFF: "+str(ndiff)+" OVER_RES: "+str(nresc)+" LLG: "+str(llg)+"\n") f.write("MODEL: " + str(i) + " CORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + " RMSD_GYRE: " + str( "%.2f" % rmsd) + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF_ATM: " + str( nref) + "\t" + "NCOM_ATM: " + str(ncom) + "\n") f.write("===========\n") f.close() else: if len(Clud) <= LIMIT_CLUSTER: for q in range(len(Clud), LIMIT_CLUSTER + 1): Clud.append({"heapSolutions": ADT.Heap()}) ro["llg"] = llg ro["zscore"] = zscore ro["euler"] = data_chain["A"][:3] ro["frac"] = data_chain["A"][3:] Clud[LIMIT_CLUSTER]["heapSolutions"].push((-1 * ro["llg"], -1 * ro["zscore"]), ro) # CLEANUP OF UNNEEDED FILES if os.path.exists(models_directory): """ if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass f = open(outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0]), "w") f.write(PDBCREATED) f.close() """ if maintainOrigCoord and rmsd != -100: shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass shutil.move(os.path.join(outputDic, str(i) + ".1.pdb"), outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) else: ensembles[convNames[str(i) + ".1.pdb"][1]] = convNames[str(i) + ".1.pdb"][0] # print
"=======",str(i)+".1.pdb","......",convNames[str(i)+".1.pdb"],"oooooooooo",convNames[str(i)+".1.pdb"][1],"////",ensembles[convNames[str(i)+".1.pdb"][1]],"------",convNames[str(i)+".1.pdb"][0] # NOTE: TEMPORARY FOR TESTING GYRE try: os.remove(outputDic + str(i) + ".pdb") except: pass os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sol") except: pass try: os.remove(outputDic + str(i) + ".sum") except: pass try: os.remove(outputDic + str(i) + ".rlist") except: pass # remove extra file .gyre.rlist try: os.remove(outputDic + str(i) + "gyre.rlist") except: pass # NOTE: ENDING test (decomment when finished) try: os.remove(outputDic + str(i) + ".1.pdb") except: pass try: os.remove(outputDic + str(i) + ".1.mtz") except: pass # for t in range(numFrag): # os.remove(outputDic+str(t)+"_"+str(i)+".pdb") del listaEva[i] i += 1 if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) riprova = False now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) except: print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) # os.remove(outputDic+"rgr.mtz") if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file("rgr.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print "Now all models are performed." listaEva = {} SystemUtility.close_connection(DicGridConn, DicParameters, cm) if not isOMIT: return ensembles, CluWork else: return ensembles, Clud def evaluateRefP1(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, maintainOrigCoord, quate, convNames, ensembles, LIMIT_CLUSTER=None): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: :type DicGridConn: :param nameJob: :type nameJob: :param outputDicr: :type outputDicr: :param maintainOrigCoord: :type maintainOrigCoord: :param quate: :type quate: :param convNames: :type convNames: :param ensembles: :type ensembles: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :return: :rtype: """ global listaEva dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) status = "--" toIn = 1 # rotas = (CLUDATA["heapSolutions"]).asList() i = 0 ndir = 0 dirente2 = "" current_dir2 = "" CluWork = [] while len(listaEva.keys()) > 0: i = (sorted(listaEva.keys()))[0] nc, nq, numFrag, ro = listaEva[i] # nameJobi = nameJob+"_"+str(i) yetEvaluated = True if yetEvaluated: print "Evaluating " + str(i) + " model" if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(i) + ".out", os.path.join(outputDic, str(i) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".out not ready sleeping 3 seconds..." 
time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".1.pdb", os.path.join(outputDic, str(i) + ".1.pdb"), lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".1.pdb not ready, sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(i) + ".sol", os.path.join(outputDic, str(i) + ".sol"), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(i) + ".sol not ready, sleeping 3 seconds..." time.sleep(3) continue # print cm.remove_remote_file(str(i)+".1.mtz") # print cm.remove_remote_file(str(i)+".1.pdb") # print cm.remove_remote_file(str(i)+".sh") # print cm.remove_remote_file(str(i)+".sol") # print cm.remove_remote_file(str(i)+".sum") # for t in range(numFrag): # print cm.remove_remote_file(str(t)+"_"+str(i)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(i) + ".out")): checkYOURoutput(os.path.join(outputDic, str(i) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: atest = os.path.exists(os.path.join(outputDic, str(i) + ".1.pdb")) btest = os.path.exists(os.path.join(outputDic, str(i) + ".sol")) if atest and btest: break break else: time.sleep(3) continue riprova = True while riprova: try: try: tupla = Bioinformatics.getFragmentListFromPDBUsingAllAtoms(convNames[str(i) + ".1.pdb"][0], False) listcomp = tupla[1] # COMPUTE THE RMSD BETWEEN THE OLD MODEL AND THE NEW MODEL (rmsd, nref, ncom, listCoord, structure, pdball) = Bioinformatics.getSuperimp( convNames[str(i) + ".1.pdb"][0], outputDic + str(i) + ".1.pdb", "PDB", algorithm="biopython", listmodel=listcomp) flon = open(outputDic + str(i) + ".1.pdb", "r") lineas = flon.readlines() flon.close() title = "--" for li in lineas: lis = li.split() if lis[0] == "REMARK" and lis[1] == "TITLE": title = lis[2] break if lis[0] == "ATOM" or lis[0] == "HETATM": break rotateStructureByListCoord(i, 1, structure, listCoord, outputDic, title) except: rmsd = -100 nref = 0 ncom = 0 # if rmsd == -100: # print "ATTENTION: Models not comparable!!!!"
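# NOTE: the block below reads LLG and ZSCORE from the "SOLU SET" line of the .sol
# file by fixed offsets (listy[2][4:] strips a prefix like "LLG=", listy[3][5:] one
# like "TFZ=="). A hedged, order-independent alternative using a regex (sketch
# only, not wired into the code):
#
#     m = re.search(r"LLG=([-+0-9.eE]+)", ferlinea)
#     llg = float(m.group(1)) if m else 0.0
#     m = re.search(r"TFZ==?([-+0-9.eE]+)", ferlinea)
#     zscore = float(m.group(1)) if m else 0.0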
# tra,fixe,dis = readTranslationsFTF(outputDic,str(i),quate,"RNP_P1") fer = open(outputDic + str(i) + ".sol", "r") ferli = fer.readlines() fer.close() llg = 0 zscore = 0 for ferlinea in ferli: if ferlinea.startswith("SOLU SET"): listy = ferlinea.split() llg = float(listy[2][4:]) zscore = 0.0 try: zscore = float(listy[3][5:]) except: zscore = 0.0 break ro["llg"] = llg ro["zscore"] = zscore CluWork = saveRotations(DicParameters, [[], [], {(ro["name"], ro["numInRlist"]): ro}, CluWork]) f = open(outputDic + "../models.sum", "a") f.write("===========\n") f.write("MODEL: " + str(i) + "\tCORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) + "\t RMSD: " + str( rmsd) + "\t" + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF: " + str( nref) + "\t" + "NCOM: " + str(ncom) + "\n") f.write("===========\n") f.close() # CLEANUP OF UNNEEDED FILES if maintainOrigCoord and rmsd != -100: os.remove(outputDic + str(i) + ".1.pdb") shutil.move(outputDic + str(i) + "_" + str(1) + "_rot.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) else: try: os.remove(outputDic + str(i) + "_" + str(1) + "_rot.pdb") except: pass shutil.move(outputDic + str(i) + ".1.pdb", outputDic + os.path.basename(convNames[str(i) + ".1.pdb"][0])) ensembles[convNames[str(i) + ".1.pdb"][1]] = outputDic + os.path.basename( convNames[str(i) + ".1.pdb"][0]) os.remove(outputDic + str(i) + ".pdb") os.remove(outputDic + str(i) + ".sh") os.remove(outputDic + str(i) + ".out") try: os.remove(outputDic + str(i) + ".sum") except: pass os.remove(outputDic + str(i) + ".sol") try: os.remove(outputDic + str(i) + ".1.mtz") except: pass for t in range(numFrag): os.remove(outputDic + str(t) + "_" + str(i) + ".pdb") del listaEva[i] i += 1 if i % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) riprova = False now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models have been processed yet..." + ti if not yetEvaluated: time.sleep(60) except: # print "Error... Trying to read the output files again..." # print sys.exc_info() # traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) os.remove(outputDic + "refP1.mtz") if hasattr(cm, "channel"): print cm.remove_remote_file(nameJob + ".cmd") print cm.remove_remote_file("refP1.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print "All models have been processed now."
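# NOTE: models.sum (appended above) accumulates one record per model, delimited by
# "===========" lines; an illustrative record (tab-separated, see the f.write calls
# above for the exact fields):
#     MODEL: 12   CORRESP.: /path/to/model.pdb    RMSD: 0.84
#     LLG: 123.4  ZSCORE: 7.8  NREF: 450  NCOM: 450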
listaEva = {} SystemUtility.close_connection(DicGridConn, DicParameters, cm) return CluWork, ensembles def startRNP(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, normfactors, tncsfactors, RMSD, lowR, highR, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, sampl=-1, USE_TNCS=True, USE_RGR=False, BFAC=False, VRMS=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False, computeLLGonly=False, formfactors='FORMFACTORS XRAY'): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param nameJob: :type nameJob: :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param nice: :type nice: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param spaceGroup: :type spaceGroup: :param frag_fixed: :type frag_fixed: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param sampl: :type sampl: :param USE_TNCS: :type USE_TNCS: :param USE_RGR: :type USE_RGR: :param BFAC: :type BFAC: :param VRMS: :type VRMS: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :param RNP_GYRE: :type RNP_GYRE: :param computeLLGonly: indicate if only an LLG computation must be performed, without refinement :type computeLLGonly: bool :return: :rtype: """ # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" RNP_GYRE = "\n MACMR CHAINS ON" if RNP_GYRE else "" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if not (os.path.exists(outputDire)): os.makedirs(outputDire) if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall conv2 = {} for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and cou >= tops: break rota = sol[1] if computeLLGonly: rota['euler'] = [0.0, 0.0, 0.0] rota['frac'] = [0.0, 0.0, 0.0] prio = (rota["llg"], rota["zscore"]) if tops != None and cou >= tops: break list_pdbs = {} ct = 0 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct if numall > max_num_all: max_num_all = numall # if usePDB != None: # pdbf = usePDB if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rnp.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, 
"anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rnp.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] if not USE_RGR: # Then we can use the original models print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") elif USE_RGR: # Entered condition of gyre, we need to copy the files print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) good_remote_path = os.path.join(current_dir2, os.path.basename(dirente2)) cm.copy_local_file(localfile=pdbf, remotefile=os.path.join(good_remote_path, str(val[1]) + "_" + str( counter) + ".pdb"), remote_path_asitis=True, send_now=True) if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_RNP" + "\n") f.write('HKLIN "rnp.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE Test refinement and phasing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") if not computeLLGonly: f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n") else: f.write("MACMR PROTOCOL OFF" + "\n") if not Aniso: f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("MACANO PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") f.write("XYZOUT ON" + "\n") if not USE_TNCS: f.write("TNCS USE OFF\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("TOPFILES " + str(nrts + 30) + "\n") f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str( rota["rmsd"] if "rmsd" in rota else RMSD) + "\n") f.write("ENSEMBLE " + rota["name"] + " DISABLE CHECK ON" + "\n") if frag_fixed > 0: for frifr in rota["fixed_frags"]: if frifr["name"] != rota["name"]: f.write("ENSEMBLE " + frifr["name"] + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str( frifr["rmsd"] if "rmsd" in frifr else RMSD) + "\n") f.write("ENSEMBLE " + frifr["name"] + " DISABLE CHECK ON" + "\n") if frag_fixed > 0: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], 
rotafi["zscore"]) f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prio[0]) + " ZSCORE: " + str( prio[1]) + "\n") else: f.write("SOLU 6DIM ENSE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "FRAC " + str( (rota["frac"])[0]) + " " + str((rota["frac"])[1]) + " " + str( (rota["frac"])[2]) + "\t" + "BFAC " + str(rota["bfactor"]) + " #CLUSTER: NONE LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + "\n") cou += 1 f.write('ROOT "' + str(counter) + '"\n') if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SAMPLING TRA " + str(sampl) + "\n") f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") conv2[str(counter) + ".1.pdb"] = (pdbf, rota["name"]) counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRNPJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RNP of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRNPJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." 
sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rnp.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def startRNPOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, spaceGroup, frag_fixed, tops=None, LIMIT_CLUSTER=None, usePDO=False, sampl=-1, USE_TNCS=True, USE_RGR=False, BFAC=False, VRMS=False, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False, consider_inverted_helix=False, formfactors='FORMFACTORS XRAY'): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param nameJob: :type nameJob: :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param spaceGroup: :type spaceGroup: :param frag_fixed: :type frag_fixed: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param usePDO: :type usePDO: :param sampl: :type sampl: :param USE_TNCS: :type USE_TNCS: :param USE_RGR: :type USE_RGR: :param BFAC: :type BFAC: :param VRMS: :type VRMS: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :param RNP_GYRE: :type RNP_GYRE: :param consider_inverted_helix: :type consider_inverted_helix: :return: :rtype: """ # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" RNP_GYRE = "\n MACMR CHAINS ON" if RNP_GYRE else "" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 conv2 = {} ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall for cds in range(len(ClusAll)): #extract solutions to refine from CluAll clu = ClusAll[cds] nrts = len(clu["heapSolutions"].asList()) if nrts == 0: if current_dir2 != "" and cds == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue sol = clu["heapSolutions"].asList()[0] #first solution (tuple) from this cluster rota = sol[1] #solution dictionary from this tuple prio = (rota["llg"], rota["zscore"]) cou = 0 # while sol != None: # if tops != None and cou >= tops: # break list_pdbs = {} #dictionary that will contain ensname=>path to pdb ct = 0 if usePDO or consider_inverted_helix: for key in ensembles: try: franumero = int(key.split("FR")[-1].split("_")[0]) except: continue if (franumero == frag_fixed - 2) or consider_inverted_helix: if ensembles[key] not in list_pdbs.keys(): 
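# NOTE: franumero is parsed from ensemble keys of the form "...FR<digits>_<suffix>";
# with a hypothetical key like "ensembleID23FR1_0", key.split("FR")[-1].split("_")[0]
# yields "1". Keys without that pattern raise and are skipped by the continue above.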
list_pdbs[ensembles[key]] = (key, ct, False, rota["rmsd"] if "rmsd" in rota else RMSD) ct += 1 if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True, RMSD) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False, rotafi["rmsd"] if "rmsd" in rotafi else RMSD) ct += 1 numall = ct if numall > max_num_all: max_num_all = numall # if usePDB != None: # pdbf = usePDB # print list_pdbs """" nameRota = "" if rota["name"].split("xx") > 1: #if rota["name"] contains "xx" it means is an ARCIMBOLDO nameRota = "ensarci0" else: nameRota = rota["name"] """ # print("HAS ROTA RMSD:",rota["rmsd"]) # for fixed in rota["fixed_frags"]: # print("FIXED:",fixed["rmsd"]) # # print("LIST_PDBS") # for k,vl in list_pdbs.items(): # print(k,vl) # quit() if True: # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "rnp.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "rnp.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_RNP" + "\n") f.write('HKLIN "rnp.mtz"' + "\n") f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM testing for microED f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35"+"\n") # NOTE CM testing for microED f.write("TITLE Test refinement and phasing" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + RNP_GYRE + "\n") if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT ON" + "\n") f.write("TOPFILES "+str(nrts+30) +"\n") if not USE_TNCS: f.write("TNCS USE 
OFF\n") art = list_pdbs[ensembles[rota["name"]]][1] if not usePDO and not consider_inverted_helix: f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n") f.write("ENSEMBLE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " DISABLE CHECK ON" + "\n") if frag_fixed > 1: for frifr in rota["fixed_frags"]: if list_pdbs[ensembles[frifr["name"]]][1] != art: f.write("ENSEMBLE ensarci" + str( list_pdbs[ensembles[frifr["name"]]][1]) + " PDBFILE " + str( list_pdbs[ensembles[frifr["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(frifr["rmsd"] if "rmsd" in frifr else RMSD) + "\n") f.write("ENSEMBLE ensarci" + str( list_pdbs[ensembles[frifr["name"]]][1]) + " DISABLE CHECK ON" + "\n") else: for valok in list_pdbs: # this is a dictionary in reality for this reason it works valo = list_pdbs[valok] f.write("ENSEMBLE ensarci" + str(valo[1]) + " PDBFILE " + str(valo[1]) + "_" + str( counter) + ".pdb RMS " + str(valo[3]) + "\n") f.write("ENSEMBLE ensarci" + str(valo[1]) + " DISABLE CHECK ON" + "\n") s = 0 for sol in clu["heapSolutions"]: if tops != None and s >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) lip = [] ros = [] # lip.append(rota["original_rotcluster"].split("_")[-1]) if "fixed_frags" not in rota: rota["fixed_frags"] = [] for r in rota["fixed_frags"]: lip.append(r["original_rotcluster"].split("_")[-1]) ros.append(r) rota["fixed_frags"] = [] ros.append(rota) lip = sorted(lip) # print "!!!!!!!!!!",lip fixd = [] for ep in range(len(lip)): ced = lip[ep] initial = None # print "cambio de guardia",ced while True: rep = ros.pop(0) if initial != None and initial == rep: ros.append(rep) break if initial == None: initial = rep # print rep["original_rotcluster"].split("_")[-1], ced if rep["original_rotcluster"].split("_")[-1] == ced: if len(fixd) == 0: rep["original_rotcluster"] = ced rep["n_prev_cluster"] = int(ced) else: rep["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + ced rep["n_prev_cluster"] = __getIDClusterFromDescription(rep["original_rotcluster"]) # if ep == len(lip)-1: # rep["fixed_frags"] = fixd # else: rep["fixed_frags"] = [] fixd.append(copy.deepcopy(rep)) # print "He entrado aqui" break else: # print "No es la que quiero paso a la proxima" ros.append(rep) rota = ros.pop() if len(fixd) > 0: rota["fixed_frags"] = fixd rota["original_rotcluster"] = fixd[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] rota["n_prev_cluster"] = __getIDClusterFromDescription(rota["original_rotcluster"]) else: rota["fixed_frags"] = [] # print rota["original_rotcluster"] if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write("SOLU 6DIM ENSE ensarci" + str( list_pdbs[ensembles[rotafi["name"]]][1]) + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " #CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + " " + rotafi["name"] + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") if "n_prev_cluster" in rota: f.write( "SOLU 6DIM 
ENSE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " #CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prio[0]) + " ZSCORE: " + str(prio[1]) + " " + rota["name"] + "\n") else: f.write( "SOLU 6DIM ENSE ensarci" + str(list_pdbs[ensembles[rota["name"]]][1]) + " EULER \t" + str( (rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "FRAC " + str((rota["frac"])[0]) + " " + str( (rota["frac"])[1]) + " " + str((rota["frac"])[2]) + "\t" + "BFAC " + str( rota["bfactor"]) + " " + rota["name"] + "\n") # cou += 1 s += 1 # nuovoPath = pdbf[:-4]+rota["name"].split("ensembleID")[1]+".pdb" # conv2[str(counter)+"."+str(s)+".pdb"] = (nuovoPath,rota["name"]) conv2[str(counter) + "." + str(s) + ".pdb"] = (ensembles[rota["name"]], rota["name"]) f.write('ROOT "' + str(counter) + '"\n') if sampl != -1: f.write("SAMPLING ROT " + str(sampl) + "\n") f.write("SAMPLING TRA " + str(sampl) + "\n") f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) def startRNPJob(outputDirectory, op): """ :param outputDirectory: :type outputDirectory: :param op: :type op: :return: :rtype: """ if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "RNP of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startRNPJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." 
sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("rnp.mtz", False) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq, conv2 def __organizeClustersByFixedFragAndIDComb(ClusAll): """ :param ClusAll: :type ClusAll: :return: :rtype: """ ClutAll = [] for clu in ClusAll: dicsupp = {} for item in clu["heapSolutions"]: prio, rota = item fixed = [] if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: fixed.append(rotafi["euler"]) jso = json.dumps(fixed) if jso in dicsupp.keys(): dicsupp[jso].append(rota) else: dicsupp[jso] = [rota] for key in dicsupp.keys(): dirf = {"heapSolutions": ADT.Heap()} for rota in dicsupp[key]: dirf["heapSolutions"].push((-1 * rota["llg"], -1 * rota["zscore"]), rota) ClutAll.append(dirf) return ClutAll def startFTF(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, nice, normfactors, tncsfactors, RMSD, lowR, highR, final_tra, save_tra, frag_fixed, spaceGroup, cutoff_pack, sampl=-1, tops=None, USE_TNCS=True, LIMIT_CLUSTER=None, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, PACK_TRA=False, formfactors='FORMFACTORS XRAY'): """ :param DicParameters: dictionary with key-value pairs required at different steps. 
Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param nice: :type nice: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param final_tra: :type final_tra: :param save_tra: :type save_tra: :param frag_fixed: :type frag_fixed: :param spaceGroup: :type spaceGroup: :param cutoff_pack: :type cutoff_pack: :param sampl: :type sampl: :param tops: :type tops: :param USE_TNCS: :type USE_TNCS: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :param PACK_TRA: :type PACK_TRA: :return: :rtype: """ if not (os.path.exists(outputDire)): os.makedirs(outputDire) # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" for inde in range(len(ClusAll)): clu = ClusAll[inde] nrts = len(clu["heapSolutions"].asList()) if nrts == 0 or LIMIT_CLUSTER != None and LIMIT_CLUSTER != inde: if current_dir2 != "" and inde == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue s = 0 cou = 0 for sol in clu["heapSolutions"]: if frag_fixed == 1 and tops != None and cou >= tops: break rota = sol[1] prio = (rota["llg"], rota["zscore"]) pdbf = ensembles[rota["name"]] # if usePDB != None: # pdbf = usePDB if pdbf.endswith(".pdb"): # print "Preparing: "+str(pdbf)+" as model "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "tran.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "tran.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 try: os.symlink(pdbf, os.path.join(outputDirectory, str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(counter) + ".pdb") if True: f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write("#!/bin/tcsh" + "\n") f.write("MODE MR_FTF" + "\n") f.write('HKLIN "tran.mtz"' + "\n") 
f.write('HKLOUT OFF' + "\n") if not Intensities: f.write("LABIN F=" + F + " SIGF=" + SIGF + "\n") else: f.write("LABIN I=" + F + " SIGI=" + SIGF + "\n") # NOTE CM: testing micro ED data f.write(formfactors + "\n") f.write("RFACTOR USE OFF CUTOFF 35" + "\n") # NOTE CM: testing micro ED data f.write("TITLE Test fast translation for models" + "\n") f.write("JOBS 1" + "\n") f.write("SGALTERNATIVE SELECT NONE" + "\n") f.write("COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n") f.write("MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n") if PACK_TRA: f.write("TRANS PACK USE ON" + "\n") f.write("TRANSLATE PACKING CUTOFF " + str(cutoff_pack) + "\n") # we use the pack cutoff else: f.write("TRANSLATE PACKING CUTOFF 100" + "\n") # We allow all clashes to pass if not Aniso: f.write("MACANO PROTOCOL OFF" + "\n") f.write("MACTNCS PROTOCOL OFF" + "\n") f.write("TNCS EPSFAC READ anis.tncs" + "\n") f.write("NORM EPSFAC READ anis.norm" + "\n") f.write("RESOLUTION " + str(lowR) + " " + str(highR) + "\n") if BULK_BSOL >= 0 and BULK_FSOL >= 0: f.write("SOLPARAMETERS BULK USE ON" + "\n") f.write("SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + str(BULK_BSOL) + "\n") else: f.write("SOLPARAMETERS BULK USE OFF" + "\n") f.write("XYZOUT OFF" + "\n") if not USE_TNCS: f.write("TNCS USE OFF\n") else: f.write("TNCS USE ON\n") f.write("ENSEMBLE " + rota["name"] + " PDBFILE " + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n") f.write("ENSEMBLE " + rota["name"] + " DISABLE CHECK ON" + "\n") f.write("SEARCH METHOD FAST" + "\n") f.write("PEAKS ROT CLUSTER ON" + "\n") if frag_fixed > 1: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) f.write("SOLU 6DIM ENSE " + rotafi["name"] + " EULER \t" + str( (rotafi["euler"])[0]) + " " + str((rotafi["euler"])[1]) + " " + str( (rotafi["euler"])[2]) + "\t" + "FRAC " + str((rotafi["frac"])[0]) + " " + str( (rotafi["frac"])[1]) + " " + str((rotafi["frac"])[2]) + "\t" + "BFAC " + str( rotafi["bfactor"]) + " # " + rotafi["name"] + " CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str( prifi[0]) + " ZSCORE: " + str(prifi[1]) + "\n") else: f.write("SOLU SET" + "\n") f.write("SOLU SPAC " + str(spaceGroup) + "\n") # NOTE CM: adding the RF (llg from rotation) to the SOLU TRIAL if "n_prev_cluster" in rota: f.write( "SOLU TRIAL ENSEMBLE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RF "+ str(rota["llg"]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota["name"] + " CLUSTER: " + str( rota["original_rotcluster"].split("_")[-1]) + "\n") else: f.write( "SOLU TRIAL ENSEMBLE " + rota["name"] + " EULER \t" + str((rota["euler"])[0]) + " " + str( (rota["euler"])[1]) + " " + str((rota["euler"])[2]) + "\t" + "RF " + str(rota["llg"]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rotafi["name"] + "\n") cou += 1 if sampl != -1: f.write("SAMPLING TRA " + str(sampl) + "\n") f.write("TRANSLATE VOLUME FULL" + "\n") f.write("PEAKS TRA SELECT PERCENT" + "\n") f.write("PEAKS TRA CUTOFF " + str(save_tra) + "\n") f.write('ROOT "' + str(counter) + '"\n') f.write("END\n") f.write("EOF-phaser") f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (inde == 
len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if counter <= 0: print "WARNING: NO SOLUTIONS AVAILABLE TO PERFORM THE TRANSLATION SEARCH. MAYBE ALL LLG ARE BELOW THE CONFIGURED THRESHOLD. (DEFAULT IS POSITIVE VALUES)...ENDING NOW..." # sys.exit(0) return counter def startFTFJob(outputDirectory, op): if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "FTF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startFTFJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges."
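# NOTE: for grid execution, listaDirec registers each numbered subdirectory with
# the count of jobs it holds; shared inputs (tran.mtz, anis.norm, anis.tncs) are
# declared once per job below, while per-job .sh and .pdb files are matched by
# extension.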
sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("tran.mtz", False) job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def startFTFOnePerCluster(DicParameters, cm, sym, nameJob, ClusAll, ensembles, outputDire, mtz, MW, NC, F, SIGF, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, lowR, highR, final_tra, save_tra, frag_fixed, spaceGroup, cutoff_pack, sampl=-1, tops=None, LIMIT_CLUSTER=None, USE_TNCS=True, usePDO=False, VRMS=False, BFAC=False, BULK_FSOL=-1, BULK_BSOL=-1, PACK_TRA=False, excludeZscore=0, formfactors='FORMFACTORS XRAY'): """ :param DicParameters: dictionary with key-value pairs required at different steps. Keys: "nameExecution", :type DicParameters: dict :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param outputDire: :type outputDire: :param mtz: :type mtz: :param MW: :type MW: :param NC: :type NC: :param F: :type F: :param SIGF: :type SIGF: :param Intensities: :type Intensities: :param Aniso: :type Aniso: :param normfactors: :type normfactors: :param tncsfactors: :type tncsfactors: :param nice: :type nice: :param RMSD: :type RMSD: :param lowR: :type lowR: :param highR: :type highR: :param final_tra: :type final_tra: :param save_tra: :type save_tra: :param frag_fixed: :type frag_fixed: :param spaceGroup: :type spaceGroup: :param cutoff_pack: :type cutoff_pack: :param sampl: :type sampl: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param USE_TNCS: :type USE_TNCS: :param usePDO: :type usePDO: :param VRMS: :type VRMS: :param BFAC: :type BFAC: :param BULK_FSOL: :type BULK_FSOL: :param BULK_BSOL: :type BULK_BSOL: :param PACK_TRA: :type PACK_TRA: :param excludeZscore: :type excludeZscore: :return: :rtype: """ # Set phaser keywords VRMS = "ON" if VRMS else "OFF" BFAC = "ON" if BFAC else "OFF" if not (os.path.exists(outputDire)): os.makedirs(outputDire) dirente = outputDire if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente)) print cm.change_remote_dir(os.path.basename(dirente)) counter = 0 ndir = 0 dirente2 = "" current_dir2 = "" numall = -1 max_num_all = numall ClusAll = __organizeClustersByFixedFragAndIDComb(ClusAll) used_ensembles = {} solsFromTheSameCluster={} #NS for cds in range(len(ClusAll)): clu = ClusAll[cds] s = 0 nrts = len(clu["heapSolutions"].asList()) if nrts == 0: if current_dir2 != "" and cds == len(ClusAll) - 1: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) continue list_pdbs = {} ct = 0 if usePDO: for key in ensembles: try: franumero = int(key.split("FR")[-1].split("_")[0]) except: continue if franumero == frag_fixed - 2: if ensembles[key] not in list_pdbs.keys(): list_pdbs[ensembles[key]] = (key, ct, False) ct += 1 sol = clu["heapSolutions"].asList()[0] rota = sol[1] mainRot=rota['name'] 
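# NOTE: solsFromTheSameCluster maps every rotation name in this cluster to the
# name of the cluster's top rotation (mainRot), so rotations translated within the
# same Phaser job can later be traced back to their cluster representative.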
solsFromTheSameCluster[rota['name']]=mainRot #NS: then ensemblexx0FR.. from rotations of the same cluster will be added prio = (rota["llg"], rota["zscore"]) if ensembles[rota["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rota["name"]]] = (rota["name"], ct, True) ct += 1 if "fixed_frags" in rota: for rotafi in rota["fixed_frags"]: if ensembles[rotafi["name"]] not in list_pdbs.keys(): list_pdbs[ensembles[rotafi["name"]]] = (rotafi["name"], ct, False) ct += 1 numall = ct if numall > max_num_all: max_num_all = numall # if usePDB != None: # pdbf = usePDB # print "Preparing job for model: "+str(pdbf)+" in cluster "+str(counter) if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDirectory = os.path.join(outputDire, "./" + str(ndir) + "/") if not (os.path.exists(outputDirectory)): os.makedirs(outputDirectory) try: os.symlink(mtz, os.path.join(outputDirectory, "tran.mtz")) os.symlink(tncsfactors, os.path.join(outputDirectory, "anis.tncs")) os.symlink(normfactors, os.path.join(outputDirectory, "anis.norm")) except: pass dirente2 = outputDirectory if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): current_dir2 = cm.get_remote_pwd() print cm.create_remote_dir(os.path.basename(dirente2)) print cm.change_remote_dir(os.path.basename(dirente2)) cm.create_remote_link(cm.remote_mtz_path, "tran.mtz") cm.create_remote_link(cm.remote_tncs_path, "anis.tncs") cm.create_remote_link(cm.remote_norm_path, "anis.norm") ndir += 1 for pdbf in list_pdbs.keys(): try: val = list_pdbs[pdbf] os.symlink(pdbf, os.path.join(outputDirectory, str(val[1]) + "_" + str(counter) + ".pdb")) except: pass if hasattr(cm, "channel"): # print "pdb",cm.get_remote_pwd(),os.path.join(cm.remote_library_path,os.path.basename(pdbf)) for pdbf in list_pdbs.keys(): val = list_pdbs[pdbf] print cm.change_remote_dir(os.path.join(current_dir2, os.path.basename(dirente2))) cm.create_remote_link(os.path.join(cm.remote_library_path, os.path.basename(pdbf)), str(val[1]) + "_" + str(counter) + ".pdb") # f = open(outputDirectory+"/"+str(counter)+".sh", "w") headlines = "" enselines = "" taillines = "" headlines += "#!/bin/tcsh" + "\n" headlines += "MODE MR_FTF"+"\n" headlines += 'HKLIN "tran.mtz"'+"\n" headlines += 'HKLOUT OFF' + "\n" if not Intensities: headlines += 'LABIN F=' + F + ' SIGF=' + SIGF + '\n' else: headlines += 'LABIN I=' + F + ' SIGI=' + SIGF + '\n' #NOTE CM testing for microED headlines += formfactors + "\n" headlines += "RFACTOR USE OFF CUTOFF 35" + "\n" # NOTE CM testing for microED headlines += "TITLE Test fast translation for models" + "\n" headlines += "JOBS 1" + "\n" headlines += "COMPOSITION PROTEIN MW " + str(MW) + " NUMBER " + str(NC) + "\n" headlines += "MACMR ROT ON TRA ON BFAC " + BFAC + " VRMS " + VRMS + "\n" if PACK_TRA: headlines += "TRANS PACK USE ON" + "\n" headlines += "TRANSLATE PACKING CUTOFF " + str(cutoff_pack) + "\n" else: headlines += "TRANSLATE PACKING CUTOFF 100" + "\n" if not Aniso: headlines += "MACTNCS PROTOCOL OFF" + "\n" headlines += "MACANO PROTOCOL OFF" + "\n" headlines += "TNCS EPSFAC READ anis.tncs" + "\n" headlines += "NORM EPSFAC READ anis.norm" + "\n" headlines += "RESOLUTION " + str(lowR) + " " + str(highR) + "\n" headlines += "XYZOUT OFF" + "\n" if not USE_TNCS: headlines += "TNCS USE OFF\n" else: headlines += "TNCS USE ON\n" headlines += "SEARCH METHOD FAST" + "\n" headlines += "PEAKS ROT CLUSTER ON" + "\n" if BULK_BSOL >= 0 and BULK_FSOL >= 0: headlines += "SOLPARAMETERS BULK USE ON" + "\n" headlines += "SOLPARAMETERS BULK FSOL " + str(BULK_FSOL) + " BSOL " + 
str(BULK_BSOL) + "\n" else: headlines += "SOLPARAMETERS BULK USE OFF" + "\n" if frag_fixed > 1 and "fixed_frags" in rota: taillines += "SOLU SET" + "\n" taillines += "SOLU SPAC " + str(spaceGroup) + "\n" for frifr in rota["fixed_frags"]: rotafi = frifr prifi = (rotafi["llg"], rotafi["zscore"]) if not str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str( counter) + ".pdb" in used_ensembles.keys(): enselines += "ENSEMBLE " + rotafi["name"] + " PDBFILE " + str( list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(rotafi["rmsd"] if "rmsd" in rotafi else RMSD) + "\n" enselines += "ENSEMBLE " + rotafi["name"] + " DISABLE CHECK ON" + "\n" used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"] = [ rotafi["name"]] else: used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"].append( rotafi["name"]) taillines += "SOLU 6DIM ENSE " + \ used_ensembles[str(list_pdbs[ensembles[rotafi["name"]]][1]) + "_" + str(counter) + ".pdb"][ 0] + " EULER \t" + str((rotafi["euler"])[0]) + " " + str( (rotafi["euler"])[1]) + " " + str((rotafi["euler"])[2]) + "\t" + "FRAC " + str( (rotafi["frac"])[0]) + " " + str((rotafi["frac"])[1]) + " " + str( (rotafi["frac"])[2]) + "\t" + "BFAC " + str(rotafi["bfactor"]) + " # " + rotafi[ "name"] + " CLUSTER: " + str( rotafi["original_rotcluster"].split("_")[-1]) + " LLG: " + str(prifi[0]) + " ZSCORE: " + str( prifi[1]) + "\n" else: taillines += "SOLU SET" + "\n" taillines += "SOLU SPAC " + str(spaceGroup) + "\n" s = 0 # print "==============================ROTATIONS=============================",counter for sol in clu["heapSolutions"]: if tops != None and s >= tops: break rota = sol[1] solsFromTheSameCluster[rota['name']]=mainRot prio = (rota["llg"], rota["zscore"]) # print "NAME:",rota["name"],"EULER:",rota["euler"],"FRAC",rota["frac"] if "n_prev_cluster" in rota: if not str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str( counter) + ".pdb" in used_ensembles.keys(): enselines += "ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n" enselines += "ENSEMBLE " + rota["name"] + " DISABLE CHECK ON" + "\n" used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"] = [ rota["name"]] else: used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"].append( rota["name"]) taillines += "SOLU TRIAL ENSEMBLE " + \ used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"][ 0] + " EULER \t" + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "RF "+ str(rota["llg"]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota[ "name"] + " CLUSTER: " + str(rota["original_rotcluster"].split("_")[-1]) + "\n" else: if not str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str( counter) + ".pdb" in used_ensembles.keys(): enselines += "ENSEMBLE " + rota["name"] + " PDBFILE " + str( list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb RMS " + str(rota["rmsd"] if "rmsd" in rota else RMSD) + "\n" enselines += "ENSEMBLE " + rota["name"] + " DISABLE CHECK ON" + "\n" used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"] = [ rota["name"]] else: used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"].append( rota["name"]) taillines += "SOLU TRIAL ENSEMBLE " + \ 
used_ensembles[str(list_pdbs[ensembles[rota["name"]]][1]) + "_" + str(counter) + ".pdb"][ 0] + " EULER \t" + str((rota["euler"])[0]) + " " + str((rota["euler"])[1]) + " " + str( (rota["euler"])[2]) + "\t" + "RF "+ str(rota["llg"]) + "\t" + "RFZ " + str(rota["zscore"]) + " # " + rota["name"] + "\n" s += 1 # print "========================================================================",counter if sampl != -1: taillines += "SAMPLING TRA " + str(sampl) + "\n" taillines += "TRANSLATE VOLUME FULL" + "\n" if excludeZscore > 0: taillines += "PEAKS TRA SELECT SIGMA CUTOFF %s\n"%excludeZscore else: taillines += "PEAKS TRA SELECT PERCENT" + "\n" taillines += "PEAKS TRA CUTOFF " + str(save_tra) + "\n" taillines += 'ROOT "' + str(counter) + '"\n' taillines += "END\n" taillines += "EOF-phaser" f = open(outputDirectory + "/" + str(counter) + ".sh", "w") f.write(headlines) f.write(enselines) f.write(taillines) f.close() if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDirectory, str(counter) + ".sh"), "./" + str(ndir - 1) + "/" + str(counter) + ".sh") # cm.copy_local_file(os.path.join(outputDirectory,str(counter)+".sh"),str(counter)+".sh") counter += 1 if counter % NUMBER_OF_FILES_PER_DIRECTORY == 0 or (cds == len(ClusAll) - 1): if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) if counter <= 0: print "WARNING: No solutions are available for translation search" print "Maybe all the LLG values are below the configured threshold. Exiting now..." # sys.exit(0) return counter print "Waiting for all threads to finish...\n" while 1: if len(threading.enumerate()) > 1: pass else: print "ok\n" break def startFTFJob(outputDirectory, op): """ :param outputDirectory: path to folder for the output :type outputDirectory: str :param op: :type op: :return: :rtype: """ if os.path.exists(os.path.join(outputDirectory, str(op) + ".sol")): return f = open(os.path.join(outputDirectory, str(op) + ".sh"), "r") f2 = open(os.path.join(outputDirectory, str(op) + ".out"), "w") print "Executing..." 
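        # The prints below echo the exact command being spawned: the Phaser
        # keyword script <op>.sh is fed to the binary on stdin and stdout is
        # redirected to <op>.out. Illustratively (the nice level and binary
        # path are runtime values; the ones shown are only an assumed example):
        #
        #     nice -n10 /path/to/phaser < 0.sh > 0.out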
print "nice", "-n" + str(nice), PATH_NEW_PHASER, "<", os.path.join(outputDirectory, str(op) + ".sh"), ">", os.path.join( outputDirectory, str(op) + ".out") print " *** TF Job %s / %s***\n" % (op + 1, counter) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_PHASER], stdin=f, stdout=f2, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() f2.close() print err listaDirec = [] numero = 0 for pr in range(ndir): if pr == ndir - 1: numero = counter - ((ndir - 1) * NUMBER_OF_FILES_PER_DIRECTORY) else: numero = NUMBER_OF_FILES_PER_DIRECTORY if hasattr(cm, "channel"): listaDirec.append((os.path.join(cm.get_remote_pwd(), "./" + str(pr)), numero)) else: listaDirec.append((os.path.join(os.path.abspath(outputDire), "./" + str(pr)), numero)) nq = counter print "FTF of " + str(nq) + " models submitted to the local machine with:" print "mtz: " + str(mtz) + " F=" + str(F) + " SIGF=" + str(SIGF) print "MW: " + str(MW) + " NC: " + str(NC) print "RMSD: " + str(RMSD) + " lowR: " + str(lowR) + " highR: " + str(highR) if sampl != -1: print "at a sampling size: " + str(sampl) print "Picking all translations with a peak >= " + str(save_tra) print "Saving all translations with a peak >= " + str(save_tra) nl = 0 if cm is None: for op in range(counter): if op > 0 and op % NUMBER_OF_FILES_PER_DIRECTORY == 0: nl += 1 # current directory number outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") else: outputDirectory = os.path.join(outputDire, "./" + str(nl) + "/") try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startFTFJob, outputDirectory, op) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = PATH_NEW_PHASER + "<" + os.path.join(outputDirectory, str(op) + ".sh") + ">" + os.path.join( outputDirectory, str(op) + ".out") SystemUtility.launchCommand(comando, os.path.join(outputDirectory, str(op) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) job.setExecutable(PATH_NEW_PHASER) job.setInitialDir(listaDirec) lineStdIn = ".sh" lia = lineStdIn.split() job.setStdIn(lia) job.addInputFile("tran.mtz", False) if max_num_all > 0: for lo in range(max_num_all): job.addInputFile(str(lo) + "_" + ".pdb", True) else: job.addInputFile(".pdb", True) job.addInputFile(".sh", True) job.addInputFile("anis.norm", False) job.addInputFile("anis.tncs", False) job.addOutputFile(".out", True) cm.setRequirements(PHASER_REQUIREMENTS) cm.setRank("kflops") (nc, nq) = cm.submitJobs(job, counter) if hasattr(cm, "channel"): cm.change_remote_dir(current_dir) return nq def put_same_unit_cell_and_symm(list_frags, cell_dim, ref_point): """ :param list_frags: :type list_frags: :param cell_dim: :type cell_dim: :param ref_point: :type ref_point: :return: :rtype: """ struss = [] lista_trials = [("+", 0.0), ("*", -1.0), ("+", -1.0), ("+", 1.0)] parameters = {} p = [] wx = None wy = None wz = None ux = None uy = None uz = None d1 = None d2 = None cx = None cy = None cz = None if len(ref_point) > 0: cx = ref_point[0] cy = ref_point[1] cz = ref_point[2] print "Center of mass of ent:", cx, cy, cz for frag in list_frags: print "Processing new fragment..." 
frag_orig = [] frag_move = [] no_convert = False # no_trial_zero = False all_first_point = [] p = [] wx = None wy = None wz = None ux = None uy = None uz = None d1 = None d2 = None for atom in frag: if isinstance(atom, list): ux = atom[0] uy = atom[1] uz = atom[2] else: atm = atom.get_coord() ux = atm[0] uy = atm[1] uz = atm[2] nx, ny, nz, parameters = convertFromOrthToFrac(ux, uy, uz, cell_dim, parameters) # print print "Original fractional", nx, ny, nz frag_orig.append(numpy.array([ux, uy, uz])) if no_convert: continue if wx is not None and wy is not None and wz is not None: d1 = numpy.sqrt(((wx - ux) * (wx - ux)) + ((wy - uy) * (wy - uy)) + ((wz - uz) * (wz - uz))) # print "distance1",d1,"--",wx,wy,wz,"-",ux,uy,uz # wx = ux # wy = uy # wz = uz if nx < 0: nx = -1 * nx if ny < 0: ny = -1 * ny if nz < 0: nz = -1 * nz nx = nx - numpy.modf(nx)[1] ny = ny - numpy.modf(ny)[1] nz = nz - numpy.modf(nz)[1] if len(p) > 0: hp = ADT.Heap() for g in p: px = g[0] py = g[1] pz = g[2] # print "Using",px,py,pz try: for ada in lista_trials: for adb in lista_trials: for adc in lista_trials: # no_convert = True if ada[0] == "+": sx = nx + ada[1] elif ada[0] == "*": sx = nx * ada[1] if adb[0] == "+": sy = ny + adb[1] elif adb[0] == "*": sy = ny * adb[1] if adc[0] == "+": sz = nz + adc[1] elif adc[0] == "*": sz = nz * adc[1] # print "Application of trial",sx,sy,sz sx, sy, sz, parameters = convertFromFracToOrth(sx, sy, sz, cell_dim, parameters) d2 = numpy.sqrt( ((px - sx) * (px - sx)) + ((py - sy) * (py - sy)) + ((pz - sz) * (pz - sz))) # print "distance2",d2,"--",px,py,pz,"-",sx,sy,sz # print "LOOK AT IT:",d1-d2,ada,adb,adc df = abs(d1 - d2) ang = ( 57.2957795 * angle_between([wx - ux, wy - uy, wz - uz], [px - sx, py - sy, pz - sz], [0.0, 0.0, 1.0], signed=False)) # if abs(d1-d2) <= 0.05 and (57.2957795*angle_between([wx-ux,wy-uy,wz-uz],[px-sx,py-sy,pz-sz],[0.0,0.0,1.0],signed=False)) <= 0.05: if df <= 9.0 and ang <= 9.0: # no_convert = False if len(p) > 1 and cx is not None and cy is not None and cz is not None: d3 = numpy.sqrt(((cx - sx) * (cx - sx)) + ((cy - sy) * (cy - sy)) + ( (cz - sz) * (cz - sz))) hp.push((df, ang, d3), (sx, sy, sz, px, py, pz)) # it is a MinHeap # print "distance found",d3,sy,sy,sz else: hp.push((df, ang, 1), (sx, sy, sz, px, py, pz)) # raise Exception # print "It is compatible:",d1-d2,ada,adb,adc except: pass if hp.len() > 0: no_convert = False item = hp.pop() nx = item[1][0] ny = item[1][1] nz = item[1][2] px = item[1][3] py = item[1][4] pz = item[1][5] print "minim values", item[0], nx, ny, nz # df,ang,dis = item[0] # if df > 1.5 or ang > 1.0: # print "Values are too far from correct, cannot be accepted" # no_convert = True else: no_convert = True if not no_convert: frag_move[-1] = numpy.array([px, py, pz]) # break p = [] else: for ada in lista_trials: for adb in lista_trials: for adc in lista_trials: if ada[0] == "+": qx = nx + ada[1] elif ada[0] == "*": qx = nx * ada[1] if adb[0] == "+": qy = ny + adb[1] elif adb[0] == "*": qy = ny * adb[1] if adc[0] == "+": qz = nz + adc[1] elif adc[0] == "*": qz = nz * adc[1] qx, qy, qz, parameters = convertFromFracToOrth(qx, qy, qz, cell_dim, parameters) p.append([qx, qy, qz]) if no_convert: print "ATTENTION CAN'T FIND A FRAC COORDS FOR ATOM. USING ORIGINAL POSITIONS..." 
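            # When no compatible image was found, the fragment keeps its
            # original orthogonal coordinates: frag_orig is written back atom
            # by atom in the final loop of this function.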
continue if len(p) == 0: p.append([nx, ny, nz]) wx = ux wy = uy wz = uz frag_move.append(numpy.array([nx, ny, nz])) q = 0 for atom in frag: if no_convert: atom.set_coord(frag_orig[q]) else: atom.set_coord(frag_move[q]) q += 1 struss.append(atom) return struss def translateListByFrac(num, num2, struct, frac, cell_dim, return_atoms=False): """ :param num: :type num: :param num2: :type num2: :param struct: :type struct: :param frac: :type frac: :param cell_dim: :type cell_dim: :param return_atoms: :type return_atoms: :return: :rtype: """ structure = [] parameters = {} for atom in struct: if isinstance(atom, list): x = atom[0] y = atom[1] z = atom[2] else: atm = atom.get_coord() x = atm[0] y = atm[1] z = atm[2] x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) nx = x + frac[0] ny = y + frac[1] nz = z + frac[2] nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) if not return_atoms: structure.append(numpy.array([nx, ny, nz])) else: atom.set_coord(numpy.array([nx, ny, nz])) if not return_atoms: return structure else: return struct def rotateListByMatrix(num, num2, struct, matrices, cell_dim, return_atoms=False): """ :param num: :type num: :param num2: :type num2: :param struct: :type struct: :param matrices: :type matrices: :param cell_dim: :type cell_dim: :param return_atoms: :type return_atoms: :return: :rtype: """ structure = [] parameters = {} for atom in struct: if isinstance(atom, list): x = atom[0] y = atom[1] z = atom[2] else: atm = atom.get_coord() x = atm[0] y = atm[1] z = atm[2] for tw in range(len(matrices)): matrice = matrices[tw] if tw > 0: mode = "rotateByCrystCoord" else: mode = "rotateByOrthCoord" if mode == "rotateByCrystCoord": # print "Before conversion",x,y,z x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) # print "After conversion",x,y,z nx = (matrice[0][0] * x) + (matrice[0][1] * y) + (matrice[0][2] * z) ny = (matrice[1][0] * x) + (matrice[1][1] * y) + (matrice[1][2] * z) nz = (matrice[2][0] * x) + (matrice[2][1] * y) + (matrice[2][2] * z) if mode == "rotateByCrystCoord": # print "Before conversion after rotation",nx,ny,nz nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) # print "After conversion after rotation",nx,ny,nz x = nx y = ny z = nz if not return_atoms: structure.append(numpy.array([x, y, z])) else: atom.set_coord(numpy.array([x, y, z])) if not return_atoms: return structure else: return struct def rotateStructureByMatrix(num, num2, struct, matrices, outputPath, cell_dim, writePDB=True, filename=None): """ :param num: :type num: :param num2: :type num2: :param struct: :type struct: :param matrices: :type matrices: :param outputPath: :type outputPath: :param cell_dim: :type cell_dim: :param writePDB: :type writePDB: :param filename: :type filename: :return: :rtype: """ structure = copy.deepcopy(struct) if not os.path.exists(outputPath): os.makedirs(outputPath) if writePDB: if filename == None: nameOutput = outputPath + str(num) + "_" + str(num2) + "_rot.pdb" # NSADD else: nameOutput = outputPath + str(filename) + "_rot.pdb" pdb = open(nameOutput, "w") # NSADD parameters = {} for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): x = (atom.get_coord()[0]) y = (atom.get_coord()[1]) z = (atom.get_coord()[2]) for tw in range(len(matrices)): matrice = matrices[tw] if tw > 0: mode = "rotateByCrystCoord" else: mode = "rotateByOrthCoord" if mode == "rotateByCrystCoord": # print "Before 
conversion",x,y,z x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) # print "After conversion",x,y,z nx = (matrice[0][0] * x) + (matrice[0][1] * y) + (matrice[0][2] * z) ny = (matrice[1][0] * x) + (matrice[1][1] * y) + (matrice[1][2] * z) nz = (matrice[2][0] * x) + (matrice[2][1] * y) + (matrice[2][2] * z) if mode == "rotateByCrystCoord": # print "Before conversion after rotation",nx,ny,nz nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) # print "After conversion after rotation",nx,ny,nz x = nx y = ny z = nz atom.set_coord(numpy.array([x, y, z])) if writePDB: ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) if writePDB: pdb.close() return nameOutput else: return structure def rotateStructureByListCoord(num, num2, structure, listCoord, outputPath, title): """ :param num: :type num: :param num2: :type num2: :param structure: :type structure: :param listCoord: :type listCoord: :param outputPath: :type outputPath: :param title: :type title: :return: :rtype: """ pdb = open(outputPath + str(num) + "_" + str(num2) + "_rot.pdb", "w") indice = 0 pdb.write("REMARK TITLE " + title + "\n") for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): nx = (listCoord[indice]).get_coord()[0] ny = (listCoord[indice]).get_coord()[1] nz = (listCoord[indice]).get_coord()[2] ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) indice += 1 pdb.close() def angleRadBetweenVectors(vec1, vec2): """ :param vec1: :type vec1: :param vec2: :type vec2: :return: :rtype: """ X1 = vec1[0] Y1 = vec1[1] Z1 = vec1[2] X2 = vec2[0] Y2 = vec2[1] Z2 = vec2[2] scaP2 = (X1 * X2) + (Y1 * Y2) + (Z1 * Z2) parallequiv = (numpy.sqrt((X1 * X1) + (Y1 * Y1) + (Z1 * Z1))) * (numpy.sqrt((X2 * X2) + (Y2 * Y2) + (Z2 * Z2))) cosTetaReal = scaP2 / parallequiv # print "scaP2",scaP2 # print "parallequiv",parallequiv # print "cosTetaReal",cosTetaReal # Take care of roundoff errors # cosTetaReal = numpy.min(cosTetaReal,1) # cosTetaReal = numpy.max(-1,cosTetaReal) TetaReal = numpy.arccos(cosTetaReal) return TetaReal def matrixFromEulerAngles2(th1, th2, th3): # PHASER CONVENTION: Z,Y,Z """ Conversion 323 from James Diebel 2006 """ th1 = (th1 * 2 * numpy.pi) / 360 th2 = (th2 * 2 * numpy.pi) / 360 th3 = (th3 * 2 * numpy.pi) / 360 matrice = ADT.get_matrix(3, 3) matrice[0][0] = (numpy.cos(th1) * numpy.cos(th2) * numpy.cos(th3)) - (numpy.sin(th1) * numpy.sin(th3)) matrice[0][1] = (numpy.cos(th1) * numpy.cos(th2) * numpy.sin(th3)) + (numpy.sin(th1) * numpy.cos(th3)) matrice[0][2] = -1 * numpy.cos(th1) * numpy.sin(th2) matrice[1][0] = (-1 * numpy.sin(th1) * numpy.cos(th2) * numpy.cos(th3)) - (numpy.cos(th1) * numpy.sin(th3)) matrice[1][1] = 
(-1 * numpy.sin(th1) * numpy.cos(th2) * numpy.sin(th3)) + (numpy.cos(th1) * numpy.cos(th3)) matrice[1][2] = numpy.sin(th1) * numpy.sin(th2) matrice[2][0] = numpy.sin(th2) * numpy.cos(th3) matrice[2][1] = numpy.sin(th2) * numpy.sin(th3) matrice[2][2] = numpy.cos(th2) return matrice def matrixFromEulerAngles(th1, th2, th3): # PHASER CONVENTION: Z,Y,Z """ http://en.wikipedia.org/wiki/Euler_angles Relationship to other representations Rotation matrix: ZYZ """ th1 = (th1 * 2 * numpy.pi) / 360 th2 = (th2 * 2 * numpy.pi) / 360 th3 = (th3 * 2 * numpy.pi) / 360 matrice = ADT.get_matrix(3, 3) matrice[0][0] = (numpy.cos(th1) * numpy.cos(th2) * numpy.cos(th3)) - (numpy.sin(th1) * numpy.sin(th3)) matrice[0][1] = ((-1) * numpy.cos(th1) * numpy.cos(th2) * numpy.sin(th3)) - (numpy.sin(th1) * numpy.cos(th3)) matrice[0][2] = numpy.cos(th1) * numpy.sin(th2) matrice[1][0] = (numpy.sin(th1) * numpy.cos(th2) * numpy.cos(th3)) + (numpy.cos(th1) * numpy.sin(th3)) matrice[1][1] = ((-1) * numpy.sin(th1) * numpy.cos(th2) * numpy.sin(th3)) + (numpy.cos(th1) * numpy.cos(th3)) matrice[1][2] = numpy.sin(th1) * numpy.sin(th2) matrice[2][0] = (-1) * numpy.sin(th2) * numpy.cos(th3) matrice[2][1] = numpy.sin(th2) * numpy.sin(th3) matrice[2][2] = numpy.cos(th2) return matrice def clusterAtOnceSols(DicParameters, listaAllsol, isArcimboldo, renamePDBs, rnp_sol, baseDir, name, quate, laue, listNCS, excludeZSCORE, mode, ClusteringMode, ensembles, cell_dim, thresholdCompare, evaLLONG, convNames, tops=None, LIMIT_CLUSTER=None, giveids=False, excludeZscoreRNP=0.0, make_positive_llg=False, is_verification=False): """ :param DicParameters: :type DicParameters: :param listaAllsol: :type listaAllsol: :param isArcimboldo: :type isArcimboldo: :param renamePDBs: :type renamePDBs: :param rnp_sol: :type rnp_sol: :param baseDir: :type baseDir: :param name: :type name: :param quate: :type quate: :param laue: :type laue: :param listNCS: :type listNCS: :param excludeZSCORE: :type excludeZSCORE: :param mode: :type mode: :param ClusteringMode: :type ClusteringMode: :param ensembles: :type ensembles: :param cell_dim: :type cell_dim: :param thresholdCompare: :type thresholdCompare: :param evaLLONG: :type evaLLONG: :param convNames: :type convNames: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param giveids: :type giveids: :param excludeZscoreRNP: :type excludeZscoreRNP: :param make_positive_llg: :type make_positive_llg: :return: :rtype: """ riprova = True while riprova: try: traslazioni, fixed, dizioClu, convertnames = readTranslationsFTF(baseDir, name, quate, mode, tops=tops, make_positive_llg=make_positive_llg) #print "LLG",traslazioni[0]["llg"],"zscore",traslazioni[0]["zscore"] riprova = False except: # print "Error...Trying again reading output files..." 
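            # Read failures are treated as transient here (the Phaser output
            # files may still be being flushed to disk): the traceback below
            # is printed for visibility and the enclosing while-loop simply
            # retries readTranslationsFTF until it succeeds.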
print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True # time.sleep(30) num = 0 nclu = None listaFileDel = [] listaKeyDel = [] listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} dictioNameClus = {} clus = -1 brat = {} nfixfrags = 0 fullcluname = "" for rotaz in traslazioni: num += 1 rotaz["numInSol"] = num testora = False # if rotaz["name"] == "ensembleIDxx0FR3_1-13": # print "FOUND:::::::::::::::::",rotaz["name"] # testora = True if len(convertnames.keys()) > 0: ensembles[rotaz["name"]] = ensembles[convertnames[rotaz["name"]]] # if testora: # print "AAAAAAAAAAAAAAAAAAAAAAA",rotaz["name"], ensembles[convertnames[rotaz["name"]]] if giveids: nameFi = ensembles[rotaz["name"]] # if testora: # print "BBBBBBBBBBBBBBBBBBBB",nameFi ###if rotaz["name"] not in listaKeyDel: ### listaKeyDel.append(rotaz["name"]) nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "-" + str(rotaz["numInSol"]) + ".pdb") # os.link(nameFi,os.path.join(os.path.dirname(nameFi),nuovoPath)) # print "nome:",rotaz["name"]+"-"+str(rotaz["numInSol"]),"file:",nameFi if len(rotaz["name"].split("-")) > 1: ensembles[rotaz["name"].split("-")[0] + "-" + str(rotaz["numInSol"])] = nameFi # nuovoPath rotaz["name"] = rotaz["name"].split("-")[0] + "-" + str(rotaz["numInSol"]) # if testora: # print "CCCCCCCCCCCCCCCCCCCCCCC11111111111", rotaz["name"], nameFi else: ensembles[rotaz["name"] + "-" + str(rotaz["numInSol"])] = nameFi # nuovoPath rotaz["name"] = rotaz["name"] + "-" + str(rotaz["numInSol"]) # if testora: # print "CCCCCCCCCCCCCCCCCCCCCCC22222222222", rotaz["name"], nameFi # if nameFi not in listaFileDel: # listaFileDel.append(nameFi) # print "--!!!!--",rotaz["name"] if "fixed_frags" in rotaz and (len(convertnames.keys()) > 0 or not isArcimboldo): for fi in rotaz["fixed_frags"]: #print "--------",fi["name"],fi["name"].split("-") if len(fi["name"].split("-")) > 1: wha = rotaz["name"].split("-")[1] fi["name"] = fi["name"] + "-" + wha ensembles[fi["name"]] = ensembles[rotaz["name"]] # if testora: # print "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD",fi["name"],ensembles[rotaz["name"]] nfixfrags = len(rotaz["fixed_frags"]) + 1 if (len(rotaz["fixed_frags"]) == 0 or LIMIT_CLUSTER != None) and len(dizioClu.values()) > 0: nclu = dizioClu.values()[0] rotaz["original_rotcluster"] = str(nclu) rotaz["n_prev_cluster"] = nclu elif len(rotaz["fixed_frags"]) > 0 and len(dizioClu.values()) > 0: # print "--------dizioClu--------" # print dizioClu # print rotaz["name"] # print "------------------------" nclu = dizioClu.values()[0] rotaz["original_rotcluster"] = rotaz["fixed_frags"][-1]["original_rotcluster"] + "_" + str(nclu) rotaz["n_prev_cluster"] = __getIDClusterFromDescription(rotaz["original_rotcluster"]) clus = rotaz["n_prev_cluster"] if rotaz["zscore"] < excludeZSCORE: # Excluding Zscores below threshold (default threshold=0.0) continue #if mode == "RNP" and (rotaz["tfz=="] < excludeZscoreRNP): #NS ADD Also skip if the TFZ== is too low # print "xx> NOT SELECTING SOL with TFZ==%s, LLG=%s (cutoff TZF==%s)"%(rotaz["tfz=="],rotaz["llg"],excludeZscoreRNP)#NS PRINT # continue #elif mode == "RNP" and (rotaz["tfz=="] >= excludeZscoreRNP): # print "==> SELECTING SOL with TFZ==%s, LLG=%s"%(rotaz["tfz=="],rotaz["llg"])#NS PRINT rotaz["elong"] = 0 # rotaz["fixed_frags"] = fixed listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) 
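        # The 4-vector appended above is not a true quaternion: it is the
        # Euler triple scaled by (1, 2, 3) plus a Cantor pairing of the same
        # triple scaled by (3, 1, 2), used purely as a clustering key.
        # Worked example for euler = [10.0, 20.0, 30.0]:
        #     [10.0, 40.0, 90.0, ADT.cantor_pairing([30.0, 20.0, 60.0])]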
listaRotNumInRlist.append((rotaz["name"], rotaz["numInSol"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInSol"])] = rotaz dictioNameClus[rotaz["name"]] = rotaz["original_rotcluster"] fullcluname = rotaz["original_rotcluster"] # print "==============ADDING IN================================" # print "rotaz", num # print rotaz # print "==============================================" if mode == "RNP" or mode == "RNP_GIMBLE": for ler in rnp_sol.keys(): cud = int((ler.split("."))[1]) # 8.1.pdb here it takes 1 tud = int((ler.split("."))[0]) # 8.1.pdb here it takes 8 if tops != None and cud > tops: try: os.remove(baseDir + ler) except Exception: pass continue for bla in listaRotNumInRlist: # print "====",bla[1],"======",tud,cud,"....",int(name) if bla[1] == cud and tud == int(name): rnp_sol[ler] = bla[0] brat[ler] = bla[0] break if (mode == "RNP" or mode == "RNP_GIMBLE") and renamePDBs: if isArcimboldo: for ler in brat.keys(): cud = int((ler.split("."))[0]) # print ler, brat[ler], convNames[ler][0], cud, int(name) if cud == int(name): count_try = 0 allf = '' prev = '' while len(allf) == 0 or allf != prev: prev = copy.deepcopy(allf) if count_try < 10: f = open(baseDir + ler, "r") allf = f.read() f.close() if len(allf) == 0 or allf != prev: time.sleep(2) count_try = count_try + 1 else: print 'There is a problem with this pdb file ', baseDir + ler # print dictioNameClus # NOTE: Important change here, I am trying to rename the Files with the same name as theyr rota["name"] #########nede = baseDir + os.path.basename(convNames[ler][0])[:-4] + brat[ler].split("ensembleID")[1] nede = baseDir + brat[ler] # print ".....--..------.....",nede,laue,os.path.exists(os.path.join(baseDir,"../../3_FTF_LIBRARY/")),os.path.join(baseDir,"../3_FTF_LIBRARY/") if str(laue) == "1" and not os.path.exists(os.path.join(baseDir, "../../3_FTF_LIBRARY/")) and not is_verification: nede += "-1.pdb" else: nede += ".pdb" f = open(nede, "w") druppo = str(dictioNameClus[brat[ler]]) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo + "\n")) f.write(allf) f.close() os.remove(baseDir + ler) # shutil.move(baseDir+ler,baseDir+os.path.basename(convNames[ler][0])[:-4]+brat[ler].split("ensembleID")[1]+".pdb") # Associare a .n.pdb l'ensembleID del primo nel .n.pdb ensembles[brat[ler]] = nede else: count_try = 0 allf = '' prev = '' while len(allf) == 0 or allf != prev: prev = copy.deepcopy(allf) if count_try < 10: add = "" if os.path.exists(baseDir + name + ".1.1.pdb"): add="1." 
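                        # Phaser sometimes writes "<name>.1.1.pdb" instead of
                        # "<name>.1.pdb"; in that case the extra "1." component
                        # is spliced into the filename so that the open() below
                        # targets the file that actually exists.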
f = open(baseDir + name + ".1."+add+"pdb", "r") allf = f.read() f.close() if len(allf) == 0 or allf != prev: time.sleep(2) count_try = count_try + 1 else: print 'There is a problem with this pdb file ', baseDir + name + ".1."+add+"pdb" break # print "+++++++++++",convNames[name+".1.pdb"] nomed = baseDir + os.path.basename(convNames[name + ".1.pdb"][0]) if len(convNames[name + ".1.pdb"][1].split("-")) > 1: nomed = nomed[:-4] + "-" + convNames[name + ".1.pdb"][1].split("-")[1] + ".pdb" f = open(nomed, "w") druppo = str(fullcluname) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) f.write("REMARK CLUSTER " + str(druppo) + "\n") f.write(allf) f.close() os.remove(baseDir + name + ".1.pdb") if os.path.exists(baseDir + name + ".1.1.pdb"): os.remove(baseDir + name + ".1.1.pdb") # shutil.move(baseDir+name+".1.pdb",baseDir+os.path.basename(convNames[name+".1.pdb"][0])) ensembles[convNames[name + ".1.pdb"][1]] = nomed listaAllsol[0] += listaQuaternions listaAllsol[1] += listaRotNumInRlist listaAllsol[2].update(listaDictioNameNumInRlist) if giveids: # if "ensembleIDxx0FR3_1-13" in listaKeyDel: # print "AAAAATTTTTTENTION: I am deleting ","ensembleIDxx0FR3_1-13" for ele in listaKeyDel: del ensembles[ele] return ensembles, listaAllsol, nfixfrags def clusterAllRotInList(DicParameters, listrot, isArcimboldo, quate, laue, listNCS, excludeLLG, mode, ClusteringMode, cell_dim, thresholdCompare, evaLLONG, tops=None, LIMIT_CLUSTER=None, giveids=False, applyNameFilter=False): """ :param DicParameters: :type DicParameters: :param listrot: :type listrot: :param isArcimboldo: :type isArcimboldo: :param quate: :type quate: :param laue: :type laue: :param listNCS: :type listNCS: :param excludeLLG: :type excludeLLG: :param mode: :type mode: :param ClusteringMode: :type ClusteringMode: :param cell_dim: :type cell_dim: :param thresholdCompare: :type thresholdCompare: :param evaLLONG: :type evaLLONG: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param giveids: :type giveids: :param applyNameFilter: :type applyNameFilter: :return: :rtype: """ global LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID = 0 ClusAll = [] RotClu = [] ensembles = {} listaAllrot = [[], [], {}, [[], RotClu]] num = 0 listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} # print "////////",len(listrot) for ro in listrot: rotaz, pdbname = ro num += 1 # print "Checking the "+str(num)+"\\"+str(len(listrot))+" rotation..." 
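        # When giveids is set, each rotation is renamed with the scheme
        # "ensembleIDxx<name>FR<n_fixed>_<numInRlist>" (e.g. "ensembleIDxx0FR0_1"
        # for the first rotation with no fixed fragments), and ensembles[]
        # maps that key back to the source pdb path.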
if mode in ["TRA", "PACK", "RNP", "PICASSO"]: rotaz["numInSol"] = num else: rotaz["numInRlist"] = num if rotaz["llg"] < excludeLLG: continue fixed = [] if "fixed_frags" in rotaz.keys(): fixed = rotaz["fixed_frags"] name = 0 if giveids: nameFi = pdbname nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "xx" + str(name) + "FR" + str( len(fixed)) + "_" + str(rotaz["numInRlist"]) + ".pdb") ensembles["ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str( rotaz["numInRlist"])] = nameFi # nuovoPath rotaz["name"] = "ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str(rotaz["numInRlist"]) listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) listaRotNumInRlist.append((rotaz["name"], rotaz["numInRlist"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInRlist"])] = rotaz if not isArcimboldo or (isArcimboldo and len(fixed) == 0): tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, prefilter=1.5) indes = len(ClusAll) for key in tomerge.keys(): dicton = {"heapSolutions": ADT.Heap()} value = tomerge[key] + [key] pdb_done = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue pdb_done.append(rotaz["name"]) if LIMIT_CLUSTER == None: rotaz["n_prev_cluster"] = indes else: rotaz["n_prev_cluster"] = LIMIT_CLUSTER rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"]) dicton["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) indes += 1 if LIMIT_CLUSTER != None: if len(ClusAll) <= LIMIT_CLUSTER: while len(ClusAll) < LIMIT_CLUSTER + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[LIMIT_CLUSTER] = dicton else: ClusAll.append(dicton) LAST_AVAILABLE_ROTID = indes tomerge = None merged = None nuovo_clus = None subclu = None listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} RotClu = [] for inderec in range(len(ClusAll)): prio, rota = ClusAll[inderec]["heapSolutions"].pop() ClusAll[inderec]["heapSolutions"].push(prio, rota) hp = ADT.Heap() hp.push(prio, copy.deepcopy(rota)) RotClu.append({"heapSolutions": hp}) elif not isArcimboldo or (len( fixed) > 0 and LIMIT_CLUSTER == None): # It means that we are not at the first fragment search for Arcimboldo or we are working with libraries in Borges tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, prefilter=1.5) # print "tomerge" # print tomerge final_clus = [] for key in tomerge.keys(): value = tomerge[key] + [key] pdb_done = [] lisp = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue if LIMIT_CLUSTER != None: rotaz["original_rotcluster"] = LIMIT_CLUSTER rotaz["n_prev_cluster"] = LIMIT_CLUSTER # print "WAAA Insert Rotation:",rotaz["name"],"llg:",rotaz["llg"],"zscore:",rotaz["zscore"],"in cluster:",rotaz["n_prev_cluster"] pdb_done.append(rotaz["name"]) lisp.append(rotaz) final_clus.append(lisp) listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} for cl in final_clus: if 
len(cl) == 0: continue rotaz = cl[0] inserted = False fixedNumbers = [] for rotafi in fixed: fixedNumbers.append(int(rotafi["original_rotcluster"].split("_")[-1])) result, elong = compareRotation(rotaz, rotafi, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) # print "rotaz cluster",rotaz["euler"] # print "rotafi cluster",rotafi["euler"] # print "---",result,elong if result: rotaz["elong"] = elong inserted = True for ro in cl: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rotafi["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) break if not inserted and LIMIT_CLUSTER == None: for inderec in range(len(RotClu)): prio, rota = RotClu[inderec]["heapSolutions"].pop() ncl = rota["n_prev_cluster"] RotClu[inderec]["heapSolutions"].push(prio, rota) if ncl in fixedNumbers: continue # print "---------++++++++++++-------------" # print "//rotaz cluster//",rotaz["euler"],rotaz["name"] # print "//rota cluster//",rota["euler"],rota["name"] result, elong = compareRotation(rotaz, rota, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) if result: rotaz["elong"] = elong inserted = True for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = int(ro["original_rotcluster"]) break # db.close() if not inserted and LIMIT_CLUSTER == None: rotaz["elong"] = 0 for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = int(ro["original_rotcluster"]) print "Inserting rotation in a new cluster", LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID += 1 hp = ADT.Heap() hp.push((-1 * cl[0]["llg"], -1 * cl[0]["zscore"]), copy.deepcopy(cl[0])) dicn = {"heapSolutions": hp} RotClu.append(dicn) for ro in cl: \ # print "ro[name]",ro["name"] # if ro["n_prev_cluster"] > 0: # listaQuaternions.append([ro["quaternion"][0],ro["quaternion"][1],ro["quaternion"][2],ro["quaternion"][3]]) listaQuaternions.append([ro["euler"][0] * 1, ro["euler"][1] * 2, ro["euler"][2] * 3, ADT.cantor_pairing( [ro["euler"][0] * 3, ro["euler"][1] * 1, ro["euler"][2] * 2])]) listaRotNumInRlist.append((ro["name"], ro["numInRlist"])) listaDictioNameNumInRlist[(ro["name"], ro["numInRlist"])] = ro \ # print "CONFIGURO ORIGINAL ROTCLUSTER A: ",ro["original_rotcluster"],ro["n_prev_cluster"] # print "..........................",listaDictioNameNumInRlist ClusAll = saveRotations(DicParameters, [listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, ClusAll], LIMIT_CLUSTER=LIMIT_CLUSTER, applyNameFilter=applyNameFilter) listaAllrot[0] += listaQuaternions listaAllrot[1] += listaRotNumInRlist listaAllrot[2].update(listaDictioNameNumInRlist) listaAllrot[3] = [ClusAll, RotClu] # print "-----------------",len(listaAllrot[0]),len(listaAllrot[1]),len(listaAllrot[2]) if giveids: return listaAllrot[3][0], listaAllrot[3][1], ensembles else: return listaAllrot[3][0], listaAllrot[3][1] def clusterAtOnce(DicParameters, listaAllrot, isArcimboldo, baseDir, name, quate, laue, listNCS, 
                  excludeLLG, mode, ClusteringMode, ensembles, cell_dim, thresholdCompare, evaLLONG, tops=None,
                  LIMIT_CLUSTER=None, giveids=False, applyNameFilter=False, lastFile=False, make_positive_llg=False):
    """
    Read the Phaser output of job `name` (FRF rotations, or TRA/PACK/RNP/PICASSO translations) and cluster
    the parsed solutions into the ClusAll/RotClu structures carried in listaAllrot.

    :param DicParameters:
    :type DicParameters:
    :param listaAllrot:
    :type listaAllrot:
    :param isArcimboldo: Indicates whether the function has been called with Arcimboldo (True) or Borges (False)
    :type isArcimboldo: bool
    :param baseDir:
    :type baseDir:
    :param name:
    :type name:
    :param quate:
    :type quate:
    :param laue:
    :type laue:
    :param listNCS:
    :type listNCS:
    :param excludeLLG:
    :type excludeLLG:
    :param mode: Can be "FRF", "TRA", "PACK", "RNP", "PICASSO"
    :type mode: str
    :param ClusteringMode:
    :type ClusteringMode:
    :param ensembles:
    :type ensembles:
    :param cell_dim:
    :type cell_dim:
    :param thresholdCompare:
    :type thresholdCompare:
    :param evaLLONG:
    :type evaLLONG:
    :param tops:
    :type tops:
    :param LIMIT_CLUSTER:
    :type LIMIT_CLUSTER:
    :param giveids:
    :type giveids:
    :param applyNameFilter:
    :type applyNameFilter:
    :param lastFile:
    :type lastFile:
    :param make_positive_llg:
    :type make_positive_llg:
    :return:
    :rtype:
    """
    global LAST_AVAILABLE_ROTID

    riprova = True
    ClusAll = listaAllrot[3][0]
    RotClu = listaAllrot[3][1]
    while riprova:
        try:
            if mode == "FRF":
                rotazioni, fixed = readRotationsFRF(baseDir, name, quate, tops=tops,
                                                    make_positive_llg=make_positive_llg)
            elif mode in ["TRA", "PACK", "RNP", "PICASSO"]:
                rotazioni, fixed, dizioClu, convertnames = readTranslationsFTF(baseDir, name, quate, mode, tops=tops,
                                                                               make_positive_llg=make_positive_llg)
            riprova = False
        except:
            print "Error... Trying again to read the output files..."
            print sys.exc_info()
            traceback.print_exc(file=sys.stdout)
            riprova = True

    num = 0
    listaQuaternions = []
    listaRotNumInRlist = []
    listaDictioNameNumInRlist = {}
    # print "SHERLOCK len(rotazioni)",len(rotazioni)
    for rotaz in rotazioni:
        num += 1
        # print " SHERLOCK Saving the "+str(num)+"\\"+str(len(rotazioni))+" rotation..."
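        # Translation-type output (TRA, PACK, RNP, PICASSO) is numbered with
        # "numInSol" while rotation-function output keeps "numInRlist", and any
        # solution whose LLG falls below excludeLLG is skipped before clustering.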
if mode in ["TRA", "PACK", "RNP", "PICASSO"]: rotaz["numInSol"] = num else: rotaz["numInRlist"] = num if rotaz["llg"] < excludeLLG: continue if giveids: nameFi = ensembles[rotaz["name"]] # del ensembles[rotaz["name"]] nuovoPath = os.path.join(os.path.dirname(nameFi), os.path.basename(nameFi)[:-4] + "xx" + str(name) + "FR" + str( len(fixed)) + "_" + str(rotaz["numInRlist"]) + ".pdb") # os.link(nameFi,os.path.join(os.path.dirname(nameFi),nuovoPath)) ensembles["ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str( rotaz["numInRlist"])] = nameFi # nuovoPath rotaz["name"] = "ensembleID" + "xx" + str(name) + "FR" + str(len(fixed)) + "_" + str(rotaz["numInRlist"]) rotaz["fixed_frags"] = fixed # listaQuaternions.append([rotaz["quaternion"][0],rotaz["quaternion"][1],rotaz["quaternion"][2],rotaz["quaternion"][3]]) listaQuaternions.append([rotaz["euler"][0] * 1, rotaz["euler"][1] * 2, rotaz["euler"][2] * 3, ADT.cantor_pairing( [rotaz["euler"][0] * 3, rotaz["euler"][1] * 1, rotaz["euler"][2] * 2])]) listaRotNumInRlist.append((rotaz["name"], rotaz["numInRlist"])) listaDictioNameNumInRlist[(rotaz["name"], rotaz["numInRlist"])] = rotaz # print "((((((((((((((((((((((((((" # for cuu in sorted(listaQuaternions): # print cuu,cuu[0]+cuu[1]+cuu[2] # print "))))))))))))))))))))))))))" # print "Arcimboldo?",isArcimboldo,"name",name,"type(name)",type(name) if (not isArcimboldo and name == "0") or (isArcimboldo and len(fixed) == 0): tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG) indes = len(ClusAll) for key in tomerge.keys(): dicton = {"heapSolutions": ADT.Heap()} value = tomerge[key] + [key] pdb_done = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, reverse=True): if rotaz["name"] in pdb_done: continue pdb_done.append(rotaz["name"]) if LIMIT_CLUSTER == None: rotaz["n_prev_cluster"] = indes else: rotaz["n_prev_cluster"] = LIMIT_CLUSTER rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"]) dicton["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) indes += 1 if LIMIT_CLUSTER != None: if len(ClusAll) <= LIMIT_CLUSTER: while len(ClusAll) < LIMIT_CLUSTER + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[LIMIT_CLUSTER] = dicton else: ClusAll.append(dicton) LAST_AVAILABLE_ROTID = indes tomerge = None merged = None nuovo_clus = None subclu = None listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} RotClu = [] for inderec in range(len(ClusAll)): if len(ClusAll[inderec]["heapSolutions"].asList()) == 0: continue prio, rota = ClusAll[inderec]["heapSolutions"].pop() ClusAll[inderec]["heapSolutions"].push(prio, rota) hp = ADT.Heap() hp.push(prio, copy.deepcopy(rota)) RotClu.append({"heapSolutions": hp}) elif (not isArcimboldo and name != "0") or (len( fixed) > 0 and LIMIT_CLUSTER == None): # It means that we are not at the first fragment search for Arcimboldo or we are working with libraries in Borges tomerge, nuovo_clus = __clusterSetOfRotations(isArcimboldo, listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG) # print "tomerge" # print tomerge final_clus = [] for key in tomerge.keys(): value = tomerge[key] + [key] pdb_done = [] lisp = [] for va in value: for prio, rotaz in sorted(nuovo_clus[va]["heapSolutions"].asList(), __cmp_rota2, 
reverse=True): if rotaz["name"] in pdb_done: continue if LIMIT_CLUSTER != None: rotaz["original_rotcluster"] = LIMIT_CLUSTER rotaz["n_prev_cluster"] = LIMIT_CLUSTER # print "WAAA Insert Rotation:",rotaz["name"],"llg:",rotaz["llg"],"zscore:",rotaz["zscore"],"in cluster:",rotaz["n_prev_cluster"] pdb_done.append(rotaz["name"]) lisp.append(rotaz) final_clus.append(lisp) listaQuaternions = [] listaRotNumInRlist = [] listaDictioNameNumInRlist = {} for cl in final_clus: if len(cl) == 0: continue rotaz = cl[0] inserted = False fixedNumbers = [] for rotafi in fixed: fixedNumbers.append(int(rotafi["original_rotcluster"].split("_")[-1])) result, elong = compareRotation(rotaz, rotafi, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) # print "rotaz cluster",rotaz["euler"] # print "rotafi cluster",rotafi["euler"] # print "---",result,elong if result: rotaz["elong"] = elong inserted = True for ro in cl: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rotafi["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) break if not inserted and LIMIT_CLUSTER == None: for inderec in range(len(RotClu)): prio, rota = RotClu[inderec]["heapSolutions"].pop() ncl = rota["n_prev_cluster"] RotClu[inderec]["heapSolutions"].push(prio, rota) if ncl in fixedNumbers: continue # print "---------++++++++++++-------------" # print "//rotaz cluster//",rotaz["euler"],rotaz["name"] # print "//rota cluster//",rota["euler"],rota["name"] result, elong = compareRotation(rotaz, rota, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, print_angles=False) if result: rotaz["elong"] = elong inserted = True for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + \ rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = rota["original_rotcluster"].split("_")[-1] ro["n_prev_cluster"] = int(ro["original_rotcluster"]) break # db.close() if not inserted and LIMIT_CLUSTER == None: rotaz["elong"] = 0 for ro in cl: if len(fixed) > 0: ro["original_rotcluster"] = fixed[-1]["original_rotcluster"] + "_" + str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = __getIDClusterFromDescription(ro["original_rotcluster"]) else: ro["original_rotcluster"] = str(LAST_AVAILABLE_ROTID) ro["n_prev_cluster"] = int(ro["original_rotcluster"]) print "Inserting rotation in a new cluster", LAST_AVAILABLE_ROTID LAST_AVAILABLE_ROTID += 1 hp = ADT.Heap() hp.push((-1 * cl[0]["llg"], -1 * cl[0]["zscore"]), copy.deepcopy(cl[0])) dicn = {"heapSolutions": hp} RotClu.append(dicn) for ro in cl: # print "ro[name]",ro["name"] # if ro["n_prev_cluster"] > 0: # listaQuaternions.append([ro["quaternion"][0],ro["quaternion"][1],ro["quaternion"][2],ro["quaternion"][3]]) listaQuaternions.append([ro["euler"][0] * 1, ro["euler"][1] * 2, ro["euler"][2] * 3, ADT.cantor_pairing( [ro["euler"][0] * 3, ro["euler"][1] * 1, ro["euler"][2] * 2])]) listaRotNumInRlist.append((ro["name"], ro["numInRlist"])) listaDictioNameNumInRlist[(ro["name"], ro["numInRlist"])] = ro # print "CONFIGURO ORIGINAL ROTCLUSTER A: ",ro["original_rotcluster"],ro["n_prev_cluster"] # print "..........................",listaDictioNameNumInRlist ClusAll = saveRotations(DicParameters, [listaQuaternions, listaRotNumInRlist, listaDictioNameNumInRlist, ClusAll], LIMIT_CLUSTER=LIMIT_CLUSTER, 
applyNameFilter=applyNameFilter) listaAllrot[0] += listaQuaternions listaAllrot[1] += listaRotNumInRlist listaAllrot[2].update(listaDictioNameNumInRlist) listaAllrot[3] = [ClusAll, RotClu] # print "-----------------",len(listaAllrot[0]),len(listaAllrot[1]),len(listaAllrot[2]) if giveids: return ensembles, listaAllrot else: return listaAllrot def __getIDClusterFromDescription(nameClustDesc): """ :param nameClustDesc: :type nameClustDesc: :return: :rtype: """ global MAP_OF_ROT_COMB #NOTE: important change take care it works!!!! clut = tuple(sorted(map(lambda x: int(x), nameClustDesc.split("_")))) if len(clut) == 1: return clut[0] if clut in MAP_OF_ROT_COMB.keys(): return MAP_OF_ROT_COMB[clut] else: # NOTE: if we create the permutations than we will not distinguish 0,1 from 1,0 it means that # that we can choose just one combinations to follow up (0,1) or (1,0) but not both # if we make this distinction here we will have two different type of combinations one for (0,1) and another for (1,0) qp = MAP_OF_ROT_COMB.values() t = 1 if len(qp) > 0: t = max(qp) + 1 MAP_OF_ROT_COMB[clut] = t return t def __mergeEquivalentRotCombination(): """ :return: :rtype: """ global MAP_OF_ROT_COMB equals = {} value_done = [] for key in MAP_OF_ROT_COMB.keys(): value = MAP_OF_ROT_COMB[key] if value not in value_done: value_done.append(value) equals[value] = [] e = itertools.permutations(key) for q in e: if q in MAP_OF_ROT_COMB.keys() and MAP_OF_ROT_COMB[q] != value: equals[value].append(MAP_OF_ROT_COMB[q]) value_done.append(MAP_OF_ROT_COMB[q]) return equals def saveRotations(DicParameters, listaAllrot, LIMIT_CLUSTER=None, applyNameFilter=False): """ :param DicParameters: :type DicParameters: :param listaAllrot: :type listaAllrot: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param applyNameFilter: :type applyNameFilter: :return: :rtype: """ ClusAll = listaAllrot[3] num = 0 rotazioni = listaAllrot[2].values() pdb_done = [] indes = len(ClusAll) for rotaz in sorted(rotazioni, __cmp_rota, reverse=True): # print "/////" # print rotaz # print "////" if applyNameFilter and (rotaz["name"], rotaz["n_prev_cluster"]) in pdb_done: # print "Not saved",(rotaz["name"], rotaz["n_prev_cluster"]) continue pdb_done.append((rotaz["name"], rotaz["n_prev_cluster"])) if "n_prev_cluster" not in rotaz.keys(): if LIMIT_CLUSTER == None: rotaz["n_prev_cluster"] = indes else: rotaz["n_prev_cluster"] = LIMIT_CLUSTER rotaz["original_rotcluster"] = str(rotaz["n_prev_cluster"]) indes += 1 if len(ClusAll) <= rotaz["n_prev_cluster"]: while len(ClusAll) < rotaz["n_prev_cluster"] + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[rotaz["n_prev_cluster"]]["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz) return ClusAll def angle_between(A, B, N, signed=True): """ :param A: :type A: :param B: :type B: :param N: :type N: :param signed: :type signed: :return: :rtype: """ # ANGLE BETWEEN TWO 3D VECTORS: # 1- dot(norm(A),norm(B)) (ANGLES UNSIGNED, PROBLEMS FOR SMALL ANGLES WITH ROUNDINGS) # 2- arcos(dot(A,B)/(|A|*|B|)) (ANGLE UNSIGNED, PROBLEMS FOR SMALL ANGLES WITH ROUNDINGS) # 3- arctan2(|cross(A,B)|,dot(A,B)) (ANGLE UNSIGNED BUT NOT PROBLEMS OF ROUNDINGS # define a vector NORM ex.: N = [0,0,1] # sign = dot(NORM,cross(A,B)) # if sign < 0 then ANGLE measured in 3 should be negative CrossX = A[1] * B[2] - A[2] * B[1] CrossY = A[2] * B[0] - A[0] * B[2] CrossZ = A[0] * B[1] - A[1] * B[0] fCross = numpy.sqrt(CrossX * CrossX + CrossY * CrossY + CrossZ * CrossZ) scaP2 = (A[0] * B[0]) + (A[1] * B[1]) + (A[2] * B[2]) Teta_2 = 
numpy.arctan2(fCross, scaP2)
    if signed:
        sign = (N[0] * CrossX) + (N[1] * CrossY) + (N[2] * CrossZ)
        if sign < 0:
            Teta_2 = -Teta_2
        return Teta_2
    else:
        return Teta_2


def simpleDistributionOrientationComparison4(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim,
                                             print_angles=False):
    """
    :param rot1:
    :type rot1:
    :param rot2:
    :type rot2:
    :param threshold:
    :type threshold:
    :param quate:
    :type quate:
    :param convNames:
    :type convNames:
    :param shift:
    :type shift:
    :param where:
    :type where:
    :param cell_dim:
    :type cell_dim:
    :param print_angles:
    :type print_angles:
    :return:
    :rtype:
    """
    TETA_1 = None
    try:
        structureA = [[-0.048, 3.749, 0.000], [92.111, 48.141, 16.362]]
        structureB = copy.deepcopy(structureA)
        Aq = rot1["rotationMatrices"]
        # print convNames[rot1["name"]],convNames[rot2["name"]]
        structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim)
        Bq = rot2["rotationMatrices"]
        # print "---", Aq
        # print "---", Bq
        structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim)
        Aatm1 = structureA[0]
        Aatm2 = structureA[-1]
        Batm1 = structureB[0]
        Batm2 = structureB[-1]
        if print_angles:
            print "Structure 1"
            print "Aatm1", Aatm1
            print "Aatm2", Aatm2
            print "Batm1", Batm1
            print "Batm2", Batm2
        X1 = Aatm2[0] - Aatm1[0]
        Y1 = Aatm2[1] - Aatm1[1]
        Z1 = Aatm2[2] - Aatm1[2]
        X2 = Batm2[0] - Batm1[0]
        Y2 = Batm2[1] - Batm1[1]
        Z2 = Batm2[2] - Batm1[2]
        TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False)
        TETA_1 = TETA_1 * 57.2957795
    except:
        TETA_1 = 1000
        print "ATTENTION: Model not found... Using just the model of rot2"
        # print sys.exc_info()
        # traceback.print_exc(file=sys.stdout)
    if print_angles:
        print "TETA_1", TETA_1, "threshold", threshold
    # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id()
    # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id()
    # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id()
    # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id()
    if "angle" in rot2.keys():
        if rot2["angle"] >= TETA_1:
            rot2["angle"] = TETA_1
    else:
        rot2["angle"] = TETA_1
    if TETA_1 <= 1000 and TETA_1 <= threshold:
        return True
    else:
        return False


def simpleDistributionOrientationComparison3(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim,
                                             print_angles=False):
    """
    :param rot1:
    :type rot1:
    :param rot2:
    :type rot2:
    :param threshold:
    :type threshold:
    :param quate:
    :type quate:
    :param convNames:
    :type convNames:
    :param shift:
    :type shift:
    :param where:
    :type where:
    :param cell_dim:
    :type cell_dim:
    :param print_angles:
    :type print_angles:
    :return:
    :rtype:
    """
    TETA_1 = None
    TETA_2 = None
    try:
        structureA = Bioinformatics.getAtomsList("A", convNames[rot1["name"]])
        # first and last atom positions (x, y, z columns of the atom record)
        structureA = [[float(structureA[0][6]), float(structureA[0][7]), float(structureA[0][8])],
                      [float(structureA[-1][6]), float(structureA[-1][7]), float(structureA[-1][8])]]
        structureB = copy.deepcopy(structureA)
        Aq = rot1["rotationMatrices"]
        # print convNames[rot1["name"]],convNames[rot2["name"]]
        structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim)
        Bq = rot2["rotationMatrices"]
        # print "---", Aq
        # print "---", Bq
        structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim)
        Aatm1 = structureA[0]
        Aatm2 = structureA[-1]
        Batm1 = structureB[0]
        Batm2 = structureB[-1]
        if print_angles:
            print "Structure 1"
            print "Aatm1", Aatm1
            print "Aatm2", Aatm2
            print "Batm1", Batm1
            print "Batm2", Batm2
        X1 = Aatm2[0] - Aatm1[0]
        Y1 = Aatm2[1] - Aatm1[1]
        Z1 = Aatm2[2] - Aatm1[2]
        X2 = Batm2[0] - Batm1[0]
        Y2 = Batm2[1] - Batm1[1]
        Z2 = Batm2[2] - Batm1[2]
        TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False)
        TETA_1 = TETA_1 * 57.2957795
    except:
        TETA_1 = 1000
        print "ATTENTION: Model", rot1["name"], "not found... Using just the model of rot2"
        # print sys.exc_info()
        # traceback.print_exc(file=sys.stdout)
    try:
        structureA = Bioinformatics.getAtomsList("A", convNames[rot2["name"]])
        # first and last atom positions (x, y, z columns of the atom record)
        structureA = [[float(structureA[0][6]), float(structureA[0][7]), float(structureA[0][8])],
                      [float(structureA[-1][6]), float(structureA[-1][7]), float(structureA[-1][8])]]
        structureB = copy.deepcopy(structureA)
        Aq = rot1["rotationMatrices"]
        # print convNames[rot1["name"]],convNames[rot2["name"]]
        structureA = rotateListByMatrix(0, 0, structureA, Aq, cell_dim)
        Bq = rot2["rotationMatrices"]
        structureB = rotateListByMatrix(1, 1, structureB, Bq, cell_dim)
        Aatm1 = structureA[0]
        Aatm2 = structureA[-1]
        Batm1 = structureB[0]
        Batm2 = structureB[-1]
        if print_angles:
            print "Structure 2"
            print "Aatm1", Aatm1
            print "Aatm2", Aatm2
            print "Batm1", Batm1
            print "Batm2", Batm2
        X1 = Aatm2[0] - Aatm1[0]
        Y1 = Aatm2[1] - Aatm1[1]
        Z1 = Aatm2[2] - Aatm1[2]
        X2 = Batm2[0] - Batm1[0]
        Y2 = Batm2[1] - Batm1[1]
        Z2 = Batm2[2] - Batm1[2]
        TETA_2 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False)
        TETA_2 = TETA_2 * 57.2957795
    except:
        TETA_2 = 1000
        print "ATTENTION: Model", rot2["name"], "not found... Using just the model of rot1"
        # print sys.exc_info()
        # traceback.print_exc(file=sys.stdout)
    if print_angles:
        print "TETA_1", TETA_1, "TETA_2", TETA_2, "threshold", threshold
    # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id()
    # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id()
    # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id()
    # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id()
    if min(TETA_1, TETA_2) <= 1000 and min(TETA_1, TETA_2) <= threshold:
        return True
    else:
        return False


def simpleDistributionOrientationComparison2(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim,
                                             print_angles=False):
    """
    :param rot1:
    :type rot1:
    :param rot2:
    :type rot2:
    :param threshold:
    :type threshold:
    :param quate:
    :type quate:
    :param convNames:
    :type convNames:
    :param shift:
    :type shift:
    :param where:
    :type where:
    :param cell_dim:
    :type cell_dim:
    :param print_angles:
    :type print_angles:
    :return:
    :rtype:
    """
    TETA_1 = None
    TETA_2 = None
    try:
        structureA = Bioinformatics.getStructure("A", convNames[rot1["name"]])
        structureB = Bioinformatics.getStructure("B", convNames[rot1["name"]])
        Aq = rot1["rotationMatrices"]
        # print convNames[rot1["name"]],convNames[rot2["name"]]
        structureA = rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=False)
        Bq = rot2["rotationMatrices"]
        # print "---", Aq
        # print "---", Bq
        structureB = rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=False)
        Aatm1 = structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord()
        Aatm2 = structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord()
        Batm1 = structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord()
        Batm2 = structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord()
        X1 = Aatm2[0] - Aatm1[0]
        Y1 = Aatm2[1] - Aatm1[1]
        Z1 = Aatm2[2] - Aatm1[2]
        X2 = Batm2[0] - Batm1[0]
        Y2 = Batm2[1] - Batm1[1]
        Z2 = Batm2[2] - Batm1[2]
        TETA_1 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False)
        TETA_1 = TETA_1 * 57.2957795
    except:
        TETA_1 = 1000
    try:
        structureA = \
Bioinformatics.getStructure("A", convNames[rot2["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot2["name"]]) Aq = rot1["rotationMatrices"] # print convNames[rot1["name"]],convNames[rot2["name"]] structureA = rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=False) Bq = rot2["rotationMatrices"] structureB = rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=False) Aatm1 = structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Aatm2 = structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() Batm1 = structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_coord() Batm2 = structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_coord() if print_angles: print "Aatm1", Aatm1 print "Aatm2", Aatm2 print "Batm1", Batm1 print "Batm2", Batm2 X1 = Aatm2[0] - Aatm1[0] Y1 = Aatm2[1] - Aatm1[1] Z1 = Aatm2[2] - Aatm1[2] X2 = Batm2[0] - Batm1[0] Y2 = Batm2[1] - Batm1[1] Z2 = Batm2[2] - Batm1[2] TETA_2 = angle_between([X1, Y1, Z1], [X2, Y2, Z2], [0.0, 0.0, 1.0], signed=False) TETA_2 = TETA_2 * 57.2957795 except: TETA_2 = 1000 if print_angles: print "TETA_1", TETA_1, "TETA_2", TETA_2, "threshold", threshold # print structureA.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureA.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[0]["CA"].get_full_id() # print structureB.get_list()[0].get_list()[0].get_list()[-1]["CA"].get_full_id() if min(TETA_1, TETA_2) <= 1000 and min(TETA_1, TETA_2) <= threshold: return True else: return False def simpleDistributionOrientationComparison(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim): """ :param rot1: :type rot1: :param rot2: :type rot2: :param threshold: :type threshold: :param quate: :type quate: :param convNames: :type convNames: :param shift: :type shift: :param where: :type where: :param cell_dim: :type cell_dim: :return: :rtype: """ # if not (os.path.exists("./temp/")): # os.makedirs("./temp/") structureA = Bioinformatics.getStructure("A", convNames[rot1["name"]]) structureB = Bioinformatics.getStructure("B", convNames[rot2["name"]]) Aq = rot1["rotationMatrices"] rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=True) # tupleResult = Bioinformatics.getFragmentListFromPDB("./temp/0_0_rot.pdb",False,False) # tupleResult = Bioinformatics.getFragmentListFromStructure(stru00,False,False,"stru00") tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/0_0_rot.pdb", False) Astructure = tupleResult[0] AlistFrags = tupleResult[1] # Bq = rot2["quaternion"] Bq = rot2["rotationMatrices"] rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=True) # tupleResult = Bioinformatics.getFragmentListFromPDB("./temp/1_1_rot.pdb",False,False) # tupleResult = Bioinformatics.getFragmentListFromStructure(stru11,False,False,"stru11") tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/1_1_rot.pdb", False) Bstructure = tupleResult[0] BlistFrags = tupleResult[1] """ #============TEMPORANEO=============== maxl = 0 ind = 0 for i in range(len(AlistFrags)): if maxl < (AlistFrags[i])["fragLength"]: ind = i maxl = (AlistFrags[i])["fragLength"] AlistFrags = [AlistFrags[ind]] BlistFrags = [BlistFrags[ind]] #===========TEMPORANEO================ """ # print "lenA",len(AlistFrags),"lenB",len(BlistFrags) nWindows, comp_windows, anyWay = 
def simpleDistributionOrientationComparison(rot1, rot2, threshold, quate, convNames, shift, where, cell_dim):
    """
    :param rot1:
    :type rot1:
    :param rot2:
    :type rot2:
    :param threshold:
    :type threshold:
    :param quate:
    :type quate:
    :param convNames:
    :type convNames:
    :param shift:
    :type shift:
    :param where:
    :type where:
    :param cell_dim:
    :type cell_dim:
    :return:
    :rtype:
    """
    # if not (os.path.exists("./temp/")):
    #     os.makedirs("./temp/")
    structureA = Bioinformatics.getStructure("A", convNames[rot1["name"]])
    structureB = Bioinformatics.getStructure("B", convNames[rot2["name"]])
    Aq = rot1["rotationMatrices"]
    rotateStructureByMatrix(0, 0, structureA, Aq, "./temp/", cell_dim, writePDB=True)
    # tupleResult = Bioinformatics.getFragmentListFromPDB("./temp/0_0_rot.pdb",False,False)
    # tupleResult = Bioinformatics.getFragmentListFromStructure(stru00,False,False,"stru00")
    tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/0_0_rot.pdb", False)
    Astructure = tupleResult[0]
    AlistFrags = tupleResult[1]
    # Bq = rot2["quaternion"]
    Bq = rot2["rotationMatrices"]
    rotateStructureByMatrix(1, 1, structureB, Bq, "./temp/", cell_dim, writePDB=True)
    # tupleResult = Bioinformatics.getFragmentListFromPDB("./temp/1_1_rot.pdb",False,False)
    # tupleResult = Bioinformatics.getFragmentListFromStructure(stru11,False,False,"stru11")
    tupleResult = Bioinformatics.getFragmentListFromPDBUsingAllAtoms("./temp/1_1_rot.pdb", False)
    Bstructure = tupleResult[0]
    BlistFrags = tupleResult[1]
    """
    #============TEMPORANEO===============
    maxl = 0
    ind = 0
    for i in range(len(AlistFrags)):
        if maxl < (AlistFrags[i])["fragLength"]:
            ind = i
            maxl = (AlistFrags[i])["fragLength"]
    AlistFrags = [AlistFrags[ind]]
    BlistFrags = [BlistFrags[ind]]
    #===========TEMPORANEO================
    """
    # print "lenA",len(AlistFrags),"lenB",len(BlistFrags)
    nWindows, comp_windows, anyWay = Bioinformatics.compareDistributionAccordingOrientation(AlistFrags, BlistFrags,
                                                                                            threshold, shift, where)
    """
    print "---------------"
    print str(rot1["quaternion"])
    print str(rot2["quaternion"])
    print str(anyWay)
    print nWindows,len(comp_windows)
    print "---------------"
    """
    # os.remove("./temp/0_0_rot.pdb")
    # os.remove("./temp/1_1_rot.pdb")
    result = False
    for t in range(len(nWindows)):
        prop = numpy.ceil((nWindows[t] * 40) / 100)
        if len(comp_windows[t]) >= (nWindows[t] - prop):
            result = True
            """
            print "---------------"
            print str(Aq)
            print str(Bq)
            print str(comp_windows)
            print nWindows[t],len(comp_windows[t])
            print "---------------"
            """
        else:
            """
            print "---------------"
            print str(Aq)
            print str(Bq)
            print str(comp_windows)
            print nWindows[t],len(comp_windows[t]),shift,where
            print "---------------"
            """
            return False, len(comp_windows[t])
    return result, None
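# compareRotation below dispatches on "mode":
#   "distributionCV"  compares the orientation distributions of the rotated
#                     fragments (simpleDistributionOrientationComparison),
#                     scanning shifts in both directions when evaLLONG is set;
#   "rot_matrices"    compares the first/last-CA axes of the rotated models
#                     (simpleDistributionOrientationComparison4);
#   "quaternion"      compares the quaternions directly
#                     (simpleQuaternionComparison).
# Whenever the direct comparison fails, the Laue-group symmetry equivalents
# and then the NCS matrices in listNCS are tried before giving up.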
def compareRotation(rot1, rot2, treshold, mode, quate, laue, listNCS, convNames, cell_dim, evaLLONG,
                    print_angles=False):
    """
    :param rot1:
    :type rot1:
    :param rot2:
    :type rot2:
    :param treshold:
    :type treshold:
    :param mode:
    :type mode:
    :param quate:
    :type quate:
    :param laue:
    :type laue:
    :param listNCS:
    :type listNCS:
    :param convNames:
    :type convNames:
    :param cell_dim:
    :type cell_dim:
    :param evaLLONG:
    :type evaLLONG:
    :param print_angles:
    :type print_angles:
    :return:
    :rtype:
    """
    if mode == "distributionCV":
        investigate = False
        for shift in range(8):
            if shift > 0 and not evaLLONG:
                break
            if not evaLLONG:
                shift = 0
            # NOTE: the original else branch read "shift = shiftew", but "shiftew"
            # is not defined anywhere; when evaLLONG is set the loop variable
            # already carries the intended shift, so the assignment is dropped
            firstResult = simpleDistributionOrientationComparison(rot1, rot2, treshold, quate, convNames, shift,
                                                                  "A", cell_dim)
            if firstResult[0]:
                return True, shift
            else:
                # Search symmetry equivalence between rotations
                contr = 0
                secondRe = None
                for rti, rtq in (quate.matricesRot[laue]).iteritems():
                    if contr == 0:
                        contr += 1
                        continue
                    contr += 1
                    new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti])
                    rot3 = {}
                    rot3["name"] = rot1["name"]
                    rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"])
                    rot3["rotationMatrices"].append(rtq)
                    secondResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate, convNames,
                                                                           shift, "A", cell_dim)
                    if secondResult[0]:
                        rot1["simmetry_rotated"] = new_quat
                        rot1["rotationMatrices"].append(rtq)
                        # return True,shift
                        secondRe = (True, shift)
                        break
                    elif secondResult[1] > 0:
                        investigate = True
                if secondRe != None:
                    return secondRe
                else:
                    for ncs in listNCS:
                        print "COMPARING NCS MATRIX", ncs
                        rot3 = {}
                        rot3["name"] = rot1["name"]
                        rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"])
                        rot3["rotationMatrices"].append(ncs)
                        new_quat = quate.convertRotMatrixToQuaternion2(ncs)
                        thirdResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate,
                                                                              convNames, shift, "A", cell_dim)
                        if thirdResult[0]:
                            # print "Result:",secondResult
                            rot1["simmetry_rotated"] = new_quat
                            rot1["rotationMatrices"].append(ncs)
                            # print rot1["name"], "COMPATIBLE WITH", new_quat, ncs
                            return True, shift
            if not firstResult[1] and not investigate:
                break
        if evaLLONG:
            investigate = False
            for shift in range(1, 8):
                firstResult = simpleDistributionOrientationComparison(rot1, rot2, treshold, quate, convNames,
                                                                      shift, "B", cell_dim)
                if firstResult[0]:
                    return True, -1 * shift
                else:
                    # Search symmetry equivalence between rotations
                    contr = 0
                    for rti, rtq in (quate.matricesRot[laue]).iteritems():
                        if contr == 0:
                            contr += 1
                            continue
                        contr += 1
                        new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti])
                        rot3 = {}
                        rot3["name"] = rot1["name"]
                        rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"])
                        rot3["rotationMatrices"].append(rtq)
                        secondResult = simpleDistributionOrientationComparison(rot3, rot2, treshold, quate,
                                                                               convNames, shift, "B", cell_dim)
                        if secondResult[0]:
                            rot1["simmetry_rotated"] = new_quat
                            rot1["rotationMatrices"].append(rtq)
                            return True, -1 * shift
                        elif secondResult[1] > 0:
                            investigate = True
                if not firstResult[1] and not investigate:
                    break
        return False, 0
    elif mode == "rot_matrices":
        firstResult = simpleDistributionOrientationComparison4(rot1, rot2, treshold, quate, convNames, 0, "A",
                                                               cell_dim, print_angles=print_angles)
        if firstResult:
            return True, 0
        else:
            # Search symmetry equivalence between rotations
            contr = 0
            secondResult = False
            for rti, rtq in (quate.matricesRot[laue]).iteritems():
                if contr == 0:
                    contr += 1
                    continue
                contr += 1
                new_quat = quate.RotateQuaternion(rot1["quaternion"], (quate.quaterRotaz[laue])[rti])
                rot3 = {}
                rot3["name"] = rot1["name"]
                rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"])
                rot3["rotationMatrices"].append(rtq)
                secondResult = simpleDistributionOrientationComparison4(rot3, rot2, treshold, quate, convNames, 0,
                                                                        "A", cell_dim, print_angles=print_angles)
                if secondResult:
                    rot1["simmetry_rotated"] = new_quat
                    rot1["rotationMatrices"].append(rtq)
                    # return True,shift
                    secondResult = True
                    break
            if secondResult:
                return True, 0
            else:
                for ncs in listNCS:
                    print "COMPARING NCS MATRIX", ncs
                    rot3 = {}
                    rot3["name"] = rot1["name"]
                    rot3["rotationMatrices"] = copy.deepcopy(rot1["rotationMatrices"])
                    rot3["rotationMatrices"].append(ncs)
                    new_quat = quate.convertRotMatrixToQuaternion2(ncs)
                    thirdResult = simpleDistributionOrientationComparison4(rot3, rot2, treshold, quate, convNames,
                                                                           0, "A", cell_dim,
                                                                           print_angles=print_angles)
                    if thirdResult:
                        # print "Result:",secondResult
                        rot1["simmetry_rotated"] = new_quat
                        rot1["rotationMatrices"].append(ncs)
                        # print rot1["name"], "COMPATIBLE WITH", new_quat, ncs
                        return True, 0
        return False, 0
    elif mode == "quaternion":
        q1 = rot1["quaternion"]
        q2 = rot2["quaternion"]
        # if "simmetry_rotated" in rot1:
        #     q1 = rot1["simmetry_rotated"]
        # if "simmetry_rotated" in rot2:
        #     q2 = rot2["simmetry_rotated"]
        firstResult = simpleQuaternionComparison(q1, q2, treshold, quate)
        # print "Direct compare:",firstResult
        if firstResult:
            return True, 0
        else:
            # Search symmetry equivalence between rotations
            secondResult = False
            for rti, rtq in (quate.quaterRotaz[laue]).iteritems():
                new_quat = quate.RotateQuaternion(rot1["quaternion"], rtq)
                # print "Trying equivalent",rti,rtq
                # print "Rotated quaternion",new_quat
                # print "Compared with",q2
                # matri_q = quate.convertQuaternionToMatrix(new_quat)
                # print "Matrice: ",matri_q
                secondResult = simpleQuaternionComparison(new_quat, q2, treshold, quate)
                # print "Result:",secondResult
                if secondResult:
                    rot1["simmetry_rotated"] = new_quat
                    # return True,0
                    break
            if secondResult:
                return True, 0
            else:
                for ncs in listNCS:
                    print "COMPARING NCS MATRIX", ncs
                    rtq = quate.convertRotMatrixToQuaternion2(ncs)
                    new_quat = quate.RotateQuaternion(rot1["quaternion"], rtq)
                    # print "Trying equivalent",rti,rtq
                    # print "Rotated quaternion",new_quat
                    # print "Compared with",q2
                    # matri_q = quate.convertQuaternionToMatrix(new_quat)
                    # print "Matrice: ",matri_q
                    thirdResult = simpleQuaternionComparison(new_quat, q2, treshold, quate)
                    # print "Result:",secondResult
                    if thirdResult:
                        rot1["simmetry_rotated"] = new_quat
                        # print rot1["name"], "COMPATIBLE WITH", new_quat
                        return True, 0
        return False, 0
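# simpleQuaternionComparison below obtains the relative rotation angle as
# 2*arccos of the scalar part of rot2q * rot1q^-1 (after James Diebel 2006).
# For unit quaternions this is equivalent to the dot-product form sketched
# here (a hedged illustration only; scalar part last, as in this module):
#
#   def _quaternion_angle_degrees(q1, q2):
#       dot = abs(numpy.dot(q1, q2))               # |cos(theta/2)|
#       return 2.0 * numpy.arccos(min(dot, 1.0)) * 57.2957795
#
#   _quaternion_angle_degrees([0.0, 0.0, 0.0, 1.0], [0.0, 0.0, 0.0, 1.0])  # -> 0.0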
def simpleQuaternionComparison2(rot1q, rot2q, treshold, quate):
    """
    Thesis Robert Adam Nicholls
    """
    # 1. Dot product of the two quaternions
    angle = quate.QuaternionDotProduct(rot1q, rot2q)
    # 2. Cosine distance via the 2nd Chebyshev polynomial: 2 - 2*dot**2 = 1 - cos(theta)
    angle = 2 - 2 * (angle ** 2)
    print "cosine distance", angle
    # 3. Compute the arccos of the quantity above
    # NOTE: 2 - 2*dot**2 ranges over [0, 2], so arccos can be fed values > 1 here
    angle = numpy.arccos(angle)
    # 4. Multiply the angle by 57.2957795 (180/pi) to have the angle in degrees
    angle_degree = angle * 57.2957795
    if angle_degree > 180:
        angle_degree = 360.0 - angle_degree
    print "Angle degrees: ", angle_degree
    if angle_degree <= treshold:
        return True
    else:
        return False


def simpleQuaternionComparison(rot1q, rot2q, treshold, quate):
    """
    James Diebel 2006
    """
    ##############
    # NEW METHOD #
    ##############
    # 1. Compute the quaternion inverse of rot1q
    inv = quate.QuaternionInverse(rot1q)
    # print inv
    # 2. Compute the product of rot2q and the inverse of rot1q
    res = quate.QuaternionProduct(rot2q, inv)
    # res = quate.QuaternionProduct(inv,rot2q)
    # print "QProduct",res
    # print "QProduct2",quate.QuaternionProduct(inv,rot2q)
    # 3. Compute the arccos of the scalar part of the quaternion above
    angle = numpy.arccos(res[3])
    # 4. Multiply the angle by 2.0 to have the angle in radians
    angle_rad = angle * 2.0
    # 5. Multiply the angle by 57.2957795 (180/pi) to have the angle in degrees
    angle_degree = angle_rad * 57.2957795
    if angle_degree > 180:
        angle_degree = 360.0 - angle_degree
    # print "Angle degrees: ",angle_degree
    if angle_degree <= treshold:
        return True
    else:
        return False
    # tetaThreshRad = (treshold*2*numpy.pi)/360
    # treshAngleRot = (treshAngleRot*2*numpy.pi)/360
    # tetaVect = angleRadBetweenVectors(rot1q[:-1], rot2q[:-1])
    # uno = (tetaVect <= tetaThreshRad)
    # due = (numpy.abs(rot1q[-1]-rot2q[-1]) <= treshAngleRot)
    # print "//////////////"
    # print "Comparing:"
    # print str(rot1q)
    # print str(rot2q)
    """
    dot = (rot1q[3]*rot2q[3]) + (rot1q[0]*rot2q[0]) + (rot1q[1]*rot2q[1]) + (rot1q[2]*rot2q[2])
    #print "dot is:",abs(dot),(1.0-abs(dot)),treshold
    #print "////////////"
    dot = abs(dot)
    if (1.0-dot) <= treshold:
        #if ((rot2q[3]<0 and rot1q[3]<0) or (rot2q[3]>=0 and rot1q[3]>=0)):
        #    return True
        #else:
        #    return False
        return True
    else:
        return False
    """
    # print "======================================"
    # print rot1q
    # print rot2q
    # print uno,due
    # print tetaVect, tetaThreshRad, uno
    # print rot1q[-1], rot2q[-1], treshAngleRot, due
    # print "======================================"
    # return (uno and due)


def readTranslationsFTF(baseDir, name, quate, mode, tops=None, make_positive_llg=False):
    """
    Process output from steps involving translations

    :param baseDir: path where the files are found
    :type baseDir: str
    :param name: name of the files without extension (e.g. 0.sol, 0.sh, will be 0)
    :type name: str
    :param quate: static reference to the Quaternions class
    :type quate: instance of Quaternions.Quaternions()
    :param mode: mode that produced the output.
Can be: 'PICASSO','PACK','RNP','RNP_GIMBLE','TRA' :type mode: str :param tops: :type tops: :param make_positive_llg: :type make_positive_llg: bool :return tra: :rtype tra: :return fixed: :rtype fixed: list :return diziona: :rtype diziona: dict :return convertnames: :rtype convertnames: """ sol = baseDir + name + ".sol" out = baseDir + name + ".out" sh = baseDir + name + ".sh" fixed = [] diziona = {} FOM = {} savellg = 0.0 savezscore = 0.0 diziocorresp = {} dinuovoname = "" cutoff = 0 fi = open(sh, "r") script = fi.readlines() # reading lines from PHASER input script fi.close() dict_rmsd_ensem = {} for linea in script: l = linea.split() if l[0].startswith("ENS") and "RMS" in l: dict_rmsd_ensem[l[1]] = float(l[l.index("RMS")+1]) if l[0] == "SEARCH" and l[1].startswith("ENS"): dict_rmsd_ensem["search_name"] = l[2] if l[0] == "SOLU" and l[1] == "TRIAL" and l[2].startswith("ENS"): dict_rmsd_ensem["search_name"] = l[3] if "search_name" not in dict_rmsd_ensem: solu = 0 indel = None for li,linea in enumerate(script): l = linea.split() if l[0] == "SOLU" and l[1] == "SET": solu += 1 if solu == 2: indel = li-1 break if solu > 0 and l[0] != "SOLU": indel = li-1 break l = script[indel].split() if l[0] == "SOLU" and l[1] == "6DIM" and l[2].startswith("ENS"): dict_rmsd_ensem["search_name"] = l[3] dict_rmsd_ensem["search_rmsd"] = dict_rmsd_ensem[dict_rmsd_ensem["search_name"]] if mode != "PICASSO": fi = open(out, "r") traFOM = fi.readlines() # reading lines from PHASER output fi.close() if len(traFOM) == 0: raise Exception(".out not ready!") if not os.path.exists(sol): return ([], [], {}, {}) fi = open(sh, "r") script = fi.readlines() # reading lines from PHASER input script fi.close() fixed = [] first = False second = False nfixed = 0 nfrags = 0 for linea in script: lista = linea.split() # Parsing each line of the PHASER input script->split in lista: if lista[0] == "PACK" and lista[1] == "CUTOFF": cutoff = int(lista[2]) if lista[0] == "SOLU" and lista[1] == "SET": if not first: first = True else: second = True nfrags = 0 # if mode != "RNP" or mode != "RNP_GIMBLE": # break if lista[0] == "SOLU" and lista[1] == "6DIM": if len(lista) == 22: dizio = {} dizio["rmsd"] = dict_rmsd_ensem[str(lista[3])] dizio["original_rotcluster"] = str(lista[17]) dizio["zscore"] = float(lista[21]) if make_positive_llg and float(lista[19]) < 0: dizio["llg"] = -1 * float(lista[19]) else: dizio["llg"] = float(lista[19]) dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir if str(lista[15]).startswith("ensembleID"): dizio["name"] = str(lista[15]) dizio["name_as_in_sh"] = str(lista[15]) elif str(lista[3]).startswith("ensarci"): dizio["name"] = str(lista[20]) dizio["name_as_in_sh"] = str(lista[3]) else: dizio["name"] = str(lista[3]) dizio["name_as_in_sh"] = str(lista[3]) dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])] dizio["bfactor"] = float(lista[13]) dizio["elong"] = 0 nfrags += 1 fixed.append(dizio) elif mode in ["RNP", "PACK", "RNP_GIMBLE"]: dizio = {} dizio["rmsd"] = dict_rmsd_ensem[str(lista[3])] dizio["original_rotcluster"] = str(lista[15]) dizio["zscore"] = float(lista[19]) if make_positive_llg and float(lista[17]) < 0: dizio["llg"] = -1 * float(lista[17]) else: dizio["llg"] = float(lista[17]) dizio["euler"] = 
[float(lista[5]), float(lista[6]), float(lista[7])] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir if str(lista[15]).startswith("ensembleID"): dizio["name"] = str(lista[15]) dizio["name_as_in_sh"] = str(lista[15]) elif str(lista[3]).startswith("ensarci"): dizio["name"] = str(lista[20]) dizio["name_as_in_sh"] = str(lista[3]) else: dizio["name"] = str(lista[3]) dizio["name_as_in_sh"] = str(lista[3]) dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])] dizio["bfactor"] = float(lista[13]) dizio["elong"] = 0 nfrags += 1 fixed.append(dizio) nfixed = nfrags - 1 if mode in ["PACK"] and not second: savellg = fixed[-1]["llg"] savezscore = fixed[-1]["zscore"] dinuovoname = fixed[-1]["name"] fixed = fixed[:-1] elif mode in ["RNP", "PACK", "RNP_GIMBLE"]: fixed = [fixed[i:i + nfixed + 1] for i in range(0, len(fixed), nfixed + 1)] savfi = copy.deepcopy(fixed) fixed = [ sorted(fi, key=lambda x: int(x["name"].split("FR")[1].split("_")[0])) if "FR" in fi[0]["name"] else fi for fi in fixed] if mode == "RNP" or mode == "RNP_GIMBLE": startc = 0 for linea in traFOM: if startc >= 1: led = linea.strip().split() if len(led) == 0: break if led[0] != "---": zs = 0.0 if led[5] != "n/a": zs = float(led[5]) elif led[8] != "n/a": #NS : changed for allowing only top hits to be considered out of RNP zs = float(led[8]) if led[0].startswith("Top"): diziocorresp[int(led[0].split("Top")[1])] = ( int(led[2]), float(led[6]), zs) # PHASER 2.6.0 else: diziocorresp[int(led[0])] = ( int(led[2]), float(led[6]), zs) # {num_in_sol:(num_in_sh,llg_refined)} # if led[0].startswith("Top"): # diziocorresp[int(led[0].split("Top")[1])] = ( # int(led[0].split("Top")[1]), float(led[6]), zs) # PHASER 2.6.0 # else: # diziocorresp[int(led[0])] = (int(led[0]), float(led[6]), zs) if linea.strip().startswith("#out =#out"): # PHASER 2.7.9 startc += 1 elif mode == "PACK": startc = 0 diziocorresp = {} for linea in traFOM: if startc >= 1: led = linea.split() if len(led) == 0: break if led[2].startswith(">"): led[2] = 10000 diziocorresp[int(led[0])] = float(led[2]) # PHASER 2.7.x {num_in_sh:percent_clashes} # if linea.strip().startswith("# # #Clash Packs"): #PHASER 2.5.5 if linea.strip().startswith("#in #out"): # PHASER 2.7.9 startc += 1 diziona = {} list_trial = [] if mode == "TRA": for linea in script: lista = linea.split() #print 'lista',lista if lista[0] == "SOLU" and lista[1] == "TRIAL": #print 'llalalala inside' #print 'len(lista)',len(lista) # if len(lista) == 14: # phaser 2.7 if len(lista) == 16: # phaser 2.9 # ['SOLU', 'TRIAL', 'ENSEMBLE', 'ensemble13', 'EULER', '0.0', '0.0', '0.0', 'RF', '40.0', # 'RFZ', '0.0', '#', 'ensemble13', 'CLUSTER:', '27'] diziona[(float(lista[5]), float(lista[6]), float(lista[7]))] = int(lista[15]) list_trial.append(lista[13]) for linea in script: lista = linea.split() if lista[0] == "SOLU" and lista[1] == "6DIM": if len(lista) == 22: diziona[(float(lista[5]), float(lista[6]), float(lista[7]))] = int(lista[17]) list_trial.append(lista[15]) elif mode == "PACK" or mode == "RNP" or mode == "RNP_GIMBLE": leggiTra1 = False leggiTra2 = False temp = [] for linea in script: lista = linea.split() if lista[0] == "ROOT": if leggiTra2: leggiTra2 = False diziona[tuple((temp[-1])["euler"])] = (temp[-1])["n_prev_cluster"] temp = [] break if lista[0] == "SOLU" and lista[1] == "SET": if 
leggiTra2: leggiTra2 = False diziona[tuple((temp[-1])["euler"])] = (temp[-1])["n_prev_cluster"] temp = [] leggiTra1 = True continue if leggiTra1: leggiTra2 = True leggiTra1 = False continue if leggiTra2: leggiTra1 = False dizio = {} dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])] dizio["n_prev_cluster"] = int(lista[15]) temp.append(dizio) FOM = {} start = False startskip = -1 section = False for linea in traFOM: if startskip > 0: startskip -= 1 continue if startskip == 0: start = True startskip = -1 if linea.startswith("Number LLG Z-Score"): section = True continue if mode != "RNP" and mode != "RNP_GIMBLE" and section and linea.startswith("$$ loggraph $$"): # NEW PHASER start = True continue if (mode == "RNP" or mode == "RNP_GIMBLE") and linea.strip() == "Refinement Table (Sorted)": startskip = 3 continue if mode != "RNP" and mode != "RNP_GIMBLE" and start and linea.startswith("$$"): start = False break if (mode == "RNP" or mode == "RNP_GIMBLE") and start and len(linea.split()) == 0: start = False break if start: lista = linea.strip().split() if mode == "TRA": FOM[int(lista[0])] = [float(lista[1]), float(lista[2])] elif mode == "RNP" or mode == "RNP_GIMBLE": zs = 0.0 if lista[0] != "---": if lista[5] != "n/a": zs = float(lista[5]) elif lista[8] != "n/a": zs = float(lista[8]) if lista[0].startswith("Top"): FOM[int(lista[0].split("Top")[1])] = [float(lista[6]), zs] # PHASER 2.6.0 else: FOM[int(lista[0])] = [float(lista[6]), zs] # PHASER 2.6.0 # FOM[int(lista[0])] = [float(lista[4]),0.0] fi = open(sol, "r") # Resind in the PHASER sol file traslazioni = fi.readlines() fi.close() tra = [] numero = 1 leggiTra1 = False leggiTra2 = False temp = [] PREVAL = {} numera2 = 0 tcns = False convertnames = {} nameRota = "" vrms = None vdelta = None vname = None vdict = {} for linea in traslazioni: if tops != None and numero > tops: break lista = linea.split() #print 'SHERLOCK lista',lista # PHASER 29 translation # SOLU SET RF++ TFZ=3.4 # SOLU HISTORY RF/TF(1/1:1) # SOLU SPAC P 1 21 1 # SOLU 6DIM ENSE ensemble13 EULER 0.000 0.000 0.000 FRAC 0.71156 0.00000 0.20197 BFAC 0.00000 # CLUSTER 1 # PHASER 29 rigid body refinement # SOLU SET LLG=46 TFZ==7.6 # SOLU HISTORY RNP(1:1) # SOLU SPAC C 2 2 21 # SOLU 6DIM ENSE ensarci0 EULER 17.165 29.861 169.673 FRAC 0.00330 0.41038 0.68639 BFAC 0.00000 #TFZ==7.6 # SOLU ENSEMBLE ensarci0 VRMS DELTA -0.3070 #RMSD 0.90 #VRMS 0.71 if lista[0] == "SOLU" and lista[1] == "ENSEMBLE": if "#VRMS" in lista: ind = lista.index("#VRMS") + 1 ind2 = lista.index("DELTA") + 1 ind3 = lista.index("ENSEMBLE") + 1 if len(lista) > ind: #if vrms is None or vrms > float(lista[ind]): vrms = float(lista[ind]) vdelta = float(lista[ind2]) vname = lista[ind3].split('[')[0] vdict[vname] = (vrms,vdelta) continue # New Phaser have this new line to skip if lista[0] == "SOLU" and lista[1] == "HISTORY": # SOLU HISTORY RF/TF(1/1:1) # for phaser 2.9 valua = lista[2] #print 'valua',valua if "PAK" in lista[2]: valua = lista[3] if mode == "TRA": if "RF/TF" in valua: #print 'found RF/TF' #print 'valua.split("RF/TF")[1]',valua.split("RF/TF")[1] #print 'valua.split("RF/TF")[1].split("/")[0]',valua.split("RF/TF")[1].split("/")[0] #print 'valua.split("RF/TF")[1].split("/")[0][1:]',valua.split("RF/TF")[1].split("/")[0][1:] #print 'list_trial',list_trial index_rlist = int(valua.split("RF/TF")[1].split("/")[0][1:]) - 1 nameRota = list_trial[index_rlist] else: index_rlist = int(valua.split("RF")[1].split(")")[0][1:]) - 1 nameRota = 
list_trial[index_rlist] continue # Phaser 2.6.1 if lista[0] == "SOLU" and lista[1] == "SET": if lista[-1] == "+TNCS": tcns = True if mode == "PICASSO": numera2 += 1 PREVAL[str(numera2)] = [float(((lista[5]).split("="))[-1]), float(((lista[3]).split("="))[-1])] # elif mode == "RNP" or mode == "RNP_GIMBLE": # numera2 += 1 # PREVAL[str(numera2)] = float(lista[-1][5:]) if leggiTra2: leggiTra2 = False #print "SHERLOCK VRMSD is",vrms if vrms is not None: # (temp[-1])["vrms"] = vdict[fixed[int(diziocorresp[numero][0]) - 1][-1]["name_as_in_sh"]][0] # (temp[-1])["vrms_delta"] = vdict[fixed[int(diziocorresp[numero][0]) - 1][-1]["name_as_in_sh"]][1] for qet, let in enumerate(fixed[int(diziocorresp[numero][0]) - 1]): #print("ASSOCIO:", qet, int(diziocorresp[numero][0]) - 1, let["name_as_in_sh"],vdict[let["name_as_in_sh"]]) (temp[qet])["vrms"] = vdict[let["name_as_in_sh"]][0] (temp[qet])["vrms_delta"] = vdict[let["name_as_in_sh"]][1] if mode == "PACK": (temp[-1])["zscore"] = savezscore if make_positive_llg and savellg < 0: (temp[-1])["llg"] = -1 * savellg else: (temp[-1])["llg"] = savellg (temp[-1])["name"] = dinuovoname elif mode == "PICASSO": (temp[-1])["zscore"] = PREVAL[str(numero)][1] if make_positive_llg and PREVAL[str(numero)][0] < 0: (temp[-1])["llg"] = -1 * PREVAL[str(numero)][0] else: (temp[-1])["llg"] = PREVAL[str(numero)][0] elif mode == "RNP" or mode == "RNP_GIMBLE": # if lista[-1].startswith("TFZ=="): # (temp[-1])["zscore"] = PREVAL[str(numero)] # else: # (temp[-1])["zscore"] = 0.0 (temp[-1])["tfz=="] = diziocorresp[numero][2] (temp[-1])["zscore"] = 0.0 if make_positive_llg and diziocorresp[numero][1] < 0: (temp[-1])["llg"] = -1 * diziocorresp[numero][1] else: (temp[-1])["llg"] = diziocorresp[numero][1] (temp[-1])["name"] = fixed[int(diziocorresp[numero][0]) - 1][-1]["name"] else: (temp[-1])["zscore"] = FOM[numero][1] if make_positive_llg and FOM[numero][0] < 0: (temp[-1])["llg"] = -1 * FOM[numero][0] else: (temp[-1])["llg"] = FOM[numero][0] # NOTE to be tested with inverted helices if tcns: for fg in range(len(temp) - 1): if fg == len(temp) - 2: temp[fg]["zscore"] = temp[-1]["zscore"] if make_positive_llg and temp[-1]["llg"] < 0: temp[fg]["llg"] = -1 * temp[-1]["llg"] else: temp[fg]["llg"] = temp[-1]["llg"] else: temp[fg]["zscore"] = fixed[fg]["zscore"] if make_positive_llg and fixed[fg]["llg"] < 0: temp[fg]["llg"] = -1 * fixed[fg]["llg"] else: temp[fg]["llg"] = fixed[fg]["llg"] if temp[fg]["name"].startswith("ensembleID"): w = temp[fg]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(fg) nw = w[0] + "FR" + w[1] + "_" + w[2] temp[fg]["name"] = nw if fg == len(temp) - 2: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) else: temp[fg]["original_rotcluster"] = fixed[fg]["original_rotcluster"] else: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) temp[-1]["fixed_frags"] = copy.deepcopy(temp[:-1]) # NOTE to be tested with inverted helices if tcns and len(temp[-1]["fixed_frags"]) > 0 and temp[-1]["name"].startswith("ensembleID"): w = temp[-1]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(len(temp[-1]["fixed_frags"])) nw = w[0] + "FR" + w[1] + "_" + w[2] convertnames[nw] = temp[-1]["name"] temp[-1]["name"] = nw # print "///////////////////////////////////////" # print temp[-1] # print "///////////////////////////////////////" # tra.append((temp[-1])) tra.append(temp) temp = [] numero += 1 leggiTra1 = True continue if leggiTra1: leggiTra2 = True leggiTra1 = False continue if leggiTra2 and lista[1]!='GYRE' and lista[8]!='#VRMS': # no gyre line or 
vrms leggiTra1 = False dizio = {} add = 0 # if dizio.has_key('euler'): # print "\n********************************************" # print 'lista', lista # print 'Ya tengo dizio rellenado ',dizio["euler"] # print "********************************************\n" # else: # print "\n********************************************" # print 'lista', lista # print 'I do not have dizio rellenado' # print "********************************************\n" if lista[4] == "EULER": add += 1 dizio["euler"] = [float(lista[4 + add]), float(lista[5 + add]), float(lista[6 + add])] dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir dizio["name"] = nameRota dizio["frac"] = [float(lista[8 + add]), float(lista[9 + add]), float(lista[10 + add])] dizio["bfactor"] = float(lista[12 + add]) dizio["elong"] = 0 temp.append(dizio) if len(temp) > 0: # print "ACTUAL VRMS2 is",vrms if vrms is not None: #(temp[-1])["vrms"] = vdict[fixed[int(diziocorresp[numero][0]) - 1][-1]["name_as_in_sh"]][0] #(temp[-1])["vrms_delta"] = vdict[fixed[int(diziocorresp[numero][0]) - 1][-1]["name_as_in_sh"]][1] for qet,let in enumerate(fixed[int(diziocorresp[numero][0]) - 1]): #print("ASSOCIO:",qet,int(diziocorresp[numero][0]) - 1,let["name_as_in_sh"],vdict[let["name_as_in_sh"]]) (temp[qet])["vrms"] = vdict[let["name_as_in_sh"]][0] (temp[qet])["vrms_delta"] = vdict[let["name_as_in_sh"]][1] if mode == "PACK": (temp[-1])["zscore"] = savezscore if make_positive_llg and savellg < 0: (temp[-1])["llg"] = -1 * savellg else: (temp[-1])["llg"] = savellg (temp[-1])["name"] = dinuovoname elif mode == "PICASSO": (temp[-1])["zscore"] = PREVAL[str(numero)][1] if make_positive_llg and PREVAL[str(numero)][0] < 0: (temp[-1])["llg"] = -1 * PREVAL[str(numero)][0] else: (temp[-1])["llg"] = PREVAL[str(numero)][0] elif mode == "RNP" or mode == "RNP_GIMBLE": if make_positive_llg and diziocorresp[numero][1] < 0: (temp[-1])["llg"] = -1 * diziocorresp[numero][1] else: (temp[-1])["llg"] =diziocorresp[numero][1] (temp[-1])["name"] = fixed[int(diziocorresp[numero][0]) - 1][-1]["name"] (temp[-1])["tfz=="] = diziocorresp[numero][2] (temp[-1])["zscore"] = 0.0 else: (temp[-1])["zscore"] = FOM[numero][1] if make_positive_llg and FOM[numero][0] < 0: (temp[-1])["llg"] = -1 * FOM[numero][0] else: (temp[-1])["llg"] = FOM[numero][0] # NOTE to be tested with inverted helices if tcns: for fg in range(len(temp) - 1): if fg == len(temp) - 2: temp[fg]["zscore"] = temp[-1]["zscore"] if make_positive_llg and temp[-1]["llg"] < 0: temp[fg]["llg"] = -1 * temp[-1]["llg"] else: temp[fg]["llg"] = temp[-1]["llg"] else: temp[fg]["zscore"] = fixed[fg]["zscore"] if make_positive_llg and fixed[fg]["llg"] < 0: temp[fg]["llg"] = -1 * fixed[fg]["llg"] else: temp[fg]["llg"] = fixed[fg]["llg"] if temp[fg]["name"].startswith("ensembleID"): w = temp[fg]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(fg) nw = w[0] + "FR" + w[1] + "_" + w[2] temp[fg]["name"] = nw if fg == len(temp) - 2: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) else: temp[fg]["original_rotcluster"] = fixed[fg]["original_rotcluster"] else: temp[fg]["original_rotcluster"] = str(diziona.values()[0]) temp[-1]["fixed_frags"] = copy.deepcopy(temp[:-1]) # NOTE to be tested with inverted helices if tcns and len(temp[-1]["fixed_frags"]) > 0 and temp[-1]["name"].startswith("ensembleID"): w = 
temp[-1]["name"].split("FR") w = w[:-1] + w[-1].split("_") w[1] = str(len(temp[-1]["fixed_frags"])) nw = w[0] + "FR" + w[1] + "_" + w[2] convertnames[nw] = temp[-1]["name"] temp[-1]["name"] = nw # tra.append((temp[-1])) tra.append(temp) # print "=====================TEMP IS===================" # print temp # print "===============================================" temp = [] numero += 1 # print "number of translation read", len(tra) # print "number of fixed",len(fixed) if mode == "PACK" and len(diziocorresp.keys()) > 0: pat = [] for sdf in range(len(fixed)): if diziocorresp[sdf + 1] <= cutoff: rt = fixed[sdf][-1] rt["fixed_frags"] = fixed[sdf][:-1] # print "sdf",sdf,type(rt),len(rt["fixed_frags"]) pat.append(copy.deepcopy(rt)) tra = pat for trrr in range(len(tra)): if isinstance(tra[trrr], list): trasla = tra[trrr][-1] else: trasla = tra[trrr] if mode == "RNP" or mode == "RNP_GIMBLE": # print "trrr+1",trrr+1 # print "corrispon",diziocorresp[trrr+1][0] # print "len fixed",len(fixed) # print fixed[diziocorresp[trrr+1][0]-1] # print fixed[diziocorresp[trrr+1][0]-1][:-1] # print trasla["name"] zen = [] for q, fir in enumerate(savfi[diziocorresp[trrr + 1][0] - 1][:-1]): fir["euler"] = tra[trrr][q]["euler"] fir["frac"] = tra[trrr][q]["frac"] fir["quaternion"] = tra[trrr][q]["quaternion"] fir["rotationMatrices"] = tra[trrr][q]["rotationMatrices"] if "vrms" in tra[trrr][q]: fir["vrms"] = tra[trrr][q]["vrms"] fir["vrms_delta"] = tra[trrr][q]["vrms_delta"] zen.append(fir) if len(zen) > 0: zen = sorted(zen, key=lambda x: int(x["name"].split("FR")[1].split("_")[0])) if "FR" in zen[0][ "name"] else zen trasla["fixed_frags"] = copy.deepcopy(zen) # ultimo di ogni sottolista di fixed e la soluzione stessa # NOTE the following is how it was before # trasla["fixed_frags"] = copy.deepcopy(fixed[diziocorresp[trrr + 1][0] - 1][:-1]) # ultimo di ogni sottolista di fixed e la soluzione stessa elif not ((mode == "PACK" and len(diziocorresp.keys()) > 0) or tcns): trasla["fixed_frags"] = copy.deepcopy(fixed) baseCombi = "" for fi in trasla["fixed_frags"]: if len(baseCombi) == 0: baseCombi += fi["original_rotcluster"] else: baseCombi += "_" + fi["original_rotcluster"] fi["original_rotcluster"] = baseCombi fi["n_prev_cluster"] = __getIDClusterFromDescription(baseCombi) if isinstance(tra[trrr], list): tra[trrr] = tra[trrr][-1] for t in tra: t["rmsd"] = dict_rmsd_ensem["search_rmsd"] return tra, fixed, diziona, convertnames def readRotationsFRF(baseDir, name, quate, tops=None, make_positive_llg=False): """Process rlist and out from a rotation search. 
    :param baseDir: path where the files are found
    :type baseDir: str
    :param name: name of the files without extension
    :type name: str
    :param quate: static reference to the Quaternions class
    :type quate: instance of Quaternions.Quaternions()
    :param tops: maximum number of rotations to read (None reads all)
    :type tops: int or None
    :param make_positive_llg: if True, negative LLG values are sign-flipped
    :type make_positive_llg: bool
    :return: (rota, fixed) lists of rotation dictionaries
    :rtype: tuple
    """
    rlist = os.path.join(baseDir, name + ".rlist")
    out = os.path.join(baseDir, name + ".out")
    sh = os.path.join(baseDir, name + ".sh")
    # Read the .sh file that contained the input script for Phaser
    fi = open(sh, "r")
    script = fi.readlines()
    fi.close()
    # Check for fixed fragments in the input
    dict_rmsd_ensem = {}
    for linea in script:
        l = linea.split()
        if l[0].startswith("ENS") and "RMS" in l:
            dict_rmsd_ensem[l[1]] = float(l[l.index("RMS") + 1])
        if l[0] == "SEARCH" and l[1].startswith("ENS"):
            dict_rmsd_ensem["search_name"] = l[2]
        if l[0] == "SOLU" and l[1] == "TRIAL" and l[2].startswith("ENS"):
            dict_rmsd_ensem["search_name"] = l[3]
    if "search_name" not in dict_rmsd_ensem:
        solu = 0
        indel = None
        for li, linea in enumerate(script):
            l = linea.split()
            if l[0] == "SOLU" and l[1] == "SET":
                solu += 1
                if solu == 2:
                    indel = li - 1
                    break
            if solu > 0 and l[0] != "SOLU":
                indel = li - 1
                break
        l = script[indel].split()
        if l[0] == "SOLU" and l[1] == "6DIM" and l[2].startswith("ENS"):
            dict_rmsd_ensem["search_name"] = l[3]
    dict_rmsd_ensem["search_rmsd"] = dict_rmsd_ensem[dict_rmsd_ensem["search_name"]]
    """
    dati_fixed = []
    for linea in script:
        lista = linea.split()
        if lista[0] == "SOLU" and lista[1] == "6DIM":
            dati_fixed.append((int(lista[15]),float(lista[17]),float(lista[19])))
    """
    fixed = []
    first = False
    baseCombi = ""
    for linea in script:
        lista = linea.split()
        if lista[0] == "SOLU" and lista[1] == "SET":
            if not first:
                first = True
            else:
                break
        if lista[0] == "SOLU" and lista[1] == "6DIM":
            # print '********Fixed rotation solution found '
            # print 'lista',lista
            # print 'len(lista)',len(lista)
            # sys.exit(0)
            if len(lista) == 20:
                dizio = {}
                if len(baseCombi) == 0:
                    baseCombi += str(lista[15])
                else:
                    baseCombi += "_" + str(lista[15])
                dizio["original_rotcluster"] = baseCombi
                dizio["n_prev_cluster"] = __getIDClusterFromDescription(baseCombi)
                dizio["zscore"] = float(lista[19])
                if make_positive_llg and float(lista[17]) < 0:
                    dizio["llg"] = -1 * float(lista[17])
                else:
                    dizio["llg"] = float(lista[17])
                dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])]
                dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1],
                                                                     dizio["euler"][2], "zyz")  # PHASER CONVENTION Z,Y,Z
                dizio["rotationMatrices"] = [
                    matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])]
                dizio["baseDir"] = baseDir
                dizio["name"] = str(lista[3])
                dizio["frac"] = [float(lista[9]), float(lista[10]), float(lista[11])]
                dizio["bfactor"] = float(lista[13])
                dizio["elong"] = 0
                dizio["rmsd"] = dict_rmsd_ensem[dizio["name"]]
                fixed.append(dizio)
    fi = open(rlist, "r")
    rotazioni = fi.readlines()
    fi.close()
    fi = open(out, "r")
    rotFOM = fi.readlines()
    fi.close()
    FOM = {}
    # Processing phaser log found in the out file
    # NOTE: in principle we don't need to do this anymore, all information is found at the rlist file
    start = False
    section = False
    for linea in rotFOM:
        if linea.startswith("Number LLG Z-Score"):
            section = True
            continue
        if section and linea.startswith("$$ loggraph $$"):  # NEW PHASER
            start = True
            continue
        if start and linea.startswith("$$"):
            start = False
            break
        if start:
            lista = linea.split()
            if len(lista) == 3:
                FOM[int(lista[0])] = [float(lista[1]), float(lista[2])]
            else:
                start = False
    rota = []
    numero = 1
"TRIAL": # print 'lista', lista # PHASER 2.8 or 2.9 lista # ['SOLU', 'TRIAL', 'ENSEMBLE', 'ensemble1', 'EULER', '111.032', '81.866', '354.600', # 'RF', '21.3', 'RFZ', '2.71', '#', 'CLUSTER', '1'] # print "lista.index('EULER')",lista.index('EULER') index_euler = lista.index('EULER') try: # print "lista.index('RF')", lista.index('RF') index_rf = lista.index('RF') except: # print sys.exc_info() # traceback.print_exc(file=sys.stdout) # print 'This is a phaser 2.7 rlist type of output' index_rf = False # print "lista.index('RFZ')", lista.index('RFZ') index_rfz = lista.index('RFZ') # sys.exit(0) dizio = {} # dizio["euler"] = [float(lista[5]), float(lista[6]), float(lista[7])] dizio["euler"] = [float(lista[index_euler + 1]), float(lista[index_euler + 2]), float(lista[index_euler + 3])] # dizio["zscore"] = FOM[numero][1] dizio["zscore"] = float(lista[index_rfz + 1]) if make_positive_llg and FOM[numero][0] < 0: dizio["llg"] = -1 * FOM[numero][0] if index_rf: dizio["llg"] = -1 * float(lista[index_rf + 1]) else: dizio["llg"] = FOM[numero][0] if index_rf: dizio["llg"] = float(lista[index_rf + 1]) dizio["quaternion"] = quate.convertEulerToQuaternion(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2], "zyz") # PHASER CONVENTION Z,Y,Z dizio["rotationMatrices"] = [matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] dizio["baseDir"] = baseDir dizio["name"] = str(lista[3]) dizio["frac"] = [0.0, 0.0, 0.0] dizio["bfactor"] = 0.0 dizio["elong"] = 0 dizio["rmsd"] = dict_rmsd_ensem[dizio["name"]] rota.append(dizio) numero += 1 return rota, fixed def convertFromFracToOrth(t1, t2, t3, cell_dim, parameters): """ :param t1: :type t1: :param t2: :type t2: :param t3: :type t3: :param cell_dim: :type cell_dim: :param parameters: :type parameters: :return: :rtype: """ if len(parameters.keys()) == 0: parameters["A"] = A = float(cell_dim[0]) parameters["B"] = B = float(cell_dim[1]) parameters["C"] = C = float(cell_dim[2]) parameters["alphaDeg"] = alphaDeg = float(cell_dim[3]) parameters["betaDeg"] = betaDeg = float(cell_dim[4]) parameters["gammaDeg"] = gammaDeg = float(cell_dim[5]) parameters["alpha"] = alpha = (alphaDeg * 2 * numpy.pi) / 360 parameters["beta"] = beta = (betaDeg * 2 * numpy.pi) / 360 parameters["gamma"] = gamma = (gammaDeg * 2 * numpy.pi) / 360 parameters["c_a"] = c_a = numpy.cos(alpha) parameters["c_b"] = c_b = numpy.cos(beta) parameters["c_g"] = c_g = numpy.cos(gamma) parameters["s_g"] = s_g = numpy.sin(gamma) parameters["q"] = q = numpy.sqrt(1 + 2 * c_a * c_b * c_g - c_a ** 2 - c_b ** 2 - c_g ** 2) parameters["uu"] = uu = s_g / (q * C) parameters["vv"] = vv = (c_b * c_g - c_a) / (q * B * s_g) parameters["uuy"] = uuy = 1 / (B * s_g) parameters["vvz"] = vvz = -1 * (c_g / (A * s_g)) parameters["uuz"] = uuz = (c_a * c_g - c_b) / (q * A * s_g) parameters["vvy"] = vvy = 1 / A tz = t3 / parameters["uu"] ty = (t2 - tz * parameters["vv"]) / parameters["uuy"] tx = (t1 - ty * parameters["vvz"] - tz * parameters["uuz"]) / parameters["vvy"] return tx, ty, tz, parameters def convertFromOrthToFrac(x, y, z, cell_dim, parameters): """ :param x: :type x: :param y: :type y: :param z: :type z: :param cell_dim: list with the unit cell parameters :type cell_dim: list :param parameters: :type parameters: :return: :rtype: """ if len(parameters.keys()) == 0: parameters["A"] = A = float(cell_dim[0]) parameters["B"] = B = float(cell_dim[1]) parameters["C"] = C = float(cell_dim[2]) parameters["alphaDeg"] = alphaDeg = float(cell_dim[3]) parameters["betaDeg"] = betaDeg = float(cell_dim[4]) 
parameters["gammaDeg"] = gammaDeg = float(cell_dim[5]) parameters["alpha"] = alpha = (alphaDeg * 2 * numpy.pi) / 360 parameters["beta"] = beta = (betaDeg * 2 * numpy.pi) / 360 parameters["gamma"] = gamma = (gammaDeg * 2 * numpy.pi) / 360 parameters["c_a"] = c_a = numpy.cos(alpha) parameters["c_b"] = c_b = numpy.cos(beta) parameters["c_g"] = c_g = numpy.cos(gamma) parameters["s_g"] = s_g = numpy.sin(gamma) parameters["q"] = q = numpy.sqrt(1 + 2 * c_a * c_b * c_g - c_a ** 2 - c_b ** 2 - c_g ** 2) parameters["uu"] = uu = s_g / (q * C) parameters["vv"] = vv = (c_b * c_g - c_a) / (q * B * s_g) parameters["uuy"] = uuy = 1 / (B * s_g) parameters["vvz"] = vvz = -1 * (c_g / (A * s_g)) parameters["uuz"] = uuz = (c_a * c_g - c_b) / (q * A * s_g) parameters["vvy"] = vvy = 1 / A nx = (x * parameters["vvy"]) + (y * parameters["vvz"]) + (z * parameters["uuz"]) ny = (y * parameters["uuy"]) + (z * parameters["vv"]) nz = z * parameters["uu"] return nx, ny, nz, parameters def rotateStructureByQuaternion(num, num2, structure, quate, q, outputPath, mode="rotateByOrthCoord", cell_dim=[]): """ :param num: :type num: :param num2: :type num2: :param structure: :type structure: :param quate: :type quate: :param q: :type q: :param outputPath: :type outputPath: :param mode: :type mode: :param cell_dim: list with the unit cell parameters :type cell_dim: list :return: :rtype: """ # TODO: This methods does not work properly. Both rotateByOrthCoord and rotateByCrystCoord does not produce the correct rotation qConj = quate.QuaternionConjugate(q) pdb = open(outputPath + str(num) + "_" + str(num2) + "_rot.pdb", "w") parameters = {} for model in structure.get_list(): for chain in model.get_list(): for residue in chain.get_list(): for atom in residue.get_list(): x = (atom.get_coord()[0]) y = (atom.get_coord()[1]) z = (atom.get_coord()[2]) if mode == "rotateByCrystCoord": # print "Before conversion",x,y,z x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters) # print "After conversion",x,y,z tmpQ = [0.0, 0.0, 0.0, 0.0] tmpQ[3] = 0 tmpQ[0] = x tmpQ[1] = y tmpQ[2] = z tmpQ2 = quate.QuaternionProduct(q, tmpQ) tmpQ = quate.PointQuaternionProd(tmpQ2, qConj) nx = tmpQ[0] ny = tmpQ[1] nz = tmpQ[2] if mode == "rotateByCrystCoord": # print "Before conversion after rotation",nx,ny,nz nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters) # print "After conversion after rotation",nx,ny,nz ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%6.2f%6.2f %4s%2s%2s\n" hetfield, resseq, icode = atom.get_parent().get_id() args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(), atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(), atom.get_name()[0], " ") ala = ATOM_FORMAT_STRING % args pdb.write(ala) pdb.close() def writeClustersPDBS(Clusters, dirout, mode, quate, convNames, printElonged, performTranslation, cell_dim, modeTra="frac"): """ :param Clusters: :type Clusters: :param dirout: :type dirout: :param mode: :type mode: :param quate: :type quate: :param convNames: :type convNames: :param printElonged: :type printElonged: :param performTranslation: :type performTranslation: :param cell_dim: :type cell_dim: :param modeTra: :type modeTra: :return: :rtype: """ if not os.path.exists(dirout): os.makedirs(dirout) for ci in range(len(Clusters)): clu = Clusters[ci] lion = (clu["heapSolutions"]).asList() dirClu = dirout + str(ci) + "/" if not 
def writeClustersPDBS(Clusters, dirout, mode, quate, convNames, printElonged, performTranslation, cell_dim,
                      modeTra="frac"):
    """
    :param Clusters:
    :type Clusters:
    :param dirout:
    :type dirout:
    :param mode:
    :type mode:
    :param quate:
    :type quate:
    :param convNames:
    :type convNames:
    :param printElonged:
    :type printElonged:
    :param performTranslation:
    :type performTranslation:
    :param cell_dim:
    :type cell_dim:
    :param modeTra:
    :type modeTra:
    :return:
    :rtype:
    """
    if not os.path.exists(dirout):
        os.makedirs(dirout)
    for ci in range(len(Clusters)):
        clu = Clusters[ci]
        lion = (clu["heapSolutions"]).asList()
        dirClu = dirout + str(ci) + "/"
        if not os.path.exists(dirClu):
            os.makedirs(dirClu)
        structureRef = None
        for i in range(len(lion)):
            rota = (lion[i])[1]
            if not printElonged and rota["elong"] > 0:
                continue
            parser = PDBParser()
            structure = parser.get_structure(rota["name"], convNames[rota["name"]])
            if mode == "matrix":
                rotateStructureByMatrix(ci, i, structure, rota["rotationMatrices"], dirClu, cell_dim)
            elif mode == "quaternion":
                rotateStructureByQuaternion(ci, i, structure, quate, rota["quaternion"], dirClu)
            elif mode == "simmetry_rotated":
                """
                quo = None
                if "simmetry_rotated" in rota:
                    quo = rota["simmetry_rotated"]
                else:
                    quo = rota["quaternion"]
                rotateStructureByQuaternion(ci,i,structure,quate,quo,dirClu)
                """
                if "simmetry_rotated" in rota:
                    # rotateStructureByQuaternion(ci,i,structure,quate,rota["quaternion"],dirClu)
                    # parser=PDBParser()
                    # structure=parser.get_structure(rota["name"],dirClu+str(ci)+"_"+str(i)+"_rot.pdb")
                    rotateStructureByQuaternion(ci, i, structure, quate, rota["simmetry_rotated"], dirClu)
                else:
                    rotateStructureByQuaternion(ci, i, structure, quate, rota["quaternion"], dirClu)
            # print performTranslation,modeTra
            if modeTra == "frac" and performTranslation:
                parser = PDBParser()
                structure = parser.get_structure(rota["name"], dirClu + str(ci) + "_" + str(i) + "_rot.pdb")
                translateStructurebyFrac(ci, i, structure, rota["frac"], dirClu, cell_dim)
                os.remove(dirClu + str(ci) + "_" + str(i) + "_rot.pdb")
            elif performTranslation and modeTra == "Cmass":
                # print "Devo scrivere traslai",i
                if i == 0:
                    parser = PDBParser()
                    structureRef = parser.get_structure(rota["name"], dirClu + str(ci) + "_" + str(i) + "_rot.pdb")
                    translateStructurebyCentroidMass(ci, i, structureRef, structureRef, dirClu)
                    os.remove(dirClu + str(ci) + "_" + str(i) + "_rot.pdb")
                else:
                    parser = PDBParser()
                    structure = parser.get_structure(rota["name"], dirClu + str(ci) + "_" + str(i) + "_rot.pdb")
                    translateStructurebyCentroidMass(ci, i, structure, structureRef, dirClu)
                    os.remove(dirClu + str(ci) + "_" + str(i) + "_rot.pdb")


def translateStructurebyFrac(num, num2, struct, frac, outputPath, cell_dim, writePDB=True):
    """
    :param num:
    :type num:
    :param num2:
    :type num2:
    :param struct:
    :type struct:
    :param frac:
    :type frac:
    :param outputPath:
    :type outputPath:
    :param cell_dim: list with the unit cell parameters
    :type cell_dim: list
    :param writePDB:
    :type writePDB:
    :return:
    :rtype:
    """
    structure = copy.deepcopy(struct)
    if writePDB:
        nameOutput = outputPath + str(num) + "_" + str(num2) + "_rottra.pdb"  # NSADD
        pdb = open(nameOutput, "w")  # NS CHANGE
    parameters = {}
    for model in structure.get_list():
        for chain in model.get_list():
            for residue in chain.get_list():
                for atom in residue.get_list():
                    x = (atom.get_coord()[0])
                    y = (atom.get_coord()[1])
                    z = (atom.get_coord()[2])
                    x, y, z, parameters = convertFromOrthToFrac(x, y, z, cell_dim, parameters)
                    nx = x + frac[0]
                    ny = y + frac[1]
                    nz = z + frac[2]
                    nx, ny, nz, parameters = convertFromFracToOrth(nx, ny, nz, cell_dim, parameters)
                    atom.set_coord(numpy.array([nx, ny, nz]))
                    if writePDB:
                        ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %4s%2s%2s\n"
                        hetfield, resseq, icode = atom.get_parent().get_id()
                        args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(),
                                atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq,
                                icode, nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(),
                                atom.get_name()[0], " ")
                        ala = ATOM_FORMAT_STRING % args
                        pdb.write(ala)
    if writePDB:
        pdb.close()
        return nameOutput  # NSADD
    else:
        return structure
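# translateStructurebyFrac above works in fractional space (convert, add the
# frac shift, convert back); with writePDB=True it returns the path of the
# written <num>_<num2>_rottra.pdb, otherwise the translated deep copy of the
# structure. filterEqualRotations below reuses the "distributionCV" comparison
# to collapse clusters whose rotations are equivalent, keeping only the top
# solution of each resulting sub-cluster.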
def filterEqualRotations(Clusters, convNames, quate, laue, listNCS, cell_dim, where):
    """
    :param Clusters:
    :type Clusters:
    :param convNames:
    :type convNames:
    :param quate:
    :type quate:
    :param laue:
    :type laue:
    :param listNCS:
    :type listNCS:
    :param cell_dim: list with the unit cell parameters
    :type cell_dim: list
    :param where:
    :type where:
    :return:
    :rtype:
    """
    Clus = []
    index = 0
    for clu in Clusters:
        lis = copy.deepcopy(clu["heapSolutions"].asList())
        lit = []
        for item in lis:
            prio, rota = item
            lit.append(rota)
        Pru = []
        Pru = clusterizeRotations(lit, quate, laue, listNCS, Pru, 0.0, "distributionCV", convNames, cell_dim, 0.5,
                                  False)
        din = {}
        hp = ADT.Heap()
        for tru in Pru:
            prio, item = tru["heapSolutions"].pop()
            hp.push(prio, item)
            writeSumClusters(tru, where, "clustersIDE" + str(index), convNames)
        din["heapSolutions"] = hp
        Clus.append(din)
        index += 1
    return Clus


def translateStructurebyVector(num, num2, structure, vect, outputPath, centroidCA=None):
    """
    :param num:
    :type num:
    :param num2:
    :type num2:
    :param structure:
    :type structure:
    :param vect:
    :type vect:
    :param outputPath:
    :type outputPath:
    :param centroidCA:
    :type centroidCA:
    :return:
    :rtype:
    """
    pdb = open(outputPath + str(num) + "_" + str(num2) + "_rottra.pdb", "w")
    for model in structure.get_list():
        for chain in model.get_list():
            for residue in chain.get_list():
                for atom in residue.get_list():
                    x = (atom.get_coord()[0])
                    y = (atom.get_coord()[1])
                    z = (atom.get_coord()[2])
                    nx = 0.0
                    ny = 0.0
                    nz = 0.0
                    if centroidCA != None:
                        # SUBTRACT TO EACH STRUCTURE THE CORRESPONDING CENTROID
                        nx = (x - centroidCA[0]) + vect[0]
                        ny = (y - centroidCA[1]) + vect[1]
                        nz = (z - centroidCA[2]) + vect[2]
                    else:
                        nx = x + vect[0]
                        ny = y + vect[1]
                        nz = z + vect[2]
                    ATOM_FORMAT_STRING = "%s%6i %-4s%c%3s %c%4i%c   %8.3f%8.3f%8.3f%6.2f%6.2f      %4s%2s%2s\n"
                    hetfield, resseq, icode = atom.get_parent().get_id()
                    args = ("ATOM ", atom.get_serial_number(), atom.get_fullname(), atom.get_altloc(),
                            atom.get_parent().get_resname(), atom.get_parent().get_parent().get_id(), resseq, icode,
                            nx, ny, nz, atom.get_occupancy(), 30.00, atom.get_parent().get_segid(),
                            atom.get_name()[0], " ")
                    ala = ATOM_FORMAT_STRING % args
                    pdb.write(ala)
    pdb.close()


def translateStructurebyCentroidMass(num, num2, structureMove, structureRef, outputPath):
    """
    :param num:
    :type num:
    :param num2:
    :type num2:
    :param structureMove:
    :type structureMove:
    :param structureRef:
    :type structureRef:
    :param outputPath:
    :type outputPath:
    :return:
    :rtype:
    """
    # COMPUTE CENTROID OF MASS OF THE FRAGMENT TO MOVE AND REFERENCE
    listCAs1 = []
    for model in structureMove:
        for chain in model:
            for residue in chain:
                # coord = residue["CA"].get_coord()
                for atom in residue:
                    if atom.get_id() in ["CA", "C", "O", "N"]:
                        coord = atom.get_coord()
                        listCAs1.append([coord[0], coord[1], coord[2]])
                # listCAs1.append(residue["CA"])
    listCAs2 = []
    for model in structureRef:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    if atom.get_id() in ["CA", "C", "O", "N"]:
                        coord = atom.get_coord()
                        listCAs2.append([coord[0], coord[1], coord[2]])
                # listCAs2.append(residue["CA"])
    if len(listCAs2) > len(listCAs1):
        how = -1 * (len(listCAs2) - len(listCAs1))
        listCAs2 = listCAs2[:how]
    elif len(listCAs1) > len(listCAs2):
        how = -1 * (len(listCAs1) - len(listCAs2))
        listCAs1 = listCAs1[:how]
    """
    super_imposer = Superimposer()
    super_imposer.set_atoms(listCAs2, listCAs1)
    rot, tran = super_imposer.rotran
    rot=rot.astype('f')
    tran=tran.astype('f')
    """
    moveCas = numpy.array(listCAs1)
    centroidMove = numpy.mean(moveCas, axis=0)
    refCas = numpy.array(listCAs2)
centroidRef = numpy.mean(refCas, axis=0) # COMPUTE THE VECTOR BETWEEN THE TWO CENTROIDS A,B,C = XCREF-XCMOVE,YCREF-XCMOVE,ZCREF-YCMOVE a = centroidRef[0] - centroidMove[0] b = centroidRef[1] - centroidMove[1] c = centroidRef[2] - centroidMove[2] # a = centroidMove[0]-centroidRef[0] # b = centroidMove[1]-centroidRef[1] # c = centroidMove[2]-centroidRef[2] vect = [a, b, c] # print "Translation Vector",vect #tran # SEND TO TRANSLATE STRUCTUREBYVECTOR # print centroidRef, centroidMove, vect # translateStructurebyVector(num,num2,structureMove,tran,outputPath) translateStructurebyVector(num, num2, structureMove, vect, outputPath) def __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=None, getGlobalStats=False, forceGetTFZ=False): """ :param DicParameters: :type DicParameters: :param ClusAll: :type ClusAll: :param ensembles: :type ensembles: :param frag_fixed: :type frag_fixed: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param getGlobalStats: :type getGlobalStats: :param forceGetTFZ: :type forceGetTFZ: :return: :rtype: """ global MAP_OF_ROT_COMB CluWork = [] if frag_fixed > 1: merged = __mergeEquivalentRotCombination() for key in merged: dictio = {"heapSolutions": ADT.Heap()} alli = [key] + merged[key] for clu in ClusAll: for item in clu["heapSolutions"].asList(): prio, rota = item if rota["n_prev_cluster"] not in alli: break dictio["heapSolutions"].push(prio, rota) CluWork.append(dictio) else: CluWork = ClusAll numc = 0 listRotaClus = [] stats = {} zmax = 0 tfzmax = 0 llgmax = 0 distinctall = [] for clu in CluWork: if len(clu["heapSolutions"].asList()) == 0: continue numc = clu["heapSolutions"].asList()[0][1]["n_prev_cluster"] # print "N.Cluster",numc if LIMIT_CLUSTER != None and LIMIT_CLUSTER != numc: continue for rel in MAP_OF_ROT_COMB.keys(): val = MAP_OF_ROT_COMB[rel] # print "!!!!-----!!!!---!!!!!",val,numc,val==numc,type(val),type(numc),rel if val == numc: numc = tuple(sorted(map(lambda x: int(x), json.loads(str(rel).replace("(", "[").replace(")", "]"))))) #print "////////////////",rel,numc break distinct_pdbs = [] nrts = len(clu["heapSolutions"].asList()) llg_all = [] zscore_all = [] tfz_all = [] for item in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): prio, rota = item llg_all.append(rota["llg"]) zscore_all.append(rota["zscore"]) if "tfz==" in rota: tfz_all.append(rota["tfz=="]) elif forceGetTFZ: tfz_all.append(0.0) if rota["llg"] > llgmax: llgmax = rota["llg"] if rota["zscore"] > zmax: zmax = rota["zscore"] if "tfz==" in rota and rota["tfz=="] > tfzmax: tfzmax = rota["tfz=="] pdbname = ensembles[rota["name"]] corresponding = (pdbname.split("/"))[-1] listona = corresponding.split("_") pdbid = listona[0] model = listona[1] idSolution = listona[-1] idSolution, ext = idSolution.split(".") if pdbid not in distinct_pdbs: distinct_pdbs.append(pdbid) if pdbid not in distinctall: distinctall.append(pdbid) llg_avg = float(numpy.mean(numpy.array(llg_all))) zscore_avg = float(numpy.mean(numpy.array(zscore_all))) llg_std = float(numpy.std(numpy.array(llg_all))) llg_min = float(numpy.min(numpy.array(llg_all))) llg_max = float(numpy.max(numpy.array(llg_all))) zscore_max = float(numpy.max(numpy.array(zscore_all))) if len(tfz_all) > 0: tfz_avg = float(numpy.mean(numpy.array(tfz_all))) tfz_max = float(numpy.max(numpy.array(tfz_all))) distpdb = len(distinct_pdbs) listRotaClus.append((distpdb, llg_max, numc)) if len(tfz_all) > 0: stats[numc] = [nrts, distpdb, llg_max, llg_avg, zscore_max, zscore_avg, llg_std, llg_min, tfz_max, tfz_avg] else: stats[numc] = [nrts, 
distpdb, llg_max, llg_avg, zscore_max, zscore_avg, llg_std, llg_min] if getGlobalStats: return stats, listRotaClus, len(distinctall), llgmax, zmax else: return stats, listRotaClus def __getStatFromSumAndModel(sumPath, modelo, fullmodel, cluster, mode, n_ense=None, getEnsemble=False): """ :param sumPath: :type sumPath: :param modelo: :type modelo: :param fullmodel: :type fullmodel: :param cluster: :type cluster: :param mode: :type mode: :param n_ense: :type n_ense: :param getEnsemble: :type getEnsemble: :return: :rtype: """ a_tuple = __getStatFromSumAndModel_priv(sumPath, modelo, fullmodel, cluster, mode, n_ense=None, fullname=True, getEnsemble=getEnsemble) if a_tuple[0] is None: a_tuple = __getStatFromSumAndModel_priv(sumPath, modelo, fullmodel, cluster, mode, n_ense=None, fullname=False, getEnsemble=getEnsemble) return a_tuple def __getStatFromSumAndModel_priv(sumPath, modelo, fullmodel, cluster, mode, n_ense=None, fullname=False, getEnsemble=False): """ :param sumPath: :type sumPath: :param modelo: :type modelo: :param fullmodel: :type fullmodel: :param cluster: :type cluster: :param mode: :type mode: :param n_ense: :type n_ense: :param fullname: :type fullname: :param getEnsemble: :type getEnsemble: :return: :rtype: """ Clu, dicname = readClustersFromSUM(sumPath) # print "##",sumPath # print "Numero clusters",len(Clu) # for rf in Clu: # print "Numero rot.",len(rf["heapSolutions"].asList()) rop = None topLLG = None topZSCORE = None posRank = None pos = 1 frag = -1 rp = None if mode == "ARCIMBOLDO": newPath = os.path.join(os.path.split(os.path.dirname(sumPath))[0], "5_RNP_LIBRARY/clusters.sum") if newPath != sumPath: tupl = __getStatFromSumAndModel_priv(newPath, fullmodel, fullmodel, cluster, mode, fullname=fullname) rp = tupl[0] if cluster == None: Clu = sorted(Clu, __cmp_cluster, reverse=True) for clust in Clu: if cluster != None: pos = 1 for item in clust["heapSolutions"]: prio, rota = item actualmodel = "" model = copy.deepcopy(modelo) if cluster != None and rota["n_prev_cluster"] != int(cluster): break actualmodel = os.path.basename(dicname[rota["name"]]) if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES", "RNP"]: if not ("FR" in actualmodel and "xx" in actualmodel): ad = actualmodel.split("_") actualmodel = ad[0] + "_" + ad[1] + "_" + ad[2].split("-")[0] elif mode in ["ARCIMBOLDO-SHREDDER"]: # ad = actualmodel.split("_") # actualmodel = ad[0]+"_0_0.pdb" # ad = model.split("_") # model = ad[0]+"_0_0.pdb" # NOTE: The name of the model is already correct, and the first _0_ might not be 0 if the run comes from spheres pass elif mode == "ARCIMBOLDO": # print os.path.basename(dicname[rota["name"]])[:-4] # print "Analyzing",rota["name"] frag = int(model.split("FR")[1].split("_")[0]) frar = int(rota["name"].split("FR")[1].split("_")[0]) actualmodel = "" # print "Found",frag,"in model and",frar,"rota_name" if frag == frar: if fullname: actualmodel = rota["name"] + ".pdb" model = model else: actualmodel = rota["name"].split("ensembleID")[1] + ".pdb" # .split("_")[0] model = "xx" + model.split("xx")[1] # print "////....////",actualmodel,model # if actualmodel == model: # print actualmodel, model,len(clust["heapSolutions"].asList()),actualmodel==model,len(actualmodel),len(model) elif rp != None and "fixed_frags" in rp: for fri in rp["fixed_frags"]: frag = int(fri["name"].split("FR")[1].split("_")[0]) # print "--------------frag",frag,"------------------frar",frar,"friname",fri["name"] if frag == frar: if fullname: actualmodel = rota["name"] + ".pdb" if len(actualmodel.split("-")) > 1: model = 
fri["name"] + ".pdb" elif len(fri["name"].split("-")) > 1: model = fri["name"].split("-")[0] + ".pdb" else: model = fri["name"] + ".pdb" # print "----....----...",actualmodel, model,len(clust["heapSolutions"].asList()),actualmodel==model,len(actualmodel),len(model) break else: actualmodel = rota["name"].split("ensembleID")[1] + ".pdb" if len(actualmodel.split("-")) > 1: model = fri["name"].split("ensembleID")[1] + ".pdb" elif len(fri["name"].split("ensembleID")[1].split("-")) > 1: model = fri["name"].split("ensembleID")[1].split("-")[0] + ".pdb" else: model = fri["name"].split("ensembleID")[1] + ".pdb" # print "----....----...",actualmodel, model,len(clust["heapSolutions"].asList()),actualmodel==model,len(actualmodel),len(model) break # model = model.split(".")[0] if model.endswith(".pdb") and not actualmodel.endswith(".pdb"): actualmodel += ".pdb" elif not model.endswith(".pdb") and actualmodel.endswith(".pdb"): actualmodel = actualmodel[:-4] # print "=====",n_ense,actualmodel,model,rota["name"] if (n_ense == None and actualmodel == model) or ( actualmodel == model and int(rota["name"].split("-")[1]) == int(n_ense)): rop = rota posRank = pos if rota["llg"] > topLLG: topLLG = rota["llg"] if rota["zscore"] > topZSCORE: topZSCORE = rota["zscore"] pos += 1 if getEnsemble: ensem = {} ensem[rop["name"]] = dicname[rop["name"]] if "fixed_frags" in rop: for fra in rop["fixed_frags"]: ensem[fra["name"]] = dicname[fra["name"]] return rop, topLLG, topZSCORE, posRank, ensem else: return rop, topLLG, topZSCORE, posRank def __getStatFromSumCCAndModel(pathBCC, pathACC, model, mode): """ :param pathBCC: :type pathBCC: :param pathACC: :type pathACC: :param model: :type model: :param mode: :type mode: :return: :rtype: """ befref = [] aftref = [] if pathBCC != None: befref, con2 = readCCValFromSUM(pathBCC) if pathACC != None: aftref, con1 = readCCValFromSUM(pathACC) listcc_before = [] listcc_after = [] for mod in befref: listcc_before.append((mod["initcc"], mod["ner"], mod["corresp"])) listcc_before = sorted(listcc_before, reverse=True) posB = 1 ropB = None topINITCC_B = None if pathBCC != None: topINITCC_B = listcc_before[0][0] posRankB = None for item in listcc_before: actualmodel = "" if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES"]: ad = os.path.basename(item[2]).split("_") actualmodel = ad[0] + "_" + ad[1] + "_" + ad[-1] else: actualmodel = os.path.basename(item[2]) if actualmodel == model: ropB = item posRankB = posB break posB += 1 for mod in aftref: listcc_after.append((mod["initcc"], mod["ner"], mod["corresp"])) listcc_after = sorted(listcc_after, reverse=True) posA = 1 ropA = None topINITCC_A = None if pathACC: topINITCC_A = listcc_after[0][0] posRankA = None for item in listcc_after: actualmodel = "" if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES"]: ad = os.path.basename(item[2]).split("_") actualmodel = ad[0] + "_" + ad[1] + "_" + ad[-1] else: actualmodel = os.path.basename(item[2]) # print mode,actualmodel,model if actualmodel == model: # if os.path.basename(item[2]) == model: ropA = item posRankA = posA break posA += 1 return ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA def generatePDFGraph(current_dir, nameOutput, title, data): """ :param current_dir: :type current_dir: :param nameOutput: :type nameOutput: :param title: :type title: :param data: :type data: :return: :rtype: """ data = sorted(data) helil = map(lambda x: x[0], data) llgs = map(lambda x: x[1], data) rmsds = map(lambda x: x[2], data) ress = map(lambda x: x[3], data) nomepdf = os.path.join(current_dir, nameOutput + 
".pdf") def make_colormap(seq): """Return a LinearSegmentedColormap seq: a sequence of floats and RGB-tuples. The floats should be increasing and in the interval (0,1). """ seq = [(None,) * 3, 0.0] + list(seq) + [1.0, (None,) * 3] cdict = {'red': [], 'green': [], 'blue': []} for i, item in enumerate(seq): if isinstance(item, float): r1, g1, b1 = seq[i - 1] r2, g2, b2 = seq[i + 1] cdict['red'].append([item, r1, r2]) cdict['green'].append([item, g1, g2]) cdict['blue'].append([item, b1, b2]) return mcolors.LinearSegmentedColormap('CustomMap', cdict) c = mcolors.ColorConverter().to_rgb rvb = make_colormap( [c('red'), c('violet'), 0.33, c('violet'), c('blue'), 0.66, c('blue')]) plt.scatter(array_dg[:, 0], array_dg[:, 1], c=rmsd, cmap=rvb) plt.colorbar() plt.show() with PdfPages(nomepdf) as pdf: rvb = make_colormap([c('red'), c('violet'), 0.33, c('violet'), c('blue'), 0.66, c('blue')]) plt.figure(figsize=(3, 3)) plt.title('RMSD') plt.scatter(helil, llgs, c=rmsds, cmap=rvb) plt.colorbar() pdf.savefig() # saves the current figure into a pdf page plt.close() rvb = make_colormap([c('green'), c('blue'), 0.33, c('violet'), c('black'), 0.66, c('black')]) plt.rc('text', usetex=True) plt.figure(figsize=(3, 3)) plt.title('Resolution') plt.scatter(helil, llgs, c=ress, cmap=rvb) plt.colorbar() pdf.savefig() # saves the current figure into a pdf page plt.close() # NOTE CM and PT: This function is not currently called def __writeLineTable(data, first_rmsd, last_rmsd, first_res, last_res): """ :param data: :type data: :param first_rmsd: :type first_rmsd: :param last_rmsd: :type last_rmsd: :param first_res: :type first_res: :param last_res: :type last_res: :return: :rtype: """ full = "" base = "[\n" base += "['Helix', 'LLG', {'type': 'string', 'role': 'style'}, {'type':'string', 'role':'tooltip'}],\n" full += base for it in range(len(data)): item = data[it] # print "========",item[3],first_res, last_res,SystemUtility.htmlRgb(item[3], first_res, last_res,"blue") stroke_color = "" + str(SystemUtility.htmlRgb(item[3], first_res, last_res, "blue")) # hexadecimal stroke_opacity = "0.6" # float stroke_width = "1" # integer # print "********",item[2],first_rmsd, last_rmsd,SystemUtility.htmlRgb(item[2], first_rmsd, last_rmsd,"red") fill_color = "" + str(SystemUtility.htmlRgb(item[2], first_rmsd, last_rmsd, "red")) # hexadecimal fill_opacity = "1.0" full += "[" + str('%.2f' % item[0]) + ", " + str('%.2f' % (item[1] / item[ 0])) + ", 'point {stroke-color: " + stroke_color + "; stroke-opacity: " + stroke_opacity + "; stroke-width: " + stroke_width + "; fill-color: " + fill_color + "; fill-opacity: " + fill_opacity + "}','LLG: " + str( '%.2f' % item[1]) + ", RMSD: " + str('%.2f' % item[2]) + ", Res: " + str('%.2f' % item[3]) + "']" if it == len(data) - 1: full += "\n" else: full += ",\n" full += "]\n" return full # NOTE CM and PT: This function is not currently called def __extractTablesGraph(data): dic_rmsd_results = {} dic_res_results = {} full = "" data = sorted(data) first_rmsd = min(map(lambda x: x[2], data)) last_rmsd = max(map(lambda x: x[2], data)) first_res = min(map(lambda x: x[3], data)) last_res = max(map(lambda x: x[3], data)) dic_rmsd = {} dic_res = {} for it in range(len(data)): item = data[it] key_rmsd = '%.2f' % item[2] key_res = '%.2f' % item[3] if key_rmsd not in dic_rmsd.keys(): dic_rmsd[key_rmsd] = [item] else: dic_rmsd[key_rmsd].append(item) if key_res not in dic_res.keys(): dic_res[key_res] = [item] else: dic_res[key_res].append(item) full = __writeLineTable(data, first_rmsd, last_rmsd, first_res, 
last_res) for key_rmsd in dic_rmsd.keys(): value_rmsd = dic_rmsd[key_rmsd] tabella = __writeLineTable(value_rmsd, first_rmsd, last_rmsd, first_res, last_res) dic_rmsd_results[key_rmsd] = tabella for key_res in dic_res.keys(): value_res = dic_res[key_res] tabella = __writeLineTable(value_res, first_rmsd, last_rmsd, first_res, last_res) dic_res_results[key_res] = tabella return dic_rmsd_results, dic_res_results, full def writeOutputFile(lock, DicParameters, ClusAll, outputDir, filename, mode, step, ensembles, frag_fixed, LIMIT_CLUSTER=None, path1=None, path2=None, useRefP1=False, useRGR=False, numberCyclesRef=1, usePacking=True, useTransla=True, makeEmpty=False, readSum=None, filterClusters=True, fromphis=False, fromdirexp="11.EXP", coiled_coil=False, dizio_verification=None, message="", solved=None): """ :param lock: :type lock: :param DicParameters: :type DicParameters: :param ClusAll: :type ClusAll: :param outputDir: :type outputDir: :param filename: :type filename: :param mode: :type mode: :param step: :type step: :param ensembles: :type ensembles: :param frag_fixed: :type frag_fixed: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param path1: :type path1: :param path2: :type path2: :param useRefP1: :type useRefP1: :param useRGR: :type useRGR: :param numberCyclesRef: :type numberCyclesRef: :param usePacking: :type usePacking: :param useTransla: :type useTransla: :param makeEmpty: :type makeEmpty: :param readSum: :type readSum: :param filterClusters: :type filterClusters: :param fromphis: :type fromphis: :param fromdirexp: :type fromdirexp: :param coiled_coil: :type coiled_coil: :param dizio_verification: :type dizio_verification: :param message: :type message: :param solved: :type solved: :return: :rtype: """ # print "Trying getting lock, in Write XML",mode,step lock.acquire() # print "Log adquired!!!!!!!!!!!!!!!!!!!!!!!!!",mode,step SOLVED = False print "MODE:",mode,"STEP",step if readSum != None and os.path.exists(readSum): ensembles, ClusAll, RotClu, encn = readClustersFromSUMToDB(DicParameters, readSum, "ROTSOL") nomexml = os.path.join(outputDir, filename + ".xml") if not os.path.exists(nomexml): lock.release() return tree = ET.parse(nomexml) root = tree.getroot() if mode == "ARCIMBOLDO-SHREDDER" and step == "CREATE_LINK": if root.find('LINKS/number') is None: ET.SubElement(root, 'LINKS') ET.SubElement(root.find('LINKS'), 'number').text = str(len(ClusAll)) for qds in range(len(ClusAll)): link = ClusAll[qds] ET.SubElement(root.find('LINKS'), "N" + str(qds)) ET.SubElement(root.find('LINKS/N' + str(qds)), 'text').text = link[0] ET.SubElement(root.find('LINKS/N' + str(qds)), 'url').text = link[1] else: acnumb = int(root.find('LINKS/number').text) root.find('LINKS/number').text = str(acnumb + len(ClusAll)) for qds in range(len(ClusAll)): link = ClusAll[qds] ET.SubElement(root.find('LINKS'), "N" + str(qds + acnumb)) ET.SubElement(root.find('LINKS/N' + str(qds + acnumb)), 'text').text = link[0] ET.SubElement(root.find('LINKS/N' + str(qds + acnumb)), 'url').text = link[1] if mode == "ARCIMBOLDO-SKIP" and step == "JUMP": # flin = os.path.join(outputDir,"./ens1_frag"+str(frag_fixed+1)+"/1_FRF_LIBRARY/clusters.sum") # ensembles = readClustersFromSUMToDB(DicParameters, flin, "ROTSOL") # writeOutputFile(lock,DicParameters,outputDir,filename,"ARCIMBOLDO","TABLE",ensembles,frag_fixed) # writeOutputFile(lock,DicParameters,outputDir,filename,"ARCIMBOLDO","JUMPFRAG_"+str(frag_fixed)+"_"+str(frag_fixed+1),ensembles,frag_fixed) ET.SubElement(root, 'ens1_frag' + str(frag_fixed)) 
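        # NOTE (editor, illustrative sketch only -- not part of the pipeline): the report
        # XML is grown throughout this function with the ElementTree pattern used above:
        # find-or-create a parent, append numbered children, store values in .text.
        # In isolation, with hypothetical names 'doc' and example link data:
        #   import xml.etree.ElementTree as ET
        #   doc = ET.Element('arcimboldo')
        #   ET.SubElement(doc, 'LINKS')
        #   ET.SubElement(doc.find('LINKS'), 'number').text = str(2)
        #   for i, (text, url) in enumerate([('run A', 'http://hostA'), ('run B', 'http://hostB')]):
        #       node = ET.SubElement(doc.find('LINKS'), 'N' + str(i))
        #       ET.SubElement(node, 'text').text = text
        #       ET.SubElement(node, 'url').text = url
        #   print ET.tostring(doc)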
        ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'FRF')
        ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF'), 'allclus').text = "[]"

    if mode == "ARCIMBOLDO" and step.startswith("JUMPFRAG_"):
        st, from_fr, to_fr = step.split("_")
        (root.find('ens1_frag' + from_fr)).tag = 'ens1_frag' + to_fr
        clustn = json.loads(root.find("ens1_frag" + str(to_fr) + "/FRF/allclus").text)
        allc = []
        for key in clustn:
            start_table_row = ""
            body_table_row = ""
            keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace(
                "[", "-").replace("]", "-")
            valonome = ""
            valodue = ""
            # print "----",keyd,step
            if int(from_fr) > 1:
                base = (keyd.split("-")[1]).split("_")
                dr = {}
                for br in base:
                    if br in dr.keys():
                        dr[br] += 1
                    else:
                        dr[br] = 1
                minl = 100000
                d = -1
                for q in dr.keys():
                    if dr[q] <= minl:
                        minl = dr[q]
                        d = q
                base.append(d)
                base = sorted(base)
                valonome = ", ".join(base)
                valodue = "_".join(base)
                valonome = "(" + valonome + ")"
                valodue = "C-" + valodue + "-"
            else:
                valonome = "("
                valodue = "C-"
                for ds in range(2):
                    valonome += keyd + ", "
                    valodue += keyd + "_"
                valonome = valonome[:-2] + ")"
                valodue = valodue[:-1] + "-"
            allc.append(valonome)
            adx = root.find("ens1_frag" + str(to_fr) + "/FRF/C" + keyd)
            adx.text = valonome
            root.find("ens1_frag" + str(to_fr) + "/FRF/C" + keyd).tag = valodue
        root.find("ens1_frag" + str(to_fr) + "/FRF/allclus").text = json.dumps(allc)
        ET.SubElement(root, 'ens1_frag' + str(from_fr))
        ET.SubElement(root.find('ens1_frag' + str(from_fr)), 'FRF')
        ET.SubElement(root.find('ens1_frag' + str(from_fr) + '/FRF'), 'allclus').text = "[]"

    if mode == "ARCIMBOLDO" and step == "VERIFICATION":
        # NOTE: the VERIFICATION node was previously created twice, leaving an empty
        #       duplicate element in the XML; it is now created only once.
        ET.SubElement(root, 'VERIFICATION')
        nm = sorted(dizio_verification.keys())
        cc = [dizio_verification[n]["cc"] for n in nm]
        wMPE = [dizio_verification[n]["wMPE"] for n in nm]
        inco = [dizio_verification[n]["incorrect"] for n in nm]
        ET.SubElement(root.find('VERIFICATION'), 'CC').text = json.dumps(cc)
        ET.SubElement(root.find('VERIFICATION'), 'wMPE').text = json.dumps(wMPE)
        ET.SubElement(root.find('VERIFICATION'), 'incorrect').text = json.dumps(inco)
        for key in dizio_verification:
            if "WRO" in key:
                ET.SubElement(root.find('VERIFICATION'), 'wrong').text = json.dumps(
                    [dizio_verification[key]["cc"], dizio_verification[key]["wMPE"]])
                break
        ET.SubElement(root.find('VERIFICATION'), 'message').text = message
        ET.SubElement(root.find('VERIFICATION'), 'solved').text = str(solved)

    if mode == "ARCIMBOLDO" or mode == "ARCIMBOLDO-BORGES" or mode == "ARCIMBOLDO-SHREDDER":
        if mode == "ARCIMBOLDO" and step == "INITCC":
            aftref, con1 = readCCValFromSUM(path1)
            stats = {}
            for mod in aftref:
                if mod["cluster"] not in stats.keys():
                    stats[mod["cluster"]] = [(mod["initcc"], mod["ner"])]
                else:
                    stats[mod["cluster"]].append((mod["initcc"], mod["ner"]))
                    stats[mod["cluster"]] = sorted(stats[mod["cluster"]], reverse=True)
            ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'INITCC')
            ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'allclus').text = json.dumps(
                map(lambda x: "(" + str(x).replace("_", ", ") + ")" if str(x).replace("_", ", ") != x else str(x),
                    sorted(stats.keys())))
            bestinitcc = 0
            bestclusterinitcc = ""
            for key in sorted(stats.keys()):
                value = stats[key][0]
                if makeEmpty:
                    value = [0.0, 0]
                k = str(key).replace("_", ", ")
                if k != key:
                    key = "(" + k + ")"
                keyd = str(key).replace(" ", "").replace("(", "-").replace(")",
"-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + "C" + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + "C" + str(keyd)), 'initcc').text = str('%.2f' % value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/' + "C" + str(keyd)), 'ner').text = str(value[1]) if value[0] > bestinitcc: bestinitcc = value[0] bestclusterinitcc = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'bestclusterinitcc').text = bestclusterinitcc elif mode.startswith("ARCIMBOLDO-BORGES") and step == "INITCC": befref, con2 = readCCValFromSUM(path1) aftref, con1 = readCCValFromSUM(path2) listcc_before = [] listcc_after = [] cluster = int(path1.split("/")[-2]) for mod in befref: listcc_before.append((mod["initcc"], mod["ner"])) listcc_before = sorted(listcc_before, reverse=True) for mod in aftref: listcc_after.append((mod["initcc"], mod["ner"])) listcc_after = sorted(listcc_after, reverse=True) stats = {} for mod in aftref: if mod["cluster"] not in stats.keys(): stats[mod["cluster"]] = [(mod["initcc"], mod["ner"])] else: stats[mod["cluster"]].append((mod["initcc"], mod["ner"])) stats[mod["cluster"]] = sorted(stats[mod["cluster"]], reverse=True) if root.find('ens1_frag' + str(frag_fixed) + '/INITCC') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'INITCC') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'BEFORE') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'AFTER') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/allclus') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: (root.find('ens1_frag' + str(frag_fixed) + '/INITCC/allclus')).text = json.dumps(list(set( json.loads((root.find('ens1_frag' + str(frag_fixed) + '/INITCC/allclus')).text) + map( lambda x: str(x), sorted(stats.keys()))))) bestinitcc = 0 bestclusterinitcc = "" for key in sorted(stats.keys()): value = stats[key][0] if makeEmpty: value = [0.0, 0] keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/C' + str(keyd)), 'initcc').text = str('%.2f' % value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/AFTER/C' + str(keyd)), 'ner').text = str(value[1]) if value[0] > bestinitcc: bestinitcc = value[0] bestclusterinitcc = str(key) stats = {} for mod in befref: if mod["cluster"] not in stats.keys(): stats[mod["cluster"]] = [(mod["initcc"], mod["ner"])] else: stats[mod["cluster"]].append((mod["initcc"], mod["ner"])) stats[mod["cluster"]] = sorted(stats[mod["cluster"]], reverse=True) for key in sorted(stats.keys()): value = stats[key][0] if makeEmpty: value = 
[0.0, 0] keyd = str(key).replace(' ', '').replace('(', '-').replace(')', '-').replace(',', '_').replace('[', '-').replace( ']', '-') if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE'), 'C' + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/C' + str(keyd)), 'initcc').text = str('%.2f' % value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC/BEFORE/C' + str(keyd)), 'ner').text = str(value[1]) if value[0] > bestinitcc: bestinitcc = value[0] bestclusterinitcc = str(key) if root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestclusterinitcc') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'bestclusterinitcc').text = bestclusterinitcc ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/INITCC'), 'bestinitcc').text = str( '%.2f' % bestinitcc) elif float(bestinitcc) > float((root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestinitcc')).text): (root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestclusterinitcc')).text = bestclusterinitcc (root.find('ens1_frag' + str(frag_fixed) + '/INITCC/bestinitcc')).text = str('%.2f' % bestinitcc) performBacktracing = False backPDB = None FINALCC = None RESIDTR = None cluster = None if mode == 'ARCIMBOLDO' and step.startswith('FAST'): modality = step.split('_') typemode = modality[0] cycle = modality[1] # fragment = modality[2] p = subprocess.Popen('grep -H CC ' + path1 + '*.pdb', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() fincc = float(linlis[6][:-1]) restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) backPDB = finalcc[0][2] frag_fixed = int(os.path.basename(backPDB).split("FR")[1].split("_")[0]) + 1 fragment = frag_fixed if root.find('ens1_frag' + str(frag_fixed) + '/EXP') is not None: ficc = root.find('ens1_frag' + str(frag_fixed) + '/EXP/finalcc').text if float(finalcc[0][0]) > float(ficc): backPDB = finalcc[0][2] FINALCC = finalcc[0][0] RESIDTR = finalcc[0][1] modello = os.path.basename(backPDB) modelloFRF = modello.split('-')[0] + '.pdb' pathFRF = os.path.join(outputDir + '/ens1_frag' + str(frag_fixed), '1_FRF_LIBRARY/clusters.sum') rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modelloFRF, modello, None, mode) cluster = rop['original_rotcluster'] druppo = str(cluster) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) cluster = druppo if float(FINALCC) >= float(root.find('backtracing/EXP/finalcc').text): performBacktracing = True k = str(cluster).replace('_', ', ') if k != cluster: cluster = '(' + k + ')' root.find('ens1_frag' + str(frag_fixed) + '/EXP/Cluster').text = str(cluster) root.find('ens1_frag' + str(frag_fixed) + '/EXP/finalcc').text = str(FINALCC) root.find('ens1_frag' + str(frag_fixed) + '/EXP/restraced').text = str(RESIDTR) root.find('ens1_frag' + str(frag_fixed) + '/EXP/backpdb').text = str(backPDB) root.find('ens1_frag' + str(frag_fixed) + '/EXP/cycle').text = str(cycle) else: backPDB = None FINALCC = None RESIDTR = None cluster = None performBacktracing = False else: backPDB = finalcc[0][2] FINALCC = 
finalcc[0][0] RESIDTR = finalcc[0][1] modello = os.path.basename(backPDB) # print "----",backPDB modelloFRF = modello.split('-')[0] + '.pdb' pathFRF = os.path.join(outputDir + '/ens1_frag' + str(frag_fixed), '1_FRF_LIBRARY/clusters.sum') rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modelloFRF, modello, None, mode) cluster = rop['original_rotcluster'] druppo = str(cluster) if "_" in druppo: druppo = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), druppo.split("_"))))) cluster = druppo k = str(cluster).replace('_', ', ') if k != cluster: cluster = '(' + k + ')' ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'EXP') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'Cluster').text = str(cluster) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'finalcc').text = str(finalcc[0][0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'restraced').text = str(finalcc[0][1]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'backpdb').text = str(finalcc[0][2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'cycle').text = str(cycle) if root.find('backtracing/EXP/finalcc') is None or float(FINALCC) >= float( root.find('backtracing/EXP/finalcc').text): performBacktracing = True if (mode.startswith('ARCIMBOLDO-BORGES') or mode.startswith('ARCIMBOLDO-SHREDDER')) and step.startswith('FAST'): modality = step.split('_') typemode = modality[0] cycle = modality[1] cluster = modality[2] p = subprocess.Popen('grep -H CC ' + path1 + '*.pdb', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() fincc = float(linlis[6][:-1]) restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) if root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)) is not None: ficc = root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/finalcc').text if float(finalcc[0][0]) > float(ficc): backPDB = finalcc[0][2] FINALCC = finalcc[0][0] RESIDTR = finalcc[0][1] performBacktracing = True root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/Cluster').text = str(cluster) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/finalcc').text = str(FINALCC) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/restraced').text = str( RESIDTR) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/backpdb').text = str(backPDB) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/cycle').text = str(cycle) root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster) + '/mode').text = str(typemode) else: backPDB = None FINALCC = None RESIDTR = None performBacktracing = False else: backPDB = finalcc[0][2] FINALCC = finalcc[0][0] RESIDTR = finalcc[0][1] if root.find('ens1_frag' + str(frag_fixed) + '/EXP') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'EXP') if root.find('ens1_frag' + str(frag_fixed) + '/EXP/' + 'C' + str(cluster)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP'), 'C' + str(cluster)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'Cluster').text = str( cluster) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'finalcc').text = str( finalcc[0][0]) 
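                # NOTE (editor, illustrative sketch only): the FAST branches above parse the
                # output of `grep -H CC <dir>/*.pdb` into (CC, n_traced, pdb_path) tuples and
                # keep the tuple with the highest CC. With a made-up line whose token
                # positions mimic the ones indexed here:
                #   line = "mod1.pdb:TITLE w1 w2 w3 w4 w5 44.40% 120"  # hypothetical layout
                #   parts = line.split()
                #   cc = float(parts[6][:-1])   # 44.40, trailing '%' stripped
                #   nres = int(parts[7])        # 120
                #   pdb = parts[0][:-6]         # 'mod1.pdb', ':TITLE' grep suffix stripped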
ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'restraced').text = str(finalcc[0][1]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'backpdb').text = str( finalcc[0][2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'cycle').text = str( cycle) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/EXP/C' + str(cluster)), 'mode').text = str( typemode) performBacktracing = True if step == "FTF": stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER) if root.find('ens1_frag' + str(frag_fixed) + '/FTF') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'FTF') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: root.find('ens1_frag' + str(frag_fixed) + '/FTF/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/FTF/allclus').text) + map(lambda x: str(x), sorted( stats.keys()))))) for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, 0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Translations').text = str(value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FTF/' + "C" + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) if step == "PACK": stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER) if root.find('ens1_frag' + str(frag_fixed) + '/PACK') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'PACK') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: root.find('ens1_frag' + str(frag_fixed) + '/PACK/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/PACK/allclus').text) + map(lambda x: str(x), sorted( stats.keys()))))) bestllg = 0 bestzscore = 0 bestclusterllg = "" bestclusterzscore = "" for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, 0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(" ", "").replace("(", "-").replace(")", "-").replace(",", "_").replace("[", "-").replace( "]", "-") if root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), "C" + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Translations').text = str(value[0]) 
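                # NOTE (editor, illustrative sketch only): the cluster key is sanitised the
                # same way before every XML tag lookup in this function; factored out as a
                # hypothetical helper, the transformation is:
                #   def xml_safe_key(key):
                #       return (str(key).replace(" ", "").replace("(", "-").replace(")", "-")
                #               .replace(",", "_").replace("[", "-").replace("]", "-"))
                #   assert xml_safe_key("(1, 2)") == "-1_2-"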
ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK/' + "C" + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) if value[2] > bestllg: bestllg = value[2] bestclusterllg = str(key) if value[4] > bestzscore: bestzscore = value[4] bestclusterzscore = str(key) if root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestclusterllg') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestclusterllg').text = bestclusterllg ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestclusterzscore').text = bestclusterzscore ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestllg').text = str( '%.2f' % bestllg) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/PACK'), 'bestzscore').text = str( '%.2f' % bestzscore) else: if bestllg > float(root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestllg').text): root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestclusterllg').text = bestclusterllg root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestllg').text = str('%.2f' % bestllg) if bestzscore > float(root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestzscore').text): root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestclusterzscore').text = bestclusterzscore root.find('ens1_frag' + str(frag_fixed) + '/PACK/bestzscore').text = str('%.2f' % bestzscore) if step == 'RNP': stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER, forceGetTFZ=True) if root.find('ens1_frag' + str(frag_fixed) + '/RNP') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'RNP') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: root.find('ens1_frag' + str(frag_fixed) + '/RNP/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/RNP/allclus').text) + map(lambda x: str(x), sorted( stats.keys()))))) for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, 0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(' ', '').replace('(', '-').replace(')', '-').replace(',', '_').replace('[', '-').replace( ']', '-') if root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP'), 'C' + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Solutions').text = str(value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + 
str(keyd)), 'Top_tfz').text = str('%.2f' % value[8]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/RNP/' + 'C' + str(keyd)), 'Mean_tfz').text = str('%.2f' % value[9]) if step == 'TABLE': numeroClusters = len(ClusAll) stats, listRotaClus = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER) if root.find('ens1_frag' + str(frag_fixed)) is None: ET.SubElement(root, 'ens1_frag' + str(frag_fixed)) if root.find('ens1_frag' + str(frag_fixed) + '/FRF') is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed)), 'FRF') ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF'), 'allclus').text = json.dumps( map(lambda x: str(x), sorted(stats.keys()))) else: altt = stats.keys() finalt = [] for ask in altt: if isinstance(ask, tuple) and len(ask) == frag_fixed: finalt.append(ask) elif not isinstance(ask, tuple) and frag_fixed == 1: finalt.append(ask) root.find('ens1_frag' + str(frag_fixed) + '/FRF/allclus').text = json.dumps(list(set( json.loads(root.find('ens1_frag' + str(frag_fixed) + '/FRF/allclus').text) + map(lambda x: str(x), sorted(finalt))))) for key in sorted(stats.keys()): value = stats[key] if makeEmpty: value = [0, \ 0, 0.0, 0.0, 0.0, 0.0] keyd = str(key).replace(' ', '').replace('(', '-').replace(')', '-').replace(',', '_').replace('[', '-').replace( ']', '-') if root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)) is None: ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF'), 'C' + str(keyd)) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Cluster').text = str(key) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Rotations').text = str(value[0]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Top_llg').text = str('%.2f' % value[2]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Mean_llg').text = str('%.2f' % value[3]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Top_zscore').text = str('%.2f' % value[4]) ET.SubElement(root.find('ens1_frag' + str(frag_fixed) + '/FRF/' + 'C' + str(keyd)), 'Mean_zscore').text = str('%.2f' % value[5]) if mode == 'ARCIMBOLDO' and performBacktracing: modality = step.split('_') typemode = modality[0] cycle = modality[1] # fragment = modality[2] backPDA = backPDB[:-4] + '.pda' listus = os.path.basename(backPDB).split('_') pdbid = listus[0] model = listus[1] IdSolution = listus[-1] IdSolution = IdSolution[:-4] modello = os.path.basename(backPDB) modelloFRF = modello.split("-")[0] + ".pdb" if root.find('backtracing') is None: ET.SubElement(root, 'backtracing') ET.SubElement(root.find('backtracing'), 'model').text = str(modello) ET.SubElement(root.find('backtracing'), 'finalcc').text = str('%.2f' % FINALCC) ET.SubElement(root.find('backtracing'), 'restraced').text = str(RESIDTR) ET.SubElement(root.find('backtracing'), 'file').text = str(backPDB) else: root.find('backtracing/model').text = str(modello) root.find('backtracing/finalcc').text = str('%.2f' % FINALCC) root.find('backtracing/restraced').text = str(RESIDTR) root.find('backtracing/file').text = str(backPDB) # FRF pathFRF = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "1_FRF_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modelloFRF, modello, None, mode) if root.find('backtracing/FRF') is None: ET.SubElement(root.find('backtracing'), 'FRF') ET.SubElement(root.find('backtracing/FRF'), 
'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FRF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FRF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FRF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FRF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FRF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FRF/posRank').text = str(posRank) root.find('backtracing/FRF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FRF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FRF/cluster').text = str(cluster) root.find('backtracing/FRF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FRF/top_zscore').text = str('%.2f' % topZSCORE) if useTransla: # FTF pathFTF = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "3_FTF_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFTF, modello, modello, None, mode) if root.find('backtracing/FTF') is None: ET.SubElement(root.find('backtracing'), 'FTF') ET.SubElement(root.find('backtracing/FTF'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FTF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FTF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FTF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FTF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FTF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FTF/posRank').text = str(posRank) root.find('backtracing/FTF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FTF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FTF/cluster').text = str(cluster) root.find('backtracing/FTF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FTF/top_zscore').text = str('%.2f' % topZSCORE) # PACK if usePacking: pathPACK = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "4_PACK_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathPACK, modello, modello, None, mode) if root.find('backtracing/PACK') is None: ET.SubElement(root.find('backtracing'), 'PACK') ET.SubElement(root.find('backtracing/PACK'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/PACK'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/PACK'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/PACK'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/PACK'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/PACK'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/PACK/posRank').text = str(posRank) root.find('backtracing/PACK/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/PACK/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/PACK/cluster').text = str(cluster) root.find('backtracing/PACK/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/PACK/top_zscore').text = str('%.2f' % topZSCORE) # RNP pathRNP = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "5_RNP_LIBRARY/clusters.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathRNP, modello, modello, None, "RNP") if root.find('backtracing/RNP') is None: ET.SubElement(root.find('backtracing'), 'RNP') 
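            # NOTE (editor, illustrative sketch only): every backtracing block here follows
            # the same create-or-update idiom for ElementTree nodes; a hypothetical helper
            # with the same behaviour for a simple child tag would be:
            #   def set_or_create(parent, tag, text):
            #       node = parent.find(tag)
            #       if node is None:
            #           node = ET.SubElement(parent, tag)
            #       node.text = str(text)
            #       return node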
                ET.SubElement(root.find('backtracing/RNP'), 'posRank').text = str(posRank)
                ET.SubElement(root.find('backtracing/RNP'), 'llg').text = str('%.2f' % rop["llg"])
                ET.SubElement(root.find('backtracing/RNP'), 'zscore').text = str('%.2f' % rop["zscore"])
                ET.SubElement(root.find('backtracing/RNP'), 'cluster').text = str(cluster)
                ET.SubElement(root.find('backtracing/RNP'), 'top_llg').text = str('%.2f' % topLLG)
                ET.SubElement(root.find('backtracing/RNP'), 'top_zscore').text = str('%.2f' % topZSCORE)
                ET.SubElement(root.find('backtracing/RNP'), 'tfz').text = str(
                    '%.2f' % rop["tfz=="]) if "tfz==" in rop else "0.0"
            else:
                root.find('backtracing/RNP/posRank').text = str(posRank)
                root.find('backtracing/RNP/llg').text = str('%.2f' % rop["llg"])
                root.find('backtracing/RNP/zscore').text = str('%.2f' % rop["zscore"])
                root.find('backtracing/RNP/cluster').text = str(cluster)
                root.find('backtracing/RNP/top_llg').text = str('%.2f' % topLLG)
                root.find('backtracing/RNP/top_zscore').text = str('%.2f' % topZSCORE)
                root.find('backtracing/RNP/tfz').text = str('%.2f' % rop["tfz=="]) if "tfz==" in rop else "0.0"
            # INITCC
            pathACC = os.path.join(outputDir + "/ens1_frag" + str(frag_fixed), "6_EXPVAL_LIBRARY/solCC.sum")
            ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA = __getStatFromSumCCAndModel(None, pathACC,
                                                                                                  modello, mode)
            if root.find('backtracing/INITCC') is None:
                ET.SubElement(root.find('backtracing'), 'INITCC')
                # AFTER REF
                ET.SubElement(root.find('backtracing/INITCC'), 'posRank').text = str(posRankA)
                ET.SubElement(root.find('backtracing/INITCC'), 'initcc').text = str('%.2f' % ropA[0])
                ET.SubElement(root.find('backtracing/INITCC'), 'cluster').text = str(cluster)
                ET.SubElement(root.find('backtracing/INITCC'), 'top_initcc').text = str('%.2f' % topINITCC_A)
            else:
                root.find('backtracing/INITCC/posRank').text = str(posRankA)
                root.find('backtracing/INITCC/initcc').text = str('%.2f' % ropA[0])
                root.find('backtracing/INITCC/cluster').text = str(cluster)
                root.find('backtracing/INITCC/top_initcc').text = str('%.2f' % topINITCC_A)
            if typemode == "FAST":
                ciclus = cycle
                # FAST
                # p = subprocess.Popen("grep -H CC "+os.path.join(outputDir+"/ens1_frag"+str(frag_fixed),"8_EXP_LIBRARY/"+str(ciclus)+"/*/"+os.path.basename(backPDB)), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                p = subprocess.Popen("grep -H CC " + path1 + "/*.pdb", shell=True, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                out, err = p.communicate()
                out = out.strip()
                outlist = out.splitlines()
                finalcc = []
                for lits in outlist:
                    linlis = lits.split()
                    fincc = float(linlis[6][:-1])
                    restr = int(linlis[7])
                    npdb = linlis[0][:-6]
                    finalcc.append((fincc, restr, npdb))
                finalcc = sorted(finalcc, reverse=True)
                if root.find('backtracing/ALLCC') is None:
                    ET.SubElement(root.find('backtracing'), 'ALLCC').text = json.dumps([fel[0] for fel in finalcc])
                    ET.SubElement(root.find('backtracing'), 'ALLNRES').text = json.dumps([fel[1] for fel in finalcc])
                else:
                    root.find('backtracing/ALLCC').text = json.dumps([fel[0] for fel in finalcc])
                    # NOTE: ALLNRES now stores the residue counts (fel[1]); it previously
                    #       duplicated the CC values (fel[0]).
                    root.find('backtracing/ALLNRES').text = json.dumps([fel[1] for fel in finalcc])
                if root.find('backtracing/EXP') is None:
                    ET.SubElement(root.find('backtracing'), 'EXP')
                    ET.SubElement(root.find('backtracing/EXP'), 'cycle').text = str(ciclus)
                    ET.SubElement(root.find('backtracing/EXP'), 'finalcc').text = str('%.2f' % finalcc[0][0])
                    ET.SubElement(root.find('backtracing/EXP'), 'restraced').text = str('%.2f' % finalcc[0][1])
                else:
                    root.find('backtracing/EXP/cycle').text = str(ciclus)
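                # NOTE (editor, illustrative sketch only): ALLCC/ALLNRES are stored as JSON
                # lists inside the element text, so a consumer of this XML recovers them
                # with json.loads (element paths as above, values hypothetical):
                #   ccs = json.loads(root.find('backtracing/ALLCC').text)     # e.g. [44.4, 31.2]
                #   nres = json.loads(root.find('backtracing/ALLNRES').text)  # e.g. [120, 95]
                #   best_cc = max(ccs) if ccs else None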
root.find('backtracing/EXP/finalcc').text = str('%.2f' % finalcc[0][0]) root.find('backtracing/EXP/restraced').text = str('%.2f' % finalcc[0][1]) shutil.copyfile(backPDB, os.path.join(outputDir, "best.pdb")) shutil.copyfile(backPDB[:-4] + ".pda", os.path.join(outputDir, "best.pda")) shutil.copyfile(backPDB[:-4] + ".phs", os.path.join(outputDir, "best.phs")) shutil.copyfile(backPDB[:-4] + ".lst", os.path.join(outputDir, "best.lst")) if float(FINALCC) >= 30: SOLVED = True elif ( mode.startswith("ARCIMBOLDO-BORGES") or mode.startswith( "ARCIMBOLDO-SHREDDER")) and performBacktracing and ( root.find('backtracing/finalcc') is None or ( float(FINALCC) >= float(root.find('backtracing/finalcc').text))): modality = step.split("_") typemode = modality[0] cycle = modality[1] cluster = modality[2] # backPDB = # FINALCC = # RESIDTR = # print "Momento prima dell'errore",backPDB,FINALCC,RESIDTR ad = os.path.basename(backPDB).split("_") pdbid = ad[0] model = ad[1] # ref = ad[-1] IdSolution = ad[2].split("-")[0] # ref = ref[:-4] ensem = os.path.basename(backPDB).split("-") if len(ensem) > 1: ensem = ensem[1].split("_")[0] else: ensem = None modello = pdbid + "_" + model + "_" + IdSolution + ".pdb" modelloTr = pdbid + "_" + model + "_" + ad[2] + ".pdb" if root.find('backtracing') is None: ET.SubElement(root, 'backtracing') ET.SubElement(root.find('backtracing'), 'model').text = str(modello) ET.SubElement(root.find('backtracing'), 'finalcc').text = str('%.2f' % FINALCC) ET.SubElement(root.find('backtracing'), 'restraced').text = str(RESIDTR) ET.SubElement(root.find('backtracing'), 'file').text = str(backPDB) else: root.find('backtracing/model').text = str(modello) root.find('backtracing/finalcc').text = str('%.2f' % FINALCC) root.find('backtracing/restraced').text = str(RESIDTR) root.find('backtracing/file').text = str(backPDB) # FRF pathFRF = os.path.join(outputDir, "1_FRF_Library/clustersNoRed.sum") # __getStatFromSumAndModel(sumPath,modelo,fullmodel,cluster,mode,n_ense=None) modfrf = modello # print modfrf if modello.split("_")[0].endswith("nogyre"): modfrf = modello.split("_")[0].replace("nogyre", "") + "_" + modello.split("_")[1] + "_" + \ modello.split("_")[2] rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modfrf, modfrf, None, mode) # print "============================" # print pathFRF, modfrf, modfrf # print rop, topLLG,topZSCORE,posRank,rop["llg"] # print "============================" if not fromphis: if root.find('backtracing/FRF') is None: ET.SubElement(root.find('backtracing'), 'FRF') ET.SubElement(root.find('backtracing/FRF'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FRF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FRF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FRF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FRF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FRF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FRF/posRank').text = str(posRank) root.find('backtracing/FRF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FRF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FRF/cluster').text = str(cluster) root.find('backtracing/FRF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FRF/top_zscore').text = str('%.2f' % topZSCORE) if not fromphis: for ciclus in range(numberCyclesRef): if ciclus > 0: # FRF if useRGR and not 
modello.split("_")[0].endswith("nogyre"): pathRGR = os.path.join(outputDir, "3_RGR/" + str(cluster) + "/" + str(ciclus - 1) + "/clustersNoRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathRGR, modello, modello, cluster, mode) elif not modello.split("_")[0].endswith("nogyre"): pathFRF = os.path.join(outputDir, "4_FRF_LIBRARY/" + str(cluster) + "/" + str( ciclus) + "/clustersNoRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFRF, modello, modello, cluster, mode) if root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF') is None: ET.SubElement(root.find('backtracing'), 'REF_M') ET.SubElement(root.find('backtracing/REF_M'), 'C' + str(ciclus)) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus)), 'FRF') ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'posRank').text = str( posRank) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'llg').text = str( '%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'zscore').text = str( '%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'cluster').text = str( cluster) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'top_llg').text = str( '%.2f' % topLLG) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF'), 'top_zscore').text = str( '%.2f' % topZSCORE) else: root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/posRank').text = str(posRank) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/zscore').text = str( '%.2f' % rop["zscore"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/cluster').text = str(cluster) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/REF_M/C' + str(ciclus) + '/FRF/top_zscore').text = str( '%.2f' % topZSCORE) if not fromphis: # REF P1 if useRefP1 and ciclus != numberCyclesRef - 1: pathREFP1 = os.path.join(outputDir, "3_RBR_P1_BRF/" + str(cluster) + "/" + str(ciclus) + "/models.sum") refval, pl = readRefFromSUM(pathREFP1) rop = None for ren in refval: acm = "" if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES", "RNP"]: acm = os.path.basename(ren["corresp"]) if mode in ["ANALYSIS_CLU", "ARCIMBOLDO-BORGES"]: ad = acm.split("_") acm = ad[0] + "_" + ad[1] + "_" + ad[-1] if mode == "ARCIMBOLDO-SHREDDER": pass if acm == modello: rop = ren break if root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1') is None: ET.SubElement(root.find('backtracing'), 'REF_M') ET.SubElement(root.find('backtracing/REF_M'), 'C' + str(ciclus)) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus)), 'RNP_P1') ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1'), 'llg').text = str( '%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1'), 'zscore').text = str( '%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1'), 'rmsd').text = str( rop["rmsd"]) else: root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1/zscore').text = str( '%.2f' % rop["zscore"]) root.find('backtracing/REF_M/C' + str(ciclus) + '/RNP_P1/rmsd').text = str(rop["rmsd"]) mode = "ARCIMBOLDO-BORGES" if not fromphis: if useTransla: # FTF pathFTF = os.path.join(outputDir, "6_FTF_Library/" + str(cluster) + 
"/clustersNoRedPSol.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathFTF, modello, modello, None, mode, n_ense=ensem) if root.find('backtracing/FTF') is None: ET.SubElement(root.find('backtracing'), 'FTF') ET.SubElement(root.find('backtracing/FTF'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/FTF'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/FTF'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/FTF'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/FTF'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/FTF'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/FTF/posRank').text = str(posRank) root.find('backtracing/FTF/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/FTF/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/FTF/cluster').text = str(cluster) root.find('backtracing/FTF/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/FTF/top_zscore').text = str('%.2f' % topZSCORE) if not fromphis: # PACK if usePacking: pathPACK = os.path.join(outputDir, "7.5_PACK_Library/" + str(cluster) + "/clustersRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathPACK, modello, modello, None, mode, n_ense=ensem) if root.find('backtracing/PACK') is None: ET.SubElement(root.find('backtracing'), 'PACK') ET.SubElement(root.find('backtracing/PACK'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/PACK'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/PACK'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/PACK'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/PACK'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/PACK'), 'top_zscore').text = str('%.2f' % topZSCORE) else: root.find('backtracing/PACK/posRank').text = str(posRank) root.find('backtracing/PACK/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/PACK/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/PACK/cluster').text = str(cluster) root.find('backtracing/PACK/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/PACK/top_zscore').text = str('%.2f' % topZSCORE) if not fromphis: # RNP pathRNP = os.path.join(outputDir, "8_RBR/" + str(cluster) + "/clustersNoRed.sum") rop, topLLG, topZSCORE, posRank = __getStatFromSumAndModel(pathRNP, modello, modello, None, "RNP", n_ense=ensem) if root.find('backtracing/RNP') is None: ET.SubElement(root.find('backtracing'), 'RNP') ET.SubElement(root.find('backtracing/RNP'), 'posRank').text = str(posRank) ET.SubElement(root.find('backtracing/RNP'), 'llg').text = str('%.2f' % rop["llg"]) ET.SubElement(root.find('backtracing/RNP'), 'zscore').text = str('%.2f' % rop["zscore"]) ET.SubElement(root.find('backtracing/RNP'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/RNP'), 'top_llg').text = str('%.2f' % topLLG) ET.SubElement(root.find('backtracing/RNP'), 'top_zscore').text = str('%.2f' % topZSCORE) ET.SubElement(root.find('backtracing/RNP'), 'tfz').text = str( '%.2f' % rop["tfz=="]) if "tfz==" in rop else "0.0" else: root.find('backtracing/RNP/posRank').text = str(posRank) root.find('backtracing/RNP/llg').text = str('%.2f' % rop["llg"]) root.find('backtracing/RNP/zscore').text = str('%.2f' % rop["zscore"]) root.find('backtracing/RNP/cluster').text = str(cluster) 
root.find('backtracing/RNP/top_llg').text = str('%.2f' % topLLG) root.find('backtracing/RNP/top_zscore').text = str('%.2f' % topZSCORE) root.find('backtracing/RNP/tfz').text = str('%.2f' % rop["tfz=="]) if "tfz==" in rop else "0.0" if not fromphis: # INITCC pathBCC = os.path.join(outputDir, "9.5_EXP/" + str(cluster) + "/solCC.sum") pathACC = os.path.join(outputDir, "9_EXP/" + str(cluster) + "/solCC.sum") ropB, topINITCC_B, posRankB, ropA, topINITCC_A, posRankA = __getStatFromSumCCAndModel(pathBCC, pathACC, modelloTr, mode) if root.find('backtracing/INITCC') is None: ET.SubElement(root.find('backtracing'), 'INITCC') ET.SubElement(root.find('backtracing/INITCC'), 'BEFORE') # BEFORE REF ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'posRank').text = str(posRankB) ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'initcc').text = str('%.2f' % ropB[0]) ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/INITCC/BEFORE'), 'top_initcc').text = str('%.2f' % topINITCC_B) ET.SubElement(root.find('backtracing/INITCC'), 'AFTER') # AFTER REF ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'posRank').text = str(posRankA) ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'initcc').text = str('%.2f' % ropA[0]) ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'cluster').text = str(cluster) ET.SubElement(root.find('backtracing/INITCC/AFTER'), 'top_initcc').text = str('%.2f' % topINITCC_A) else: root.find('backtracing/INITCC/BEFORE/posRank').text = str(posRankB) root.find('backtracing/INITCC/BEFORE/initcc').text = str('%.2f' % ropB[0]) root.find('backtracing/INITCC/BEFORE/cluster').text = str(cluster) root.find('backtracing/INITCC/BEFORE/top_initcc').text = str('%.2f' % topINITCC_B) # AFTER REF root.find('backtracing/INITCC/AFTER/posRank').text = str(posRankA) root.find('backtracing/INITCC/AFTER/initcc').text = str('%.2f' % ropA[0]) root.find('backtracing/INITCC/AFTER/cluster').text = str(cluster) root.find('backtracing/INITCC/AFTER/top_initcc').text = str('%.2f' % topINITCC_A) if typemode == "FAST": ciclus = cycle # FAST p = subprocess.Popen("grep -H CC " + os.path.join(outputDir, fromdirexp + "/" + str(cluster) + "/" + str( ciclus) + "/*/" + os.path.basename(backPDB)), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() fincc = float(linlis[6][:-1]) restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) if root.find('backtracing/EXP') is None: ET.SubElement(root.find('backtracing'), 'EXP') ET.SubElement(root.find('backtracing/EXP'), 'cycle').text = str(ciclus) ET.SubElement(root.find('backtracing/EXP'), 'finalcc').text = str('%.2f' % finalcc[0][0]) ET.SubElement(root.find('backtracing/EXP'), 'restraced').text = str('%.2f' % finalcc[0][1]) else: root.find('backtracing/EXP/cycle').text = str(ciclus) root.find('backtracing/EXP/finalcc').text = str('%.2f' % finalcc[0][0]) root.find('backtracing/EXP/restraced').text = str('%.2f' % finalcc[0][1]) shutil.copyfile(backPDB, os.path.join(outputDir, "best.pdb")) if fromphis: best_ext = ".phi" else: best_ext = ".pda" shutil.copyfile(backPDB[:-4] + best_ext, os.path.join(outputDir, "best" + best_ext)) shutil.copyfile(backPDB[:-4] + ".phs", os.path.join(outputDir, "best.phs")) shutil.copyfile(backPDB[:-4] + ".lst", os.path.join(outputDir, "best.lst")) if 
float(FINALCC) >= 30: SOLVED = True if (mode.startswith("ARCIMBOLDO-BORGES") or mode == "ARCIMBOLDO-CLUSTERS") and step == "FRF" and frag_fixed == 1: stats, listRotaClus, ntotalpdbs, nmaxllg, nmaxzscore = __statsSteps(DicParameters, ClusAll, ensembles, frag_fixed, LIMIT_CLUSTER=LIMIT_CLUSTER, getGlobalStats=True) # NOTE: This was the old way using first by number of rotations and for clusters with the same size by top llg # listRotaClus = sorted(listRotaClus,reverse=True) # stats[numc] = [nrts, distpdb, llg_max, llg_avg, zscore_max, zscore_avg, llg_std, llg_min] listRotaClus = sorted(listRotaClus, key=lambda x: ( (stats[x[2]][1] / ntotalpdbs) + (stats[x[2]][2] / nmaxllg) + (stats[x[2]][4] / nmaxzscore)), reverse=True) # print listRotaClus # NOTE: Filter by mean rotations mean_all_rot = map(lambda x: x[0], listRotaClus) mean_all_rot = numpy.mean(numpy.array(mean_all_rot)) # NOTE: Filter by mean LLG # mean_all_llg = map(lambda x: x[1], listRotaClus) # mean_all_llg = numpy.mean(numpy.array(mean_all_llg)) listClusters = [] # best,clubest=listRotaClus[0] # listClusters.append(clubest) for t in range(len(listRotaClus)): npdbs, llgclu, nclu = listRotaClus[t] # print diff, (best*50/100.0), diff < (best*50/100.0) # NOTE: Filter out by LLG # if llgclu >= mean_all_llg-10: #(best*50/100.0): # NOTE: Filter out by Number of rotations if not filterClusters or npdbs >= mean_all_rot: # (best*50/100.0): listClusters.append(nclu) ET.SubElement(root, 'rot_clustering') ET.SubElement(root.find('rot_clustering'), 'full') ET.SubElement(root.find('rot_clustering'), 'reduced') ET.SubElement(root.find('rot_clustering'), 'green') s = [] h = [] g = [] for indre in listRotaClus: npdbs, nrot, nclu = indre if nclu in listClusters: s.append((nclu, '""')) h.append((nclu, '""')) g.append(nclu) else: h.append((nclu, '"none"')) s.append((nclu, '""')) root.find('rot_clustering/full').text = json.dumps(s) root.find('rot_clustering/reduced').text = json.dumps(h) root.find('rot_clustering/green').text = json.dumps(g) s = h = g = None data_graph = "[" liun = stats.keys() role = True for ind in range(len(liun)): key = liun[ind] value = stats[key] data_graph += """[\"""" + str(key) + """\", """ + str(value[2]) + """, """ + str(value[4]) + """, """ + str( value[0]) + """, """ + str(role).lower() + """]""" role = not role if ind == len(liun) - 1: data_graph += """\n""" else: data_graph += """,\n""" data_graph += """\n]\n""" ET.SubElement(root.find('rot_clustering'), 'graphfull').text = data_graph liun = stats.keys() data_graph = "[" role = True liun = [k for k in liun if int(k) in listClusters] for ind in range(len(liun)): key = liun[ind] value = stats[key] data_graph += """[\"""" + str(key) + """\", """ + str(value[2]) + """, """ + str(value[4]) + """, """ + str( value[0]) + """, """ + str(role).lower() + """]""" role = not role if ind == len(liun) - 1: data_graph += """\n""" else: data_graph += """,\n""" data_graph += """\n]\n""" ET.SubElement(root.find('rot_clustering'), 'graphreduced').text = data_graph for key in stats.keys(): value = stats[key] ET.SubElement(root.find('rot_clustering'), 'FRF-' + str(key)) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'cluster').text = str(key) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'rotations').text = str(value[0]) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'distinct').text = str(value[1]) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'top_llg').text = str('%.2f' % (value[2])) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 
'mean_llg').text = str('%.2f' % (value[3])) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'top_zscore').text = str('%.2f' % (value[4])) ET.SubElement(root.find('rot_clustering/FRF-' + str(key)), 'mean_zscore').text = str('%.2f' % (value[5])) ed = root.find('TIME') listatime = [] #print 'SHERLOCK ed',ed #print 'SHERLOCK listatime',listatime if ed is not None: listatime = json.loads(ed.text) else: ed = ET.SubElement(root, 'TIME') listatime.append((mode, step, str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M")))) ed.text = json.dumps(listatime) tree.write(nomexml) print "XML WRITTEN", nomexml, frag_fixed if SOLVED: print "Your current best solution has a Final CC of ", FINALCC, "%" lock.release() arci_output.generateHTML(lock, outputDir, filename) if STOP_IF_SOLVED: sys.exit(0) if mode.startswith("ARCIMBOLDO-BORGES") and step == "FRF": lock.release() return listClusters if not SOLVED: lock.release() def writeSumClusters(Clusters, dirout, filename, convNames, RotClu=[], LIMIT_CLUSTER=None, saveMAP=False, euler_frac_zero=False): """ Writes a summary file containing information about solutions. :param Clusters: :type Clusters: :param dirout: directory where to write the sum file :type dirout: str :param filename: name for the sum file :type filename: str :param convNames: :type convNames: dictionary :param RotClu: :type RotClu: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param saveMAP: :type saveMAP: """ global LAST_AVAILABLE_ROTID global MAP_OF_ROT_COMB if not os.path.exists(dirout): os.makedirs(dirout) pathfull = os.path.join(dirout, filename + ".sum") f = open(pathfull, "w") frag_fixed = False for ci in range(len(Clusters)): if LIMIT_CLUSTER != None and LIMIT_CLUSTER != ci: continue clu = Clusters[ci] if clu == None: f.write("============================== CLUSTER " + str( ci) + " ====== LONGEST SHIFT 0 0 ==============================" + "\n") f.write("===============================================================================" + "\n") continue lion = (clu["heapSolutions"]).asList() if "longest" not in clu: clu["longest"] = (0, 0) f.write("============================== CLUSTER " + str(ci) + " ====== LONGEST SHIFT " + str( clu["longest"][0]) + " " + str(clu["longest"][1]) + " ==============================" + "\n") for i in range(len(lion)): rota = (lion[i])[1] if euler_frac_zero: rota["euler"] = [0.0, 0.0, 0.0] rota["frac"] = [0.0, 0.0, 0.0] pdbname = "None" try: pdbname = convNames[rota["name"]] #print 'rota["name"]',rota["name"] #sys.exit(0) except: pdbname = "None" if "fixed_frags" not in rota: rota["fixed_frags"] = [] if "fixed_frags" in rota: frag_fixed = True f.write("///////////////FIXED FRAGMENTS///////////////" + "\n") for w in range(len(rota["fixed_frags"])): rotap = (rota["fixed_frags"])[w] pdbname2 = "None" try: pdbname2 = convNames[rotap["name"]] except: pdbname2 = "None" f.write("From Cluster: " + str(rotap["n_prev_cluster"]) + "\t") if "original_rotcluster" in rotap: f.write("Original Rot. Cluster: " + str(rotap["original_rotcluster"]) + "\n") if "numInRlist" in rotap: f.write( "n. " + str(i) + " coming from the pdb: " + pdbname2 + " ---> " + rotap["name"] + " ---> " + rotap["name"] + ".rlist" + " n.Rota: " + str(rotap["numInRlist"]) + "\n") else: f.write( "n. 
" + str(i) + " coming from the pdb: " + pdbname2 + " ---> " + rotap["name"] + " ---> " + rotap["name"] + ".rlist" + " n.Rota: " + "unknown" + "\n") rmsd = "None" if "rmsd" in rotap: rmsd = rotap["rmsd"] if "vrms" in rotap: f.write("EULER: " + str(rotap["euler"]) + "\t" + "QUATERNION:" + str( rotap["quaternion"]) + "\tVRMSD: " + str(rotap["vrms"]) + "\tVRMSD_DELTA: " + str(rotap["vrms_delta"])+"\tRMSD: "+str(rmsd)+"\n") else: f.write( "EULER: " + str(rotap["euler"]) + "\t" + "QUATERNION:" + str(rotap["quaternion"]) + "\tRMSD: "+str(rmsd)+ "\n") if "numInSol" in rotap: f.write("FRAC: " + str(rotap["frac"]) + "\t" + "n.Sol: " + str(rotap["numInSol"]) + "\n") else: f.write("FRAC: " + str(rotap["frac"]) + "\n") if "parameters" not in rotap.keys(): if "tfz==" in rotap: f.write( "LLG: " + str(rotap["llg"]) + "\t" + "ZSCORE: " + str( rotap["zscore"]) + "\tELONG: " + str( rotap["elong"]) + "\tBFAC: " + str(rotap["bfactor"]) + "\tTFZ==: " + str( rotap["tfz=="]) + "\n") else: f.write( "LLG: " + str(rotap["llg"]) + "\t" + "ZSCORE: " + str( rotap["zscore"]) + "\tELONG: " + str( rotap["elong"]) + "\tBFAC: " + str(rotap["bfactor"]) + "\n") else: if "tfz==" in rotap: f.write( "LLG: " + str(rotap["llg"]) + "\t" + "ZSCORE: " + str( rotap["zscore"]) + "\tELONG: " + str( rotap["elong"]) + "\tBFAC: " + str(rotap["bfactor"]) + "\tPDB: " + str( rotap["parameters"]["model_pdb"]) + "\tRMSD: " + str( rotap["parameters"]["rmsd"]) + "\tROT_RES: " + str( rotap["parameters"]["rot_res"]) + "\tROT_SAMPL: " + str( rotap["parameters"]["rot_sampl"]) + "\tTFZ==: " + str(rotap["tfz=="]) + "\n") else: f.write( "LLG: " + str(rotap["llg"]) + "\t" + "ZSCORE: " + str( rotap["zscore"]) + "\tELONG: " + str( rotap["elong"]) + "\tBFAC: " + str(rotap["bfactor"]) + "\tPDB: " + str( rotap["parameters"]["model_pdb"]) + "\tRMSD: " + str( rotap["parameters"]["rmsd"]) + "\tROT_RES: " + str( rotap["parameters"]["rot_res"]) + "\tROT_SAMPL: " + str( rotap["parameters"]["rot_sampl"]) + "\n") for lis in (rotap["rotationMatrices"])[1:2]: f.write("EQUIVALENT SIMMETRY: " + str(lis) + "\n") f.write("----------------" + "\n") f.write("/////////////////////////////////////////////" + "\n") if "n_prev_cluster" in rota: f.write("From Cluster: " + str(rota["n_prev_cluster"]) + "\t") else: f.write("From Cluster: " + str(ci) + " ") if "original_rotcluster" in rota: f.write("Original Rot. Cluster: " + str(rota["original_rotcluster"]) + "\n") if "numInRlist" in rota: #print 'rota["name"]', rota["name"] #print 'rota["numInRlist"]', rota["numInRlist"] #print 'Writing to file: ' #print ("n. " + str(i) + " coming from the pdb: " + pdbname + " ---> " + rota["name"] + " ---> " + rota[ # "name"] + ".rlist" + " n.Rota: " + str(rota["numInRlist"]) + "\n") #sys.exit(0) f.write("n. " + str(i) + " coming from the pdb: " + pdbname + " ---> " + rota["name"] + " ---> " + rota[ "name"] + ".rlist" + " n.Rota: " + str(rota["numInRlist"]) + "\n") else: f.write("n. 
" + str(i) + " coming from the pdb: " + pdbname + " ---> " + rota["name"] + " ---> " + rota[ "name"] + ".rlist" + " n.Rota: " + "unknown" + "\n") rmsd = "None" if "rmsd" in rota: rmsd = rota["rmsd"] if "vrms" in rota: f.write( "EULER: " + str(rota["euler"]) + "\t" + "QUATERNION:" + str(rota["quaternion"]) + "\tVRMSD: " + str( rota["vrms"]) + "\tVRMSD_DELTA: " + str(rota["vrms_delta"])+ "\tRMSD: "+str(rmsd)+ "\n") else: f.write("EULER: " + str(rota["euler"]) + "\t" + "QUATERNION:" + str(rota["quaternion"]) + "\tRMSD: "+str(rmsd)+"\n") if "numInSol" in rota: f.write("FRAC: " + str(rota["frac"]) + "\t" + "n.Sol: " + str(rota["numInSol"]) + "\n") else: f.write("FRAC: " + str(rota["frac"]) + "\n") if "parameters" not in rota.keys(): if "tfz==" in rota: f.write("LLG: " + str(rota["llg"]) + "\t" + "ZSCORE: " + str(rota["zscore"]) + "\tELONG: " + str( rota["elong"]) + "\tBFAC: " + str(rota["bfactor"]) + "\tTFZ==: " + str(rota["tfz=="]) + "\n") else: f.write("LLG: " + str(rota["llg"]) + "\t" + "ZSCORE: " + str(rota["zscore"]) + "\tELONG: " + str( rota["elong"]) + "\tBFAC: " + str(rota["bfactor"]) + "\n") else: if "tfz==" in rota: f.write("LLG: " + str(rota["llg"]) + "\t" + "ZSCORE: " + str(rota["zscore"]) + "\tELONG: " + str( rota["elong"]) + "\tBFAC: " + str(rota["bfactor"]) + "\tPDB: " + str( rota["parameters"]["model_pdb"]) + "\tRMSD: " + str( rota["parameters"]["rmsd"]) + "\tROT_RES: " + str( rota["parameters"]["rot_res"]) + "\tROT_SAMPL: " + str( rota["parameters"]["rot_sampl"]) + "\tTFZ==: " + str(rota["tfz=="]) + "\n") else: f.write("LLG: " + str(rota["llg"]) + "\t" + "ZSCORE: " + str(rota["zscore"]) + "\tELONG: " + str( rota["elong"]) + "\tBFAC: " + str(rota["bfactor"]) + "\tPDB: " + str( rota["parameters"]["model_pdb"]) + "\tRMSD: " + str( rota["parameters"]["rmsd"]) + "\tROT_RES: " + str( rota["parameters"]["rot_res"]) + "\tROT_SAMPL: " + str(rota["parameters"]["rot_sampl"]) + "\n") for lis in (rota["rotationMatrices"])[1:2]: f.write("EQUIVALENT SIMMETRY: " + str(lis) + "\n") f.write("----------------" + "\n") f.write("===============================================================================" + "\n") f.close() if frag_fixed: f = open(dirout + filename + "Rotations.sum", "w") pickle.dump(LAST_AVAILABLE_ROTID, f) pickle.dump(MAP_OF_ROT_COMB, f) f.close() if len(RotClu) == 0: return # All the following will be executed only if we are not at the first fragment search f = open(dirout + filename + "Rotations.sum", "w") pickle.dump(LAST_AVAILABLE_ROTID, f) pickle.dump(MAP_OF_ROT_COMB, f) f.close() if not saveMAP: return writeSumClusters(RotClu, dirout, filename + "ROTCLU", convNames, RotClu=[]) def getListPDBtoPerform(Clusters, convNames, dirBase=None): """ :param Clusters: :type Clusters: :param convNames: :type convNames: :param dirBase: :type dirBase: :return: :rtype: """ lista = [] for clu in Clusters: for rota in clu: if dirBase == None: lista.append(convNames[rota["name"]]) else: lista.append(dirBase + convNames[rota["name"]]) return lista def analyzeROTclusters(DicParameters, sumPathAll, sumPathEnt, outputDir, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG, fromV, toV): """ :param DicParameters: :type DicParameters: :param sumPathAll: :type sumPathAll: :param sumPathEnt: :type sumPathEnt: :param outputDir: :type outputDir: :param thresholdCompare: :type thresholdCompare: :param ClusteringMode: :type ClusteringMode: :param quate: :type quate: :param laue: :type laue: :param listNCS: :type listNCS: :param ensembles: :type ensembles: :param 
cell_dim: list with the unit cell parameters :type cell_dim: list :param evaLLONG: :type evaLLONG: :param fromV: :type fromV: :param toV: :type toV: :return: :rtype: """ Clu2, cnv2 = readClustersFromSUM(sumPathEnt) f = open(os.path.join(outputDir, "evaluation.sum"), "w") f.write("ENT file produces " + str(len(Clu2)) + " cluster rotations.\n\n") result = False for i in range(len(Clu2)): clu2 = Clu2[i] liClu2 = clu2['heapSolutions'].asList() prio2, rota2 = liClu2[0] result2 = False Clu1, cnv1 = readClustersFromSUM(sumPathAll) for j in range(len(Clu1)): clu1 = Clu1[j] liClu1 = clu1['heapSolutions'].asList() prio1, rota1 = liClu1[0] # if rota2["n_prev_cluster"] == 0 and rota1["n_prev_cluster"] == 13: # print rota2["name"],cnv2[rota2["name"]],rota2["llg"],rota2["zscore"] # print rota1["name"],cnv1[rota1["name"]],rota1["llg"],rota1["zscore"] # result,elong = compareRotation(rota2,rota1,thresholdCompare,ClusteringMode,quate,laue,listNCS,ensembles,cell_dim,evaLLONG,print_angles=True) # else: result, elong = compareRotation(rota2, rota1, thresholdCompare, ClusteringMode, quate, laue, listNCS, ensembles, cell_dim, evaLLONG) angle = rota1["angle"] if result: f.write("Rotation ENT cluster id. " + str(rota2["n_prev_cluster"]) + " with LLG " + str( rota2["llg"]) + " and ZSCORE " + str( rota2["zscore"]) + " corresponds to library cluster id. " + str( rota1["n_prev_cluster"]) + " with LLG " + str(rota1["llg"]) + " and ZSCORE " + str( rota1["zscore"]) + "\n") result2 = True f.write("ENT Cl: " + str(rota2["n_prev_cluster"]) + " LIB Cl: " + str( rota1["n_prev_cluster"]) + " min. angle: " + str(angle) + "\n") if not result2: f.write("Rotation ENT cluster id. " + str(rota2["n_prev_cluster"]) + " with LLG " + str( rota2["llg"]) + " and ZSCORE " + str(rota2["zscore"]) + " not corresponds to any library clusters\n") f.close() lid = [] for i in range(len(Clu1)): clu1 = Clu1[i] liClu1 = clu1['heapSolutions'].asList() stats = [] idcluster = 0 for item in liClu1: prio, rota = item pdbf = os.path.basename(cnv1[rota["name"]]) pdbid = pdbf.split("_")[0] stats.append([int(pdbid[4:]), rota["llg"], rota["zscore"], rota["numInRlist"]]) idcluster = rota["n_prev_cluster"] lid.append(idcluster) stats = sorted(stats, __cmp_statsrt) # values = tuple(map(lambda x: x[3],stats)) # names = tuple(map(lambda x: x[0],stats)) f = open(os.path.join(outputDir, "clust" + str(i) + ".dat"), "w") ult_nome = fromV for item in stats: cur_nome = item[0] while ult_nome < cur_nome: f.write(str(ult_nome) + "\t0\t0\n") ult_nome += 1 f.write(str(item[0]) + "\t" + str(item[3]) + "\t" + str(item[1]) + "\n") ult_nome += 1 if ult_nome < toV: f.write(str(ult_nome) + "\t0\t0\n") ult_nome += 1 f.close() for i in range(len(Clu1)): f = open(os.path.join(outputDir, "clust" + str(i) + ".scr"), "w") stringas = """ clear reset #set terminal postscript eps size 3.5,2.62 enhanced color font 'Helvetica,20' linewidth 2 set terminal png size 2000,800 set output '""" + os.path.abspath(os.path.join(outputDir, "clust" + str(i) + ".png")) + """' unset key set xtics rotate out set style data histograms set style fill solid border set style histogram clustered plot 'clust""" + str(i) + """.dat' using 2:xticlabels(1) title 'Analisys cluster """ + str(lid[i]) + """', '' using 3:xticlabels(1) """ f.write(stringas) f.close() # p = subprocess.Popen(['gnuplot', os.path.join(outputDir,"clust"+str(i)+".scr")], stdout=subprocess.PIPE, stderr=subprocess.PIPE) # out, err = p.communicate() # print out # print err def __getStringOfRototranslatedStructure(DicParameters, frag_fixed, 
dirout, mode, quate, convNames, ntop, writePDB, performTranslation, elongatingModel, createSimmetry, cell_dim, laue, ncs, bestRota, ci, i, modeTra="frac", LIMIT_CLUSTER=None, renameWithConvNames=False, sufixSolPos=False, where_to_write={}): """ :param DicParameters: :type DicParameters: :param frag_fixed: :type frag_fixed: :param dirout: :type dirout: :param mode: :type mode: :param quate: :type quate: :param convNames: :type convNames: :param ntop: :type ntop: :param writePDB: :type writePDB: :param performTranslation: :type performTranslation: :param elongatingModel: :type elongatingModel: :param createSimmetry: :type createSimmetry: :param cell_dim: list with the unit cell parameters :type cell_dim: list :param laue: :type laue: :param ncs: :type ncs: :param bestRota: :type bestRota: :param ci: :type ci: :param i: :type i: :param modeTra: :type modeTra: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param renameWithConvNames: :type renameWithConvNames: :param sufixSolPos: :type sufixSolPos: :param where_to_write: :type where_to_write: :return: :rtype: """ structureRef = None quo = None nomeFile = "" listNames = [] if elongatingModel == None or clu["longest"] == [0, 0]: parser = PDBParser() structure = parser.get_structure(bestRota["name"], convNames[bestRota["name"]]) elif elongatingModel != None: # print "SHERLOCK We need to elongate the helix ",clu["longest"] structure = getLongerFragment(elongatingModel, clu["longest"], convNames[bestRota["name"]]) if mode == "matrix": rotateStructureByMatrix(ci, i, structure, bestRota["rotationMatrices"], dirout, cell_dim) nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) elif mode == "quaternion": rotateStructureByQuaternion(ci, i, structure, quate, bestRota["quaternion"], dirout) nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) elif mode == "simmetry_rotated": if "simmetry_rotated" in bestRota: # rotateStructureByQuaternion(ci,i,structure,quate,rota["quaternion"],dirout) # parser=PDBParser() # structure=parser.get_structure(rota["name"],dirout+str(ci)+"_"+str(i)+"_rot.pdb") rotateStructureByQuaternion(ci, i, structure, quate, bestRota["simmetry_rotated"], dirout) quo = bestRota["simmetry_rotated"] nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) else: rotateStructureByQuaternion(ci, i, structure, quate, bestRota["quaternion"], dirout) quo = bestRota["quaternion"] nomeFile = dirout + str(ci) + "_" + str(i) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i) + "_rot.pdb", nomeFile) listNames.append(nomeFile) if createSimmetry: nSymm = 100 * i for rti, rtq in (quate.matricesRot[laue]).iteritems(): if nSymm == 100: nSymm += 1 
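                # NOTE (inferred from the surrounding code, not stated by the author):
                # symmetry-expanded copies of solution i are written as
                # "<cluster>_<i + nSymm>_rot.pdb". Here nSymm starts at 100 * i, so this
                # guard only ever skips an operator when i == 1; the translation branches
                # below restart nSymm at a plain 100 instead, which suggests the first
                # (identity) operator of quate.matricesRot[laue] is meant to be skipped
                # every time.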
continue # new_quat = quate.RotateQuaternion(rot1["quaternion"],rtq) # aas = [[[1,0,0],[0,1,0],[0,0,1]]]+[rtq]#bestRota["rotationMatrices"]#+[rtq] aas = bestRota["rotationMatrices"] + [rtq] rotateStructureByMatrix(ci, i + nSymm, structure, aas, dirout, cell_dim) nomeFile = dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb" if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb", nomeFile) nSymm += 1 listNames.append(nomeFile) if modeTra == "frac" and performTranslation: parser = PDBParser() if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(bestRota["name"], nomeFile) translateStructurebyFrac(ci, i, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(bestRota["name"], dirout + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyFrac(ci, i, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass if createSimmetry: nSymm = 100 for rti, rtq in (quate.quaterRotaz[laue]).iteritems(): if nSymm == 100: nSymm += 1 continue if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str( i + nSymm) + ".pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass nSymm += 1 elif performTranslation and modeTra == "Cmass": # print "Devo scrivere traslai",i if i == 0: parser = PDBParser() if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structureRef = parser.get_structure(bestRota["name"], dirout + os.path.basename(convNames[bestRota["name"]])) translateStructurebyCentroidMass(ci, i, structureRef, structureRef, dirout) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i) + "_rottra.pdb", nomeFile) else: structureRef = parser.get_structure(bestRota["name"], dirout + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyCentroidMass(ci, i, structureRef, structureRef, dirout) os.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") 
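                    # NOTE: within this symmetry loop the listNames.remove/append pair
                    # just above and below still references str(i) rather than
                    # str(i + nSymm), so the "_rottra" name of a symmetry mate never
                    # replaces its "_rot" entry in listNames; the same pattern repeats
                    # in the Cmass branches and looks like a possible copy-paste
                    # oversight.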
listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass if createSimmetry: nSymm = 100 for rti, rtq in (quate.quaterRotaz[laue]).iteritems(): if nSymm == 100: nSymm += 1 continue if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + os.path.basename(convNames[bestRota["name"]])[ :-4] + str(i + nSymm) + ".pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass nSymm += 1 else: parser = PDBParser() if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]]) if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(bestRota["name"], dirout + os.path.basename(convNames[bestRota["name"]])) translateStructurebyCentroidMass(ci, i, structure, structureRef, dirout) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(bestRota["name"], dirout + str(ci) + "_" + str(i) + "_rot.pdb") translateStructurebyCentroidMass(ci, i, structure, structureRef, dirout) os.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass if createSimmetry: nSymm = 100 for rti, rtq in (quate.quaterRotaz[laue]).iteritems(): if nSymm == 100: nSymm += 1 continue if renameWithConvNames: nomeFile = dirout + os.path.basename(convNames[bestRota["name"]])[:-4] + str(i + nSymm) + ".pdb" if sufixSolPos: nomeFile = nomeFile[:-4] + "-" + bestRota["name"].split("-")[1] + ".pdb" structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + os.path.basename(convNames[bestRota["name"]])[ :-4] + str(i + nSymm) + ".pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(nomeFile) shutil.move(dirout + str(ci) + "_" + str(i + nSymm) + "_rottra.pdb", nomeFile) else: structure = parser.get_structure(str(ci) + "_" + str(i + nSymm) + "_rot.pdb", dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") translateStructurebyFrac(ci, i + nSymm, structure, bestRota["frac"], dirout, cell_dim) os.remove(dirout + str(ci) + "_" + str(i + nSymm) + "_rot.pdb") try: listNames.remove(dirout + str(ci) + "_" + str(i) + "_rot.pdb") listNames.append(dirout + str(ci) + "_" + str(i) + "_rottra.pdb") except Exception: pass nSymm += 1 if len(list(where_to_write.keys()))>0: lider = [] for name in where_to_write.values(): for liname in listNames: if os.path.basename(name)[:-4] in liname: shutil.move(liname, name) lider.append(name) listNames = lider return listNames def readRefFromSUM(sumPath): """ :param sumPath: :type sumPath: :return: :rtype: 
""" f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 CCVAL = [] while line != None and line != "": # process line if line.startswith("======="): print "start reading Ref. n. " + str(numClus) riga1 = line riga2 = (f.readline()).split() riga3 = (f.readline()).split() riga4 = f.readline() model = int(riga2[1]) corresp = getNewPathFromMerging(sumPath, riga2[3]) rmsd = float(riga2[5]) # print 'riga3',riga3 llg = float(riga3[1]) zscore = float(riga3[3]) nref = float(riga3[5]) ncom = float(riga3[7]) dizio = {"model": model, "corresp": corresp, "rmsd": rmsd, "llg": llg, "zscore": zscore, "nref": nref, "ncom": ncom} CCVAL.append(dizio) convNames[os.path.basename(corresp)] = corresp line = f.readline() numClus += 1 f.close() return (CCVAL, convNames) def readCCValFromSUM(sumPath): """ Reads a sum file of the type solCC.sum and returns the list of dictionaries with the information from each. :param sumPath: path to the sum file :type sumPath: str :return: CCVal, list of dictionaries. Each one is a solution, and contains the following keys: {"model": model, "corresp": corresp, "natoms": nAtoms, "ner": nER, "initcc": initCC, "finalcc": finalCC, "cluster": cluster, "wMPE_init": [wMPEa, wMPEb], "wMPE_end": [wMPEc, wMPEd], "shift_origin": [shx, shy, shz], "contrast": contrast, "connect": connect, "mfom": mfom, "sfom": sfom} :rtype: list """ if not os.path.exists(sumPath): return [], {} f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 CCVAL = [] while line != None and line != "": # process line if line.startswith("======="): print "start reading CC_Val n. " + str(numClus) riga1 = line riga2 = (f.readline()).split() riga3 = (f.readline()).split() riga4 = (f.readline()).split() riga5 = f.readline() model = int(riga2[1]) corresp = getNewPathFromMerging(sumPath, riga2[3]) cluster = riga2[5] if cluster == "None": cluster = "None" else: cluster = cluster nAtoms = int(riga3[1]) nER = int(riga3[3]) initCC = float(riga3[5]) finalCC = float(riga3[7]) wMPEa = float(riga3[9]) wMPEb = float(riga3[11]) wMPEc = float(riga3[13]) wMPEd = float(riga3[15]) shx = float(riga4[1]) shy = float(riga4[2]) shz = float(riga4[3]) contrast = float(riga4[5]) connect = float(riga4[7]) mfom = float(riga4[9]) sfom = float(riga4[11]) dizio = {"model": model, "corresp": corresp, "natoms": nAtoms, "ner": nER, "initcc": initCC, "finalcc": finalCC, "cluster": cluster, "wMPE_init": [wMPEa, wMPEb], "wMPE_end": [wMPEc, wMPEd], "shift_origin": [shx, shy, shz], "contrast": contrast, "connect": connect, "mfom": mfom, "sfom": sfom} CCVAL.append(dizio) line = f.readline() numClus += 1 f.close() return (CCVAL, convNames) def readClustersFromSUMToDB(DicParameters, sumPath, table, LIMIT_CLUSTER=None, skip_reading_variables=False, give_fixed_frags=False, euler_to_zero=False): """ :param DicParameters: :type DicParameters: :param sumPath: :type sumPath: :param table: :type table: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param skip_reading_variables: :type skip_reading_variables: :param give_fixed_frags: :type give_fixed_frags: :param euler_to_zero: :type euler_to_zero: :return: :rtype: """ global LAST_AVAILABLE_ROTID global MAP_OF_ROT_COMB print "Reading:", sumPath please_exit = False if os.path.exists(sumPath): f = open(sumPath, "r") if f.readline() == "": f.close() please_exit = True else: please_exit = True if please_exit: if give_fixed_frags: return {}, [], [], [], {} else: return {}, [], [], {} genera = readClustersFromSUMwithYELD(sumPath, euler_to_zero=euler_to_zero) if not 
skip_reading_variables and os.path.exists(sumPath[:-4] + "Rotations.sum"): # print "TABLE",table,sumPath f = open(sumPath[:-4] + "Rotations.sum", "r") LAST_AVAILABLE_ROTID = pickle.load(f) MAP_OF_ROT_COMB = pickle.load(f) f.close() RotClu = [] encn = {} if not skip_reading_variables and os.path.exists(sumPath[:-4] + "ROTCLU.sum"): encn, RotClu, lose, lose2 = readClustersFromSUMToDB(DicParameters, sumPath[:-4] + "ROTCLU.sum", "ROTCLU", LIMIT_CLUSTER=LIMIT_CLUSTER, skip_reading_variables=True) ClusAll = [] numero_all_clu = genera.next() numfixed = 0 for clu in range(numero_all_clu): dicton = {"heapSolutions": ADT.Heap()} sol = genera.next() if sol == None: continue numclu = sol[1]["n_prev_cluster"] while sol != None: prio, rota = sol try: numfixed = len(rota['fixed_frags']) except: pass dicton["heapSolutions"].push(prio, rota) sol = genera.next() if len(ClusAll) <= numclu: while len(ClusAll) < numclu + 1: ClusAll.append({"heapSolutions": ADT.Heap()}) ClusAll[numclu] = dicton if not skip_reading_variables: LAST_AVAILABLE_ROTID = numero_all_clu if give_fixed_frags: return genera.next(), ClusAll, RotClu, encn, numfixed else: return genera.next(), ClusAll, RotClu, encn def getNewPathFromMerging(sumPath, rigali): """ :param sumPath: :type sumPath: :param rigali: :type rigali: :return: :rtype: """ global BASE_SUM_FROM_WD rigali = os.path.normpath(rigali) sumPath = os.path.normpath(sumPath) tor = "" if not BASE_SUM_FROM_WD: tor = rigali else: # NOTE: I could create a function to extract the WD from a path # To be honest it should be better to put complete names as 1_FRF_LIBRARY # To avoid splitting in places where we do not want as in 3efg_1_0.pdb sumPath = sumPath.split("ELLG_COMP")[0] sumPath = sumPath.split("_ensembles")[0] sumPath = sumPath.split("ens1_frag")[0] sumPath = sumPath.split("models")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/1_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/2_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/6_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/7.5_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/onemodel")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/3_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/4_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/8.5_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/8_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/9.5_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/9_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/10_")[0] sumPath = sumPath.split("ARCIMBOLDO_BORGES/11_")[0] sumPath = sumPath.split("1_")[0] sumPath = sumPath.split("2_")[0] sumPath = sumPath.split("6_")[0] sumPath = sumPath.split("7.5_")[0] sumPath = sumPath.split("onemodel")[0] sumPath = sumPath.split("3_")[0] sumPath = sumPath.split("4_")[0] sumPath = sumPath.split("8.5_")[0] sumPath = sumPath.split("8_")[0] sumPath = sumPath.split("9.5_")[0] sumPath = sumPath.split("9_")[0] sumPath = sumPath.split("10_")[0] sumPath = sumPath.split("11_")[0] if sumPath.endswith("/"): sumPath = sumPath[:-1] else: sumPath = os.path.split(sumPath)[0] rigas = rigali rigas = rigas.split("ELLG_COMP")[0] rigas = rigas.split("ens1_frag")[0] rigas = rigas.split("models")[0] rigas = rigas.split("_ensembles")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/1_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/2_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/6_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/7.5_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/onemodel")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/3_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/4_")[0] rigas = 
rigas.split("ARCIMBOLDO_BORGES/8.5_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/8_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/9.5_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/9_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/10_")[0] rigas = rigas.split("ARCIMBOLDO_BORGES/11_")[0] rigas = rigas.split("1_")[0] rigas = rigas.split("2_")[0] rigas = rigas.split("6_")[0] rigas = rigas.split("7.5_")[0] rigas = rigas.split("onemodel")[0] rigas = rigas.split("3_")[0] rigas = rigas.split("4_")[0] rigas = rigas.split("8.5_")[0] rigas = rigas.split("8_")[0] rigas = rigas.split("9.5_")[0] rigas = rigas.split("9_")[0] rigas = rigas.split("10_")[0] rigas = rigas.split("11_")[0] rigas = rigas.split("_ensembles")[0] if not rigas.endswith("/"): rigas = os.path.split(rigas)[0] # print "rigali===================",rigali if os.path.exists(rigali): tor = rigali else: # print sumPath # print rigas # print rigali strinit1, strcommon1, strend1 = ADT.split_string_by_common(rigali, rigas) if strend1.startswith("/"): strend1 = strend1[1:] # print "============", strinit1,strcommon1,strend1 tor = os.path.join(sumPath, strend1) """ common = ADT.LCS(sumPath, rigali) # base = sumPath.split(common)[0] #print "VERY COMMON",common different, common, starting, ending = ADT.substract_string(sumPath, common) different2, common2, starting2, ending2 = ADT.substract_string(rigali, common) #print "sumpath",sumPath,"COMMON",common #print "STARTING",starting,"1",os.path.join(common,different2),"2",os.path.join(different,common) if starting and len(common) > 0: print "----",common, different2 tor = os.path.join(common, different2) elif ending and len(common) > 0: tor = os.path.join(different, common) elif (not starting and not ending) and len(common) == 0: tor = rigali elif (not starting and not ending): strinit1,strcommon1,strend1 = ADT.split_string_by_common(sumPath, common) strinit2,strcommon2,strend2 = ADT.split_string_by_common(rigali, common) if os.path.exists(os.path.join(os.path.join(strinit1,strcommon1),strend2)): #CASE with commom /text/ tor = os.path.join(os.path.join(strinit1,strcommon1),strend2) else: tor = os.path.join(strinit1+strcommon1,strend2) print strinit1,strcommon1,strend1 print strinit2,strcommon2,strend2 print sumPath print rigali print common print tor """ if not os.path.exists(tor): print tor print "ATTENTION: The program cannot automatically recover and interpret the precomputed run from another working directory.\n Please, remove intermediate directory and restart." sys.exit(0) return tor def readClustersFromSUMwithYELD(sumPath, euler_to_zero=False): """ :param sumPath: :type sumPath: :param euler_to_zero: :type euler_to_zero: :return: :rtype: """ ntrials = 10 nt = 0 out = "" numClus = None while nt <= ntrials: try: p = subprocess.Popen(["grep", "= CLUSTER", sumPath], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() numClus = len(out.splitlines()) break except: print "subprocess not ready...." nt += 1 # time.sleep(4) if len(out) == 0: f = open(sumPath) e = f.read() f.close() numClus = e.count("= CLUSTER") yield numClus f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 while line != None and line != "": # process line if line.startswith("================="): print "start reading Cluster n. 
" + str(numClus) ytr = line.split() clu = {} if ytr[6] == "None": ytr[6] = 0 if ytr[7] == "None": ytr[7] = 0 clu["longest"] = [int(ytr[6]), int(ytr[7])] line = f.readline() numRot = 0 quot = Quaternions.Quaternions() signs = None while not line.startswith("==================="): fixed = [] if line.startswith("/////////"): line = f.readline() while not line.startswith("////////"): # print "...inserting rotation n. "+str(numRot) dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) if euler_to_zero: dizio["euler"] = [0.0, 0.0, 0.0] else: dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) if len(riga4.split()) == 10: dizio["tfz=="] = float((riga4.split())[9]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) if len(riga4.split()) == 16: dizio["parameters"] = {"model_pdb": riga4.split()[9], "rmsd": float(riga4.split()[11]), "rot_res": float(riga4.split()[13]), "rot_sampl": float(riga4.split()[15])} q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] try: typet = riga2.split() if len(typet) > 8: if typet[8] == "VRMSD:": vrms = float(((riga2.split())[9])) dizio["vrms"] = vrms vrms_delta = float(((riga2.split())[11])) dizio["vrms_delta"] = vrms_delta elif typet[8] == "RMSD:": rms = float(((riga2.split())[9])) dizio["rmsd"] = rms if len(typet) > 12: if typet[12] == "RMSD:": rms = float(((riga2.split())[13])) dizio["rmsd"] = rms except: pass # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath, riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): fixed.append(dizio) else: print "ERROR---NOT CORRESPONDING ROWS!!!" 
print lastRiga print sumPath print dizio sys.exit(0) line = f.readline() line = f.readline() dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() if len(fixed) > 0: dizio["fixed_frags"] = fixed e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) if euler_to_zero: dizio["euler"] = [0.0, 0.0, 0.0] else: dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) if len(riga4.split()) == 10: dizio["tfz=="] = float((riga4.split())[9]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) if len(riga4.split()) == 16: dizio["parameters"] = {"model_pdb": riga4.split()[9], "rmsd": float(riga4.split()[11]), "rot_res": float(riga4.split()[13]), "rot_sampl": float(riga4.split()[15])} q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] try: typet = riga2.split() if len(typet) > 8: if typet[8] == "VRMSD:": vrms = float(((riga2.split())[9])) dizio["vrms"] = vrms vrms_delta = float(((riga2.split())[11])) dizio["vrms_delta"] = vrms_delta elif typet[8] == "RMSD:": rms = float(((riga2.split())[9])) dizio["rmsd"] = rms if len(typet) > 12: if typet[12] == "RMSD:": rms = float(((riga2.split())[13])) dizio["rmsd"] = rms except: pass # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath, riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): priority = (-1 * dizio["llg"], -1 * dizio["zscore"]) qlo = None if "simmetry_rotated" in dizio.keys(): qlo = dizio["simmetry_rotated"] else: qlo = dizio["quaternion"] if signs == None: signs = quot.signOfQuaternionComponents(qlo) soltuple = (priority, dizio) yield soltuple else: print "ERROR---NOT CORRESPONDING ROWS!!!" print lastRiga print sumPath print dizio sys.exit(0) line = f.readline() numRot += 1 print "End reading of cluster n. 
" + str(numClus) yield None numClus += 1 line = f.readline() else: line = f.readline() f.close() yield convNames def readClustersFromSUM(sumPath): """ :param sumPath: :type sumPath: :return: :rtype: """ global LAST_AVAILABLE_ROTID global MAP_OF_ROT_COMB print "Reading:", sumPath reloaded_variables = False if os.path.exists(sumPath[:-4] + "Rotations.sum"): f = open(sumPath[:-4] + "Rotations.sum", "r") LAST_AVAILABLE_ROTID = pickle.load(f) MAP_OF_ROT_COMB = pickle.load(f) f.close() reloaded_variables = True f = open(sumPath, "r") line = f.readline() Clusters = [] convNames = {} numClus = 0 while line != None and line != "": # process line if line.startswith("================="): print "start reading Cluster n. " + str(numClus) ytr = line.split() clu = {} if ytr[6] == "None": ytr[6] = 0 if ytr[7] == "None": ytr[7] = 0 clu["longest"] = [int(ytr[6]), int(ytr[7])] hp = ADT.Heap() line = f.readline() numRot = 0 dul = {} dul["quaternion"] = [0.0, 0.0, 0.0, 0.0] dul["euler"] = [0.0, 0.0, 0.0] dul["frac"] = [0.0, 0.0, 0.0] dul["llg"] = 0.0 dul["zscore"] = 0.0 dul["sumQuat"] = [0.0, 0.0, 0.0, 0.0] dul["sumEuler"] = [0.0, 0.0, 0.0] dul["sumLlg"] = 0.0 dul["sumZscore"] = 0.0 dul["sumFrac"] = [0.0, 0.0, 0.0] quot = Quaternions.Quaternions() signs = None while not line.startswith("==================="): fixed = [] if line.startswith("/////////"): line = f.readline() while not line.startswith("////////"): # print "...inserting rotation n. "+str(numRot) dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) if len(riga4.split()) == 10: dizio["tfz=="] = float((riga4.split())[9]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath, riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): fixed.append(dizio) else: 
print "ERROR---NOT CORRESPONDING ROWS!!!" print lastRiga print sumPath print dizio sys.exit(0) line = f.readline() line = f.readline() dizio = {} riga1 = line if riga1.startswith("From Cluster:"): dizio["n_prev_cluster"] = int((riga1.split())[2]) dizio["original_rotcluster"] = (riga1.split())[6] riga1 = f.readline() riga2 = f.readline() riga3 = f.readline() riga4 = f.readline() riga5 = f.readline() if len(fixed) > 0: dizio["fixed_frags"] = fixed #print 'SHERLOCK, riga2',riga2 e1 = float(((riga2.split())[1])[1:-1]) e2 = float(((riga2.split())[2])[:-1]) e3 = float(((riga2.split())[3])[:-1]) dizio["euler"] = [e1, e2, e3] dizio["rotationMatrices"] = [ matrixFromEulerAngles(dizio["euler"][0], dizio["euler"][1], dizio["euler"][2])] f1 = float(((riga3.split())[1])[1:-1]) f2 = float(((riga3.split())[2])[:-1]) f3 = float(((riga3.split())[3])[:-1]) if len(riga3.split()) == 6: dizio["numInSol"] = int((riga3.split())[5]) dizio["frac"] = [f1, f2, f3] dizio["zscore"] = float((riga4.split())[3]) if len(riga4.split()) == 10: dizio["tfz=="] = float((riga4.split())[9]) dizio["llg"] = float((riga4.split())[1]) el = 0 if (riga4.split())[5] != "None": el = float((riga4.split())[5]) dizio["elong"] = el dizio["bfactor"] = float((riga4.split())[7]) q1 = float(((riga2.split())[4])[12:-1]) q2 = float(((riga2.split())[5])[:-1]) q3 = float(((riga2.split())[6])[:-1]) q4 = float(((riga2.split())[7])[:-1]) dizio["quaternion"] = [q1, q2, q3, q4] # dizio["rotationMatrix"] = matrixFromEulerAngles(dizio["euler"][0],dizio["euler"][1],dizio["euler"][2]) # dizio["baseDir"] = baseDir dizio["name"] = ((riga1.split())[8]) if dizio["name"] not in convNames: convNames[dizio["name"]] = getNewPathFromMerging(sumPath, riga1.split()[6]) if (riga1.split())[12] != "unknown": dizio["numInRlist"] = int((riga1.split())[12]) if riga5.startswith("EQUIVALENT"): bra = riga5.split() a00 = float((bra[2])[2:-1]) a01 = float((bra[3])[:-1]) a02 = float((bra[4])[:-2]) a10 = float((bra[5])[1:-1]) a11 = float((bra[6])[:-1]) a12 = float((bra[7])[:-2]) a20 = float((bra[8])[1:-1]) a21 = float((bra[9])[:-1]) a22 = float((bra[10])[:-2]) dizio["rotationMatrices"].append([[a00, a01, a02], [a10, a11, a12], [a20, a21, a22]]) lastRiga = f.readline() else: lastRiga = riga5 if lastRiga.startswith("-------------"): priority = (-1 * dizio["llg"], -1 * dizio["zscore"]) qlo = None if "simmetry_rotated" in dizio.keys(): qlo = dizio["simmetry_rotated"] else: qlo = dizio["quaternion"] if signs == None: signs = quot.signOfQuaternionComponents(qlo) (dul["sumQuat"])[0] += abs(qlo[0]) (dul["sumQuat"])[1] += abs(qlo[1]) (dul["sumQuat"])[2] += abs(qlo[2]) (dul["sumQuat"])[3] += abs(qlo[3]) (dul["sumEuler"])[0] += (dizio["euler"])[0] (dul["sumEuler"])[1] += (dizio["euler"])[1] (dul["sumEuler"])[2] += (dizio["euler"])[2] (dul["sumFrac"])[0] += (dizio["frac"])[0] (dul["sumFrac"])[1] += (dizio["frac"])[1] (dul["sumFrac"])[2] += (dizio["frac"])[2] dul["sumLlg"] += dizio["llg"] dul["sumZscore"] += dizio["zscore"] hp.push(priority, dizio) else: print "ERROR---NOT CORRESPONDING ROWS!!!" 
                    print lastRiga
                    print sumPath
                    print dizio
                    sys.exit(0)
                line = f.readline()
                numRot += 1
            clu["heapSolutions"] = hp
            if hp.len() > 0:
                # centroid of the cluster: component-wise averages over the heap, with
                # the quaternion re-signed by the first solution's sign pattern
                # (all four components, as in filterClustersByKmeansCrossValidated below)
                (dul["quaternion"])[0] = signs[0] * ((dul["sumQuat"])[0] / hp.len())
                (dul["quaternion"])[1] = signs[1] * ((dul["sumQuat"])[1] / hp.len())
                (dul["quaternion"])[2] = signs[2] * ((dul["sumQuat"])[2] / hp.len())
                (dul["quaternion"])[3] = signs[3] * ((dul["sumQuat"])[3] / hp.len())
                (dul["euler"])[0] = (dul["sumEuler"])[0] / hp.len()
                (dul["euler"])[1] = (dul["sumEuler"])[1] / hp.len()
                (dul["euler"])[2] = (dul["sumEuler"])[2] / hp.len()
                (dul["frac"])[0] = (dul["sumFrac"])[0] / hp.len()
                (dul["frac"])[1] = (dul["sumFrac"])[1] / hp.len()
                (dul["frac"])[2] = (dul["sumFrac"])[2] / hp.len()
                dul["llg"] = dul["sumLlg"] / hp.len()
                dul["zscore"] = dul["sumZscore"] / hp.len()
            clu['centroid'] = dul
            Clusters.append(clu)
            print 'End reading of cluster n. ' + str(numClus)
            numClus += 1
            line = f.readline()
        else:
            line = f.readline()
    f.close()
    if not reloaded_variables:
        LAST_AVAILABLE_ROTID = numClus
    return (Clusters, convNames)


def filterClustersByKmeansCrossValidated(Clusters, convNames, limit, treshJump, rotaPerSubGroup):
    """
    Subdivides every rotation cluster holding at least limit solutions with a
    cross-validated K-Means on the (llg, zscore) plane and keeps the top
    rotaPerSubGroup solutions of each subcluster.

    :param Clusters: cluster dictionaries, each with a "heapSolutions" heap
    :type Clusters: list
    :param convNames: map from solution names to pdb paths
    :type convNames: dict
    :param limit: minimum cluster size that triggers the subclustering
    :type limit: int
    :param treshJump: threshold on the drop of the cross-validated error used to stop increasing K
    :type treshJump: float
    :param rotaPerSubGroup: number of top solutions kept from each subcluster
    :type rotaPerSubGroup: int
    :return: list of (possibly subclustered) cluster dictionaries
    :rtype: list
    """
    # the module-level "import scipy.cluster.vq" is commented out, so import it here
    import scipy.cluster.vq

    Clus = []
    for i in range(len(Clusters)):
        print 'SubClustering with K-Means Algorithm of the Cluster: ' + str(i)
        clu = Clusters[i]
        liClu = clu['heapSolutions'].asList()
        if len(liClu) < limit:
            Clus.append(clu)
            continue
        else:
            hp = ADT.Heap()
            dizio = {}
            dul = {}
            dul['quaternion'] = [0.0, 0.0, 0.0, 0.0]
            dul['euler'] = [0.0, 0.0, 0.0]
            dul['frac'] = [0.0, 0.0, 0.0]
            dul['llg'] = 0.0
            dul['zscore'] = 0.0
            dul['sumQuat'] = [0.0, 0.0, 0.0, 0.0]
            dul['sumEuler'] = [0.0, 0.0, 0.0]
            dul['sumLlg'] = 0.0
            dul['sumZscore'] = 0.0
            dul['sumFrac'] = [0.0, 0.0, 0.0]
            listaFOM = []
            for r in liClu:
                prio = r[0]
                rota = r[1]
                listaFOM.append([rota['llg'], rota['zscore']])
            narr = numpy.array(listaFOM)
            whitened = scipy.cluster.vq.whiten(narr)
            valori = []
            v = 5
            whishu = copy.deepcopy(whitened)
            numpy.random.shuffle(whishu)
            print 'V-Parameter chosen for the cross-validation is: ' + str(v)
            subs = numpy.array_split(whishu, v)
            kappa = 0
            # range() needs integers: cast the heuristic starting K and keep it >= 1
            start = max(1, int(numpy.sqrt(len(whitened) / 2) / 2))
            # k is the candidate number of subclusters (renamed from i, which shadowed
            # the outer cluster index)
            for k in range(start, start * 2 * 2):
                avg_crossv = 0.0
                for q in range(len(subs)):
                    test = subs[q]
                    avg_sqd = 0.0
                    for z in range(len(subs)):
                        if z != q:
                            training = subs[z]
                            groups, labels = scipy.cluster.vq.kmeans2(training, k, iter=20, minit='points')
                            sum_sqd = 0.0
                            for ctest in test:
                                sqd_min = numpy.inf
                                for centroid in groups:
                                    sqd = (centroid[0] - ctest[0]) ** 2 + (centroid[1] - ctest[1]) ** 2
                                    if sqd < sqd_min:
                                        sqd_min = sqd
                                sum_sqd += sqd_min
                            avg_sqd += sum_sqd
                    avg_sqd /= v - 1
                    avg_crossv += avg_sqd
                avg_crossv /= v
                valori.append([avg_crossv, k])
                print k, avg_crossv,
                kappa = k
                if len(valori) > 1:
                    jump = valori[-2][0] - valori[-1][0]
                    print jump
                    if jump <= treshJump:
                        break
                else:
                    print
            print 'Performing a subcluster with K-Means with K=' + str(kappa)
            groups, labels = scipy.cluster.vq.kmeans2(whitened, kappa, iter=20, minit='points')
            subclu = [ADT.Heap() for _ in range(kappa)]
            for p in range(len(labels)):
                ind = labels[p]
                rota = liClu[p]
                subclu[ind].push(rota[0], rota[1])
            quot = Quaternions.Quaternions()
            signs = None
            for sc in subclu:
                for l in range(rotaPerSubGroup):
                    try:
                        prio, item = sc.pop()
                        hp.push(prio, item)
                        qlo = None
                        if 'simmetry_rotated' in item.keys():
                            qlo = item['simmetry_rotated']
                        else:
                            qlo = item['quaternion']
                        if signs is None:
                            signs = quot.signOfQuaternionComponents(qlo)
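                        # Quaternion components are accumulated below as absolute values
                        # and re-signed at the end with the sign pattern of the first
                        # popped solution: q and -q describe the same rotation, so
                        # averaging the raw components could cancel them out.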
dul['sumQuat'][0] += abs(qlo[0]) dul['sumQuat'][1] += abs(qlo[1]) dul['sumQuat'][2] += abs(qlo[2]) dul['sumQuat'][3] += abs(qlo[3]) dul['sumEuler'][0] += item['euler'][0] dul['sumEuler'][1] += item['euler'][1] dul['sumEuler'][2] += item['euler'][2] dul['sumFrac'][0] += item['frac'][0] dul['sumFrac'][1] += item['frac'][1] dul['sumFrac'][2] += item['frac'][2] dul['sumLlg'] += item['llg'] dul['sumZscore'] += item['zscore'] except: pass if hp.len() > 0: dul['quaternion'][0] = signs[0] * (dul['sumQuat'][0] / hp.len()) dul['quaternion'][1] = signs[1] * (dul['sumQuat'][1] / hp.len()) dul['quaternion'][2] = signs[2] * (dul['sumQuat'][2] / hp.len()) dul['quaternion'][3] = signs[3] * (dul['sumQuat'][3] / hp.len()) dul['euler'][0] = dul['sumEuler'][0] / hp.len() dul['euler'][1] = dul['sumEuler'][1] / hp.len() dul['euler'][2] = dul['sumEuler'][2] / hp.len() dul['frac'][0] = dul['sumFrac'][0] / hp.len() dul['frac'][1] = dul['sumFrac'][1] / hp.len() dul['frac'][2] = dul['sumFrac'][2] / hp.len() dul['llg'] = dul['sumLlg'] / hp.len() dul['zscore'] = dul['sumZscore'] / hp.len() dizio['centroid'] = dul dizio['heapSolutions'] = hp Clus.append(dizio) return Clus def evaluateFTF(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, ensembles, excludeZscore, fixed_frags, quate, mode, laue, listNCS, clusteringMode, cell_dim, thresholdCompare, evaLLONG, tops=None, LIMIT_CLUSTER=None, convNames={}, applyNameFilter=False, renamePDBs=True, giveids=False, isArcimboldo=False, ent=None, models_directory=None, model_file=None, excludeZscoreRNP=0.0, cleanshsol=True, make_positive_llg=False, is_verification=False): """ Process the output for the phaser steps that involve translations (translation, packing, rigid body refinement and gimble) :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: :type DicGridConn: :param nameJob: :type nameJob: :param outputDicr: :type outputDicr: :param nqueue: :type nqueue: :param ensembles: :type ensembles: :param excludeZscore: :type excludeZscore: :param fixed_frags: :type fixed_frags: :param quate: :type quate: :param mode: :type mode: :param laue: :type laue: :param listNCS: :type listNCS: :param clusteringMode: :type clusteringMode: :param cell_dim: cell dimensions as read from phaser out at the initial anisotropy correction :type cell_dim: list :param thresholdCompare: threshold for the algorithm of rotation clustering :type thresholdCompare: float :param evaLLONG: Checking of the elongation of helices if the clustering algorithm is distributionCV. Deactivated. 
:type evaLLONG: boolean :param tops: limit to the number of files produced by phaser to process :type tops: int :param LIMIT_CLUSTER: numerical id of the cluster where to perform the evaluation of the translation :type LIMIT_CLUSTER: int :param convNames: :type convNames: dict :param applyNameFilter: If True, only save top solution for each model :type applyNameFilter: boolean :param renamePDBs: indicates whether the output PDBs must be renamed :type renamePDBs: boolean :param giveids: :type giveids: :param isArcimboldo: :type isArcimboldo: :param ent: :type ent: :param models_directory: :type models_directory: :param model_file: :type model_file: :param excludeZscoreRNP: :type excludeZscoreRNP: :param cleanshsol: :type cleanshsol: :return: :rtype: """ dirente = outputDicr if dirente[-1] == '/': dirente = dirente[:-1] if hasattr(cm, 'channel'): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) status = '--' fromIn = 0 # Will be incremented for every translation file (.out) evaluated toIn = nqueue - 1 poolrota = [] foms = {'llg': [numpy.inf, 0.0], 'zscore': [numpy.inf, 0.0]} candelete = True listaAllsol = [[], [], {}] nfixfrags = 0 ndir = 0 dirente2 = '' current_dir2 = '' rnp_sol = {} while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print "Evaluating translation file number %s/%s" % (fromIn, toIn) # NS CHANGED if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, './' + str(ndir) + '/') dirente2 = outputDic if dirente2[-1] == '/': dirente2 = dirente2[:-1] if hasattr(cm, 'channel'): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 havetoskip = False while 1: if hasattr(cm, 'channel'): wse = cm.get_remote_file(str(fromIn) + '.out', os.path.join(outputDic, str(fromIn) + '.out'), conditioEND=PHASER_OUT_ANY_CASE, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.out not ready sleeping 3 seconds...' time.sleep(3) continue failure = checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, failure_test=PHASER_OUT_FAILURE_CONDITION_LOCAL) if failure == None: havetoskip = True break if mode == 'PACK': f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkpack = f.read() f.close() if checkpack.count(str(fromIn) + '.sol') == 1: wse = cm.get_remote_file(str(fromIn) + '.sol', os.path.join(outputDic, str(fromIn) + '.sol'), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.sol not ready sleeping 3 seconds...' time.sleep(3) continue else: wse = cm.get_remote_file(str(fromIn) + '.sol', os.path.join(outputDic, str(fromIn) + '.sol'), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.sol not ready sleeping 3 seconds...' time.sleep(3) continue if mode == 'RNP' or mode == 'RNP_GIMBLE': f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() # listas = list(set(re.findall(str(fromIn) + '\\.[0-9]+\\.pdb', checkrnp))) #CM phaser 2.7 listas = re.findall(r'Map coefficient calculated for top solution', checkrnp) # CM phaser 2.8 listas = [str(fromIn) + "." 
+ str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: rnp_sol[ler] = len(ler) wse = cm.get_remote_file(ler, os.path.join(outputDic, ler), lenght_ext=len(ler), conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print 'File ' + str(fromIn) + '.1.pdb not ready sleeping 3 seconds...' time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + '.out')): # Multiprocessing failure = checkYOURoutput(os.path.join(outputDic, str(fromIn) + '.out'), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, failure_test=PHASER_OUT_FAILURE_CONDITION_LOCAL) if failure == None: havetoskip = True break while True: atest = False btest = False if mode == 'PACK': f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkpack = f.read() f.close() if checkpack.count(str(fromIn) + '.sol') == 1: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + '.sol')) else: atest = True else: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + '.sol')) if mode == 'RNP' or mode == "RNP_GIMBLE": f = open(os.path.join(outputDic, str(fromIn) + '.out'), 'r') checkrnp = f.read() f.close() listas = re.findall(r'Map coefficient calculated for top solution',checkrnp) listas = [str(fromIn) + "." + str(l + 1) + ".pdb" for l in range(len(listas))] for ler in listas: # For each pdb file in listas rnp_sol[ler] = len(ler) btest = os.path.exists(os.path.join(outputDic, ler)) if not btest: break else: btest = True if atest and btest: break break else: time.sleep(3) continue if mode == "RNP_GIMBLE": riprova = True PDBCREATED = "" PDBORIGINAL = "" while riprova: try: try: fer = open(outputDic + str(fromIn) + ".sol", "r") ferli = fer.readlines() fer.close() llg = 0.0 zscore = 0.0 data_chain = {} first = False for ferlinea in ferli: if ferlinea.startswith("SOLU SET"): # e.g. 
SOLU SET LLG=72 TFZ==4.9 if first: break else: listy = ferlinea.split() llg = float(listy[2].split("=")[-1]) first = True if ferlinea.startswith("SOLU 6DIM"): # 6.sol:SOLU 6DIM ENSE ensemble5[c] EULER 129.248 1.214 229.827 # FRAC -0.15914 -0.00884 0.10078 BFAC 0.00000 topl = ferlinea.split() chainid = topl[4][-1] # print 'ferlinea',ferlinea data_chain[chainid] = [float(topl[5]), float(topl[6]), float(topl[7]), float(topl[9]), float(topl[10]), float(topl[11])] # print 'data_chain[',chainid,']',data_chain[chainid] if os.path.exists(models_directory): structure = Bioinformatics.getStructure("full", os.path.join(outputDic, str(fromIn) + ".1.pdb")) new_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() new_list_atoms += chain_atoms # print "------",len(new_list_atoms),len(new_list_atoms) new_list_atoms = sorted(new_list_atoms, key=lambda x: x.get_parent().get_full_id()[3][1:]) ori_path = SystemUtility.findInSubdirectory( "".join(os.path.basename(convNames[str(fromIn) + ".1.pdb"][0]).split("nogyre")), subdirectory=models_directory) if model_file != None and os.path.exists(model_file): ori_path = model_file # new_list_atoms = sorted(new_list_atoms,__cmp_atom) if not os.path.exists(ori_path): ori_path = os.path.join(models_directory, "".join((os.path.basename( convNames[str(fromIn) + ".1.pdb"][0]).split("-")[0] + ".pdb").split("nogyre"))) structure = Bioinformatics.getStructure("full", ori_path) old_list_atoms = [] for model in structure.get_list(): for chain in model.get_list(): chain_atoms = [] chain_name = chain.get_id() for resi in chain.get_list(): for aty in resi: chain_atoms += [aty] # resi.get_list() #resi.get_unpacked_list() old_list_atoms += chain_atoms # print "------",len(old_list_atoms),len(new_list_atoms) old_list_atoms = sorted(old_list_atoms, key=lambda x: x.get_parent().get_full_id()[3][ 1:]) # sort by residue number PDBCREATED = Bioinformatics.getPDBFromListOfAtom(new_list_atoms)[0] PDBORIGINAL = Bioinformatics.getPDBFromListOfAtom(old_list_atoms)[0] try: # NOTE CM now I need to also change to new algorithm dictionary_dist = Bioinformatics3.get_CA_distance_dictionary( io.StringIO(SystemUtility.py2_3_unicode(PDBORIGINAL)), io.StringIO(SystemUtility.py2_3_unicode(PDBCREATED)), max_rmsd=1000, last_rmsd=1000, recompute_rmsd=True, cycles=1, cycle=1, before_apply="automatic", get_superposed_atoms=False) rmsT = Bioinformatics3.get_rmsd_from_distance_hash(dictionary_dist) nref = len(dictionary_dist) # original ncom = nref # created #(rmsT, nref, ncom, allAtoms, compStru, pda) = Bioinformatics.getSuperimp( # PDBORIGINAL, # PDBCREATED, # "PDBSTRINGBM_RESIDUES_CONSERVED", # algorithm="nigels-core2", # backbone=True, # superpose_exclude=1, # n_iter=None, # onlyCA=True) except: print "Error on file" print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsT = 100 nref = 0 ncom = 0 rmsd = rmsT if rmsd == 100: nref = 0 ncom = 0 except: print sys.exc_info() traceback.print_exc(file=sys.stdout) rmsd = 100 nref = 0 ncom = 0 # Write the info about the changes in the model in a sum file f = open(outputDic + "../models.sum", "a") f.write("===========\n") f.write("MODEL: " + str(fromIn) + " CORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(fromIn) + ".1.pdb"][0])) + " RMSD_GYRE: " + str( "%.2f" % rmsd) + "\n") f.write("LLG: " + str(llg) + "\t" + "ZSCORE: " + str(zscore) + "\t" + "NREF_ATM: 
" + str( nref) + "\t" + "NCOM_ATM: " + str(ncom) + "\n") f.write("===========\n") f.close() # Now compute the RMSD with respect to the final ent and write the information in a sum file nresc = -1 rms0 = -1 diffrms = -1 p = subprocess.Popen('grep -c " CA " ' + os.path.join(outputDic, str(fromIn) + ".1.pdb"), shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) outp, errp = p.communicate() outp = outp.strip() nresc = int(outp) nmin = nresc - 10 ndiff = -1 # if os.path.exists(ent): # (rmsent, nrefent, ncoment, allent, cment, dicent) = Bioinformatics.getSuperimp( # os.path.join(outputDic, str(fromIn) + ".1.pdb"), ent, "PDB", algorithm="superpose", # backbone=True) # (rms0, nref0, ncom0, all0, cm0, dic0) = Bioinformatics.getSuperimp(ori_path, ent, "PDB", # algorithm="superpose", # backbone=True) # rmsd_init = rms0 # core_init = ncom0 # dicc_init = dic0 # rmsd_fin = rmsent # core_fin = ncoment # dicc_fin = dicent # # (rmsd_core_common_init, rmsd_core_common_fin, # core_common) = Bioinformatics.getRMSDfromCommonCore(dicc_init, dicc_fin) # # diffrmsd = rmsd_core_common_init - rmsd_core_common_fin # # print "Model:",os.path.join(outputDic,str(i)+".1.pdb"),"RMSD",rmsent,"NREF",nrefent,"NCOM",ncoment # f = open(outputDic + "../pm_gimble.sum", "a") # f.write("MODEL: " + convNames[str(fromIn) + ".1.pdb"][0] + " SIZE: " + str( # nresc) + " RMSD_GYRE: " + str("%.2f" % rmsd) + " RMSD_INIT: " + str( # "%.2f" % rmsd_init) + " CORE_INIT: " + str(core_init) + " RMSD_FIN: " + str( # "%.2f" % rmsd_fin) + " CORE_FIN: " + str(core_fin) + " RMSD_INIT_COMMON: " + str( # "%.2f" % rmsd_core_common_init) + " RMSD_FIN_COMMON: " + str( # "%.2f" % rmsd_core_common_fin) + " RMSD_DIFF: " + str( # "%.2f" % diffrmsd) + " CORE_COMMON: " + str(len(core_common.keys())) + " LLG: " + str( # llg) + "\n") # f.close() # riprova = False except: print "Error...Trying to read again output files..." 
print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True if not havetoskip: ensembles, listaAllsol, nfixfrags = clusterAtOnceSols(DicParameters=DicParameters, listaAllsol=listaAllsol, isArcimboldo=isArcimboldo, renamePDBs=renamePDBs, rnp_sol=copy.deepcopy(rnp_sol), baseDir=outputDic, name=str(fromIn), quate=quate, laue=laue, listNCS=listNCS, excludeZSCORE=excludeZscore, mode=mode, ClusteringMode=clusteringMode, ensembles=ensembles, cell_dim=cell_dim, thresholdCompare=thresholdCompare, evaLLONG=evaLLONG, convNames=convNames, tops=tops, LIMIT_CLUSTER=LIMIT_CLUSTER, giveids=giveids, excludeZscoreRNP=excludeZscoreRNP, make_positive_llg=make_positive_llg, is_verification=is_verification) # NS remove unselected pdb files after filtering on TZF== during RNP if mode == 'RNP': # NOTE: also the ones that are excluded by negative LLg must be removed #and excludeZscoreRNP>0 : pdbFilesToDelete = glob.glob(outputDic + str(fromIn) + '.*.pdb') print "Deleting the files: %s" % pdbFilesToDelete for pdbf in pdbFilesToDelete: os.remove(pdbf) if cleanshsol: # Deleting sol, out and sh files, NS: put a variable from the argument list try: nb = fromIn if os.path.exists(outputDic + str(nb) + '.sol'): os.remove(outputDic + str(nb) + '.sol') if os.path.exists(outputDic + str(nb) + '.out'): os.remove(outputDic + str(nb) + '.out') fixfrfr = 0 while True: try: os.unlink(outputDic + str(fixfrfr) + '_' + str(nb) + '.pdb') fixfrfr += 1 except: break if os.path.exists(outputDic + str(nb) + '.sh'): os.remove(outputDic + str(nb) + '.sh') if os.path.exists(outputDic + str(nb) + '.sum'): os.remove(outputDic + str(nb) + '.sum') if os.path.exists(outputDic + str(nb) + '.rlist'): os.remove(outputDic + str(nb) + '.rlist') except: print 'Cannot find the file to delete.' fromIn += 1 # Next translation file to evaluate if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime('%Y-%m-%d %H:%M') print 'Not all models are yet performed...' + ti if not yetEvaluated and not startedLocal: print 'Long sleep, queue not ready' time.sleep(60) for root, subFolders, files in os.walk(outputDicr): for fileu in files: pdbf = os.path.join(root, fileu) if pdbf.endswith(".pdb"): # s = re.findall('[0-9]+\\.[0-9]+\\.[0-9]+\\.pdb', fileu) if len(fileu.split(".")) > 3: os.remove(pdbf) if hasattr(cm, 'channel'): cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) print 'Now all models are performed.' 
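# NOTE: job outputs are sharded into numbered subdirectories holding at most
# NUMBER_OF_FILES_PER_DIRECTORY files each, which is why the loops in this
# module test fromIn % NUMBER_OF_FILES_PER_DIRECTORY. Minimal sketch of the
# mapping that the loops maintain incrementally in ndir (shard_dir is a
# hypothetical helper, for illustration only; it is not part of this module):
#
#   def shard_dir(job_index, per_dir=NUMBER_OF_FILES_PER_DIRECTORY):
#       # jobs 0..per_dir-1 live in "0/", per_dir..2*per_dir-1 in "1/", ...
#       return str(job_index // per_dir)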
SystemUtility.close_connection(DicGridConn, DicParameters, cm) listaAllsol.append([]) ClusAll = saveRotations(DicParameters, listaAllsol, LIMIT_CLUSTER=LIMIT_CLUSTER, applyNameFilter=applyNameFilter) return (ClusAll, ensembles, nfixfrags) def removeParallelFragments(Clusters): """ :param Clusters: :type Clusters: :return: :rtype: """ Clus = [] for cla in Clusters: lista = cla["heapSolutions"].asList() hp = ADT.Heap() for item in lista: prio, rota = item liNot = [] for fix in rota["fixed_frags"]: liNot.append(int(fix["n_prev_cluster"])) if int(rota["n_prev_cluster"]) not in liNot: hp.push(prio, rota) cla["heapSolutions"] = hp Clus.append(cla) return Clus def eraseRotationsInList(Clusters, done): """ :param Clusters: :type Clusters: :param done: :type done: :return: :rtype: """ for clu in Clusters: hp = ADT.Heap() for item in clu["heapSolutions"]: prio, rota = item if rota["n_prev_cluster"] not in done: hp.push(prio, rota) clu["heapSolutions"] = hp return Clusters def divideInGroupsForFixedFrags(Clusters): """ :param Clusters: :type Clusters: :return: :rtype: """ Clus = [] for tru in Clusters: lista = tru["heapSolutions"].asList() for item in lista: prio, rota = item fix = rota["fixed_frags"] if fix not in Clus: Clus.append(fix) return Clus def getClustersUsed(Clusters): """ :param Clusters: :type Clusters: :return: :rtype: """ used = [] for clu in Clusters: lista = clu["heapSolutions"].asList() for item in lista: prio, rota = item if rota["n_prev_cluster"] not in used: used.append(rota["n_prev_cluster"]) if "fixed_frags" in rota: for fix in rota["fixed_frags"]: if fix["n_prev_cluster"] not in used: used.append(fix["n_prev_cluster"]) return used def eraseClustersInList(comparing, done): """ :param comparing: :type comparing: :param done: :type done: :return: :rtype: """ Clus = [] for u in range(len(comparing)): if u not in done: Clus.append(comparing[u]) return Clus def createCombinations(Clus, comparing): """ :param Clus: :type Clus: :param comparing: :type comparing: :return: :rtype: """ Clusters = [None for _ in range(nOfCluster)] # print "Creating Clusters of len",len(Clusters) for fixed in Clus: liNot = [] for rot in fixed: liNot.append(int(rot["n_prev_cluster"])) clust = liNot[0] # print "Put in clusters", clust for i in range(len(comparing)): if i in liNot: continue """ if clust == 0: if i in liNot or i= 0: din["longest"] = [0, rotaz["elong"]] elif "longest" not in din.keys() and rotaz["elong"] < 0: din["longest"] = [rotaz["elong"], 0] elif (rotaz["elong"] < 0 and din["longest"][0] > rotaz["elong"]): din["longest"][0] = rotaz["elong"] elif (rotaz["elong"] >= 0 and din["longest"][1] < rotaz["elong"]): din["longest"][1] = rotaz["elong"] hp.push(priority, rotaz) din["heapSolutions"] = hp Clusters[clust] = din # print "...rotation inserted in cluster: "+str(0)+"..." 
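# NOTE: throughout this module solutions are pushed onto ADT.Heap with the
# priority tuple (-1 * llg, -1 * zscore); assuming ADT.Heap is a min-heap
# (as its use here implies), negating both figures of merit makes pop()
# return the highest-LLG solution first, with ZSCORE as tie-breaker.
# Equivalent sketch with the standard library, for illustration only:
#
#   import heapq
#   heap = []
#   for rot in [{"llg": 40.0, "zscore": 6.1}, {"llg": 72.0, "zscore": 4.9}]:
#       heapq.heappush(heap, ((-1 * rot["llg"], -1 * rot["zscore"]), rot))
#   best = heapq.heappop(heap)[1]  # the rotation with llg == 72.0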
else: priority = (-1 * rotaz["llg"], -1 * rotaz["zscore"]) rotaz["fixed_frags"] = fixed if "longest" not in (Clusters[clust]).keys() and rotaz["elong"] >= 0: (Clusters[clust])["longest"] = [0, rotaz["elong"]] elif "longest" not in (Clusters[clust]).keys() and rotaz["elong"] < 0: (Clusters[clust])["longest"] = [rotaz["elong"], 0] elif (rotaz["elong"] < 0 and (Clusters[clust])["longest"][0] > rotaz["elong"]): (Clusters[clust])["longest"][0] = rotaz["elong"] elif (rotaz["elong"] >= 0 and (Clusters[clust])["longest"][1] < rotaz["elong"]): (Clusters[clust])["longest"][1] = rotaz["elong"] ((Clusters[clust])["heapSolutions"]).push(priority, rotaz) return Clusters def evaluateFRF_MPR(DicParameters, GRID_TYPE, QNAME, FRACTION, PARTITION, cm, sym, nice, DicGridConn, RotClu, nameJob, outputDicr, nqueue, quate, laue, ncs, spaceGroup, ensembles, clusteringAlg, excludeLLG, fixed_frags, cell_dim, thresholdCompare, evaLLONG, isArcimboldo=False, tops=None, LIMIT_CLUSTER=None, applyNameFilter=False, candelete=True, giveids=False, merge=[],make_positive_llg=False): """ Evaluates phaser's fast rotation function. :param DicParameters: dictionary with key-value pairs required at different steps. Keys: "nameExecution", :type DicParameters: dict :param GRID_TYPE: :type GRID_TYPE: :param QNAME: :type QNAME: :param FRACTION: :type FRACTION: :param PARTITION: :type PARTITION: :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param nice: :type nice: :param DicGridConn: dictionary with information required for the connections. Keys: "username","host","port", "passkey","promptA","isnfs","remote_submitter_username","remote_submitter_host", "remote_submitter_port","promptB" :type DicGridConn: dict :param RotClu: :type RotClu: :param nameJob: name for the job (will be used for sending them) :type nameJob: str :param outputDicr: :type outputDicr: :param nqueue: :type nqueue: :param quate: :type quate: :param laue: :type laue: :param ncs: :type ncs: :param spaceGroup: :type spaceGroup: :param ensembles: :type ensembles: :param clusteringAlg: :type clusteringAlg: :param excludeLLG: :type excludeLLG: :param fixed_frags: :type fixed_frags: :param cell_dim: list with the unit cell parameters :type cell_dim: list :param thresholdCompare: :type thresholdCompare: :param evaLLONG: :type evaLLONG: :param isArcimboldo: :type isArcimboldo: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param applyNameFilter: :type applyNameFilter: :param candelete: :type candelete: :param giveids: :type giveids: :param merge: :type merge: :param make_positive_llg: :type make_positive_llg: :return: :rtype: """ f = open(os.path.join(outputDicr, nameJob + ".pic"), "w") #print " Starting the evaluateFRF_MPR function" lista_dati = [PATH_NEW_PHASER, PATH_NEW_SHELXE, PATH_NEW_ARCIFIRE, DicParameters, nice, DicGridConn, RotClu, nameJob, outputDicr, nqueue, laue, ncs, spaceGroup, ensembles, clusteringAlg, excludeLLG, fixed_frags, cell_dim, thresholdCompare, evaLLONG, isArcimboldo, tops, LIMIT_CLUSTER, applyNameFilter, candelete, giveids, merge, sym, GRID_TYPE, QNAME, FRACTION, PARTITION] pickle.dump(lista_dati, f) pickle.dump(os.path.join(outputDicr, nameJob + "_end.txt"), f) f.close() def startJob(outputDirectory, op): """ :param outputDirectory: path to folder for the output :type outputDirectory: str :param op: :type op: :return: :rtype: """ if os.path.exists(os.path.join(outputDirectory, "merged.sum")) or os.path.exists( 
os.path.join(outputDirectory, "unmerged.sum")): return print "Executing...", "nice", "-n" + str(nice), PATH_NEW_ARCIFIRE, "-r", os.path.join(outputDirectory, op) p = subprocess.Popen(["nice", "-n" + str(nice), PATH_NEW_ARCIFIRE, "-r", os.path.join(outputDirectory, op)], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=outputDirectory) out, err = p.communicate() f.close() print err execu = "" if hasattr(cm, "channel"): cm.copy_local_file(os.path.join(outputDicr, nameJob + ".pic"), nameJob + ".pic", force_cumulative=False) cm.copy_local_file(PATH_NEW_ARCIFIRE, os.path.basename(PATH_NEW_ARCIFIRE)) execu = os.path.join(cm.get_remote_pwd(), os.path.basename(PATH_NEW_ARCIFIRE)) else: execu = PATH_NEW_ARCIFIRE if cm is None: try: if len(SystemUtility.NODES) == 0: if sym.PROCESSES > 0: print "I found ", sym.REALPROCESSES, "CPUs." while 1: if len(threading.enumerate()) <= sym.PROCESSES: p = SystemUtility.OutputThreading(startJob, outputDicr, os.path.join(outputDicr, nameJob + ".pic")) p.start() break else: print "FATAL ERROR: I cannot load correctly information of CPUs." sym.couldIClose = True sys.exit(0) else: while 1: comando = "" + PATH_NEW_ARCIFIRE + " -r " + os.path.join(outputDicr, nameJob + ".pic") + " > /dev/null" # +nameDir+"log_output_"+str(c)+".out" SystemUtility.launchCommand(comando, os.path.join(outputDicr, nameJob + "_end.txt"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) break except KeyboardInterrupt: print "The user requires to exit from Borges." sym.couldIClose = True sys.exit(0) else: job = Grid.gridJob(nameJob) if hasattr(cm, "channel"): job.setInitialDir(cm.get_remote_pwd()) else: job.setInitialDir(os.path.abspath(outputDicr)) script = """#!/bin/bash if [ ! -f """ + execu + """ ]; then """ + execu + """ -r """ + os.path.join(outputDicr, nameJob + ".pic") + """ > /dev/null else """ + os.path.basename(execu) + """ -r """ + os.path.join(outputDicr, nameJob + ".pic") + """ > /dev/null fi """ f = open(os.path.join(outputDicr, nameJob + ".sh"), "w") f.write(script) f.close() st = os.stat(os.path.join(outputDicr, nameJob + ".sh")) os.chmod(os.path.join(outputDicr, nameJob + ".sh"), st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) job.setExecutable(os.path.join(outputDicr, nameJob + ".sh")) job.addInputFile(os.path.join(outputDicr, nameJob + ".pic"), False) job.addInputFile(execu, False) job.addOutputFile("merged.sum", False) job.addOutputFile("unmerged.sum", False) (nc, nq) = cm.submitJob(job, isthelast=True) SystemUtility.LISTJOBS[nameJob] = [(os.path.join(outputDicr, nameJob + "_end.txt"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST, True, "")] print "Search in " + str(nq) + " structure submitted to the cluster " + str(nc) return os.path.join(outputDicr, "merged.sum"), os.path.join(outputDicr, "unmerged.sum") def evaluateFRF_clusterOnce(DicParameters, cm, sym, DicGridConn, RotClu, nameJob, outputDicr, nqueue, quate, laue, ncs, spaceGroup, ensembles, clusteringAlg, excludeLLG, fixed_frags, cell_dim, thresholdCompare, evaLLONG, isArcimboldo=False, tops=None, LIMIT_CLUSTER=None, applyNameFilter=False, candelete=True, giveids=False, merge=[], make_positive_llg=False): """ :param DicParameters: :type DicParameters: :param cm: :type cm: :param sym: :type sym: :param DicGridConn: dictionary with information required for the connections. 
Keys: "username","host","port", "passkey","promptA","isnfs","remote_submitter_username","remote_submitter_host", "remote_submitter_port","promptB" :type DicGridConn: dict :param RotClu: :type RotClu: :param nameJob: :type nameJob: :param outputDicr: :type outputDicr: :param nqueue: :type nqueue: :param quate: :type quate: :param laue: :type laue: :param ncs: :type ncs: :param spaceGroup: :type spaceGroup: :param ensembles: :type ensembles: :param clusteringAlg: :type clusteringAlg: :param excludeLLG: :type excludeLLG: :param fixed_frags: :type fixed_frags: :param cell_dim: list with the unit cell parameters :type cell_dim: list :param thresholdCompare: :type thresholdCompare: :param evaLLONG: :type evaLLONG: :param isArcimboldo: :type isArcimboldo: :param tops: :type tops: :param LIMIT_CLUSTER: :type LIMIT_CLUSTER: :param applyNameFilter: :type applyNameFilter: :param candelete: :type candelete: :param giveids: :type giveids: :param merge: :type merge: :param make_positive_llg: :type make_positive_llg: :return: :rtype: """ global LAST_AVAILABLE_ROTID status = "--" fromIn = 0 toIn = nqueue - 1 foms = {"llg": [numpy.inf, 0.0], "zscore": [numpy.inf, 0.0]} listaAllrot = [[], [], {}, [[], RotClu]] dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) ndir = 0 dirente2 = "" current_dir2 = "" while fromIn <= toIn: yetEvaluated = True if yetEvaluated: print "Evaluating rotation file" + str(fromIn) # NS CHANGE if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(fromIn) + ".out", os.path.join(outputDic, str(fromIn) + ".out"), conditioEND=PHASER_OUT_END_CONDITION, testEND=PHASER_OUT_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".out not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(fromIn) + ".rlist", os.path.join(outputDic, str(fromIn) + ".rlist"), lenght_ext=6, conditioEND=PHASER_RLIST_SOL_PDB_END_CONDITION, testEND=PHASER_RLIST_SOL_PDB_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".rlist not ready sleeping 3 seconds..." 
time.sleep(3) continue # print cm.remove_remote_file(str(fromIn)+".rlist") # print cm.remove_remote_file(str(fromIn)+".out") # print cm.remove_remote_file(str(fromIn)+".sum") # print cm.remove_remote_file(str(fromIn)+".sh") # print cm.remove_remote_file(str(fromIn)+".pdb") break elif os.path.exists(os.path.join(outputDic, str(fromIn) + ".out")): checkYOURoutput(os.path.join(outputDic, str(fromIn) + ".out"), PHASER_OUT_END_CONDITION_LOCAL, PHASER_OUT_END_TEST) while True: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + ".rlist")) if atest: break break else: # print "File ",os.path.join(outputDic,str(fromIn)+".out"),os.path.exists(os.path.join(outputDic,str(fromIn)+".out")) time.sleep(3) continue lastFile = False if fromIn == toIn: lastFile = True if not giveids: listaAllrot = clusterAtOnce(DicParameters=DicParameters, listaAllrot=listaAllrot, isArcimboldo=isArcimboldo, baseDir=outputDic, name=str(fromIn), quate=quate,laue=laue, listNCS=ncs, excludeLLG=excludeLLG, mode="FRF", ClusteringMode=clusteringAlg, ensembles=ensembles, cell_dim=cell_dim, thresholdCompare=thresholdCompare, evaLLONG=evaLLONG, tops=tops, LIMIT_CLUSTER=LIMIT_CLUSTER, giveids=giveids, applyNameFilter=applyNameFilter, lastFile=lastFile, make_positive_llg=make_positive_llg) else: ensembles, listaAllrot = clusterAtOnce(DicParameters=DicParameters, listaAllrot=listaAllrot, isArcimboldo=isArcimboldo, baseDir=outputDic, name=str(fromIn), quate=quate, laue=laue, listNCS=ncs, excludeLLG=excludeLLG, mode="FRF", ClusteringMode=clusteringAlg, ensembles=ensembles, cell_dim=cell_dim, thresholdCompare=thresholdCompare, evaLLONG=evaLLONG, tops=tops, LIMIT_CLUSTER=LIMIT_CLUSTER, giveids=giveids, applyNameFilter=applyNameFilter, lastFile=lastFile,make_positive_llg=make_positive_llg) candelete = True if candelete: try: nb = fromIn os.remove(outputDic + str(nb) + ".rlist") os.remove(outputDic + str(nb) + ".out") os.remove(outputDic + str(nb) + ".pdb") for rt in range(fixed_frags): try: os.remove(outputDic + str(rt) + "_" + str(nb) + ".pdb") except: pass os.remove(outputDic + str(nb) + ".sh") os.remove(outputDic + str(nb) + ".sum") except Exception: # print sys.exc_info() # traceback.print_exc(file=sys.stdout) # candelete = False # sys.exit(0) pass fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) print "Now all models are performed." 
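# NOTE: the candelete block above removes, for each evaluated job nb, the
# Phaser intermediates nb.rlist/.out/.pdb/.sh/.sum plus one "<frag>_<nb>.pdb"
# per fixed fragment. A compact glob-based equivalent (illustration only;
# the job index 7 is an arbitrary example, and the real code silently
# tolerates files that are already gone):
#
#   import glob, os
#   targets = glob.glob(os.path.join(outputDic, "*_7.pdb"))
#   targets += [os.path.join(outputDic, "7" + ext)
#               for ext in (".rlist", ".out", ".pdb", ".sh", ".sum")]
#   for path in targets:
#       if os.path.exists(path):
#           os.remove(path)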
if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file("rot.mtz") cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) SystemUtility.close_connection(DicGridConn, DicParameters, cm) if giveids and len(merge) == 0: return listaAllrot[3][0], listaAllrot[3][1], ensembles elif len(merge) > 0: return __change_rot_clusterid(merge, listaAllrot[3][0], quate, laue, ncs, clusteringAlg, ensembles, cell_dim, thresholdCompare, evaLLONG) else: return listaAllrot[3][0], listaAllrot[3][1] def __change_rot_clusterid(template, data, quate, laue, ncs, clusteringAlg, ensembles, cell_dim, thresholdCompare, evaLLONG): """ :param template: :type template: :param data: :type data: :param quate: :type quate: :param laue: :type laue: :param ncs: :type ncs: :param clusteringAlg: :type clusteringAlg: :param ensembles: :type ensembles: :param cell_dim: list with the unit cell parameters :type cell_dim: list :param thresholdCompare: :type thresholdCompare: :param evaLLONG: :type evaLLONG: :return: :rtype: """ visited = [] numc = 0 unmerged = [] for a in range(len(data)): clu1 = data[a] if len(clu1["heapSolutions"].asList()) == 0: continue item1 = clu1["heapSolutions"].pop() clu1q = item1[1] clu1["heapSolutions"].push(item1[0], item1[1]) numc = clu1q["n_prev_cluster"] performed = False numd = -1 for b in range(len(template)): if b in visited: continue clu2 = template[b] if len(clu2["heapSolutions"].asList()) == 0: continue item2 = clu2["heapSolutions"].pop() clu2q = item2[1] clu2["heapSolutions"].push(item2[0], item2[1]) re, elo = compareRotation(clu1q, clu2q, thresholdCompare, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG) if re: visited.append(b) performed = True numd = clu2q['n_prev_cluster'] break if performed: for ite in clu1["heapSolutions"].asList(): prio2 = ite[0] rota2 = ite[1] rota2['n_prev_cluster'] = numd # NOTE: the following instruction imply we are always working with 1 fixed frag. 
# is not supported for ARCIMBOLDO but only for ARCIMBOLDO-BORGES.
                rota2['original_rotcluster'] = numd
                # priority = (-1 * rota2['llg'], -1 * rota2['zscore'])
                # clu1["heapSolutions"].push(priority,rota2)
        else:
            unmerged.append(copy.deepcopy(clu1))
            clu1["heapSolutions"] = ADT.Heap()
    return data, unmerged, ensembles


def __clusterSetOfRotations(isArcimboldo, quaternions, ref_labels, ref_rotaz, thresholdCompare, clusteringAlg, quate,
                            laue, ncs, ensembles, cell_dim, evaLLONG, prefilter=None):
    """
    Cluster a set of rotations: an optional K-means pass pre-groups the rotation quaternions, after which the
    group representatives are merged by geometric comparison.

    :param isArcimboldo:
    :type isArcimboldo:
    :param quaternions:
    :type quaternions:
    :param ref_labels:
    :type ref_labels:
    :param ref_rotaz:
    :type ref_rotaz:
    :param thresholdCompare:
    :type thresholdCompare:
    :param clusteringAlg:
    :type clusteringAlg:
    :param quate:
    :type quate:
    :param laue:
    :type laue:
    :param ncs:
    :type ncs:
    :param ensembles:
    :type ensembles:
    :param cell_dim: list with the unit cell parameters
    :type cell_dim: list
    :param evaLLONG:
    :type evaLLONG:
    :param prefilter:
    :type prefilter:
    :return:
    :rtype:
    """
    # kappa,whitened = Bioinformatics.__CrossValidationKParameter(quaternions,2,-0.1)
    # labels = scipy.cluster.hierarchy.fclusterdata(listaAllrot[0], 0.5, method='average')
    if len(quaternions) > 10000:
        kappa = None
        labels = None
        if isArcimboldo:
            kappa, labels = Bioinformatics.__STDCentroidKParameter(quaternions, 4, 0.0001, criteria="mean",
                                                                   oneByone=True)
        else:
            kappa, labels = Bioinformatics.__STDCentroidKParameter(quaternions, 2, 0.0001, criteria="mean",
                                                                   oneByone=True)
        # groups, labels = scipy.cluster.vq.kmeans(whitened,kappa,iter=20,minit="points")
        print "Performing a cluster with K-Means with K=" + str(kappa)
        subclu = [[] for _ in range(kappa)]
        for p in range(len(labels)):
            ind = labels[p]
            name = ref_labels[p]
            subclu[ind].append(ref_rotaz[name])
    else:
        subclu = [[] for _ in range(len(quaternions))]
        for p in range(len(quaternions)):
            name = ref_labels[p]
            subclu[p].append(ref_rotaz[name])
    tomerge = {}
    merged = []
    # print "Numero: ",len(subclu)
    # print subclu
    # for clus in subclu:
    #     print "CLUSTER======================"
    #     for rotaz in clus:
    #         print rotaz["euler"],ADT.cantor_pairing([rotaz["euler"][0]*3,rotaz["euler"][1]*1,rotaz["euler"][2]*2])
    #     print "================================="
    for plu in range(len(subclu)):
        clus = subclu[plu]
        if len(clus) == 0:
            # print "Empty cluster",plu,"skipping..."
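# NOTE: clustering proceeds in two stages. With more than 10000 rotations a
# K-means pass over their quaternions (K chosen by __STDCentroidKParameter)
# pre-groups them, so only one representative per group enters the pairwise
# comparison below; otherwise every rotation starts as its own singleton
# group. A rough numpy/scipy sketch of such a prefilter (kmeans2 and the
# fixed k=50 are stand-ins for illustration, not the routine used above):
#
#   import numpy, scipy.cluster.vq
#   quats = numpy.asarray(quaternions, dtype=float)      # shape (n, 4)
#   _, labels = scipy.cluster.vq.kmeans2(quats, 50, minit="points")
#   groups = [numpy.where(labels == k)[0] for k in range(50)]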
continue rotaz = clus[0] inserted = False # print "Evaluating centroid ",plu for tr in range(len(merged)): # print "comparing with cluster",tr,"of",len(merged) rot = merged[tr][0] indsubclu = merged[tr][1] result = False elong = 0 # print "###################################" # print "rocci1",rotaz["euler"] # print "rocci2",rot["euler"] if prefilter == None or prefilter <= 0: result, elong = compareRotation(rotaz, rot, 3, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG, print_angles=False) else: result, elong = compareRotation(rotaz, rot, prefilter, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG, print_angles=False) if result: # print "Rotation is compatible with cluster",tr,"with an elongation of",elong rotaz["elong"] = elong inserted = True if indsubclu not in tomerge.keys(): tomerge[indsubclu] = [plu] else: tomerge[indsubclu] += [plu] break if not inserted: # print "Inserting rotation in a new cluster" rotaz["elong"] = 0 merged.append((rotaz, plu)) tomerge[plu] = [] # print "Numero ex clusters",len(subclu) # print "Numero nuovi clusters",len(merged) # print "Accorpamenti" # print tomerge startT = 6 if prefilter == None or prefilter <= 0: startT = 6 else: startT = 3 for key1 in tomerge.keys(): liuno = subclu[key1] for k in tomerge[key1]: liuno += subclu[k] liuno = sorted(liuno, __cmp_rota, reverse=True) liu = [liuno[0]] subclu[key1] = liu tomerge[key1] = [] for trrr in numpy.arange(startT, thresholdCompare + 3, 3): doSomething = True while doSomething: doSomething = False for key1 in tomerge.keys(): for key2 in tomerge.keys(): if key1 == key2: continue result = False elong = 0 liuno = sorted(subclu[key1], __cmp_rota, reverse=True) lidue = sorted(subclu[key2], __cmp_rota, reverse=True) rotaz = liuno[0] rot = lidue[0] # print "###################################" # print "rocci1",rotaz["euler"] # print "rocci2",rot["euler"] result, elong = compareRotation(rotaz, rot, trrr, clusteringAlg, quate, laue, ncs, ensembles, cell_dim, evaLLONG, print_angles=False) if result: # print "Rotation is compatible with cluster",tr,"with an elongation of",elong rotaz["elong"] = elong tomerge[key1] = tomerge[key1] + tomerge[key2] + [key2] del tomerge[key2] doSomething = True break # to the first for if doSomething: break # to the while nuovo_clus = [] indes = 0 for key in tomerge.keys(): value = tomerge[key] + [key] # pdb_done = [] dictio = {} dictio["heapSolutions"] = ADT.Heap() # print "NEW CLUSTER==================================" for va in value: subclu[va] = sorted(subclu[va], __cmp_rota, reverse=True) for rotaz in subclu[va]: # print rotaz["euler"],rotaz["llg"],rotaz["name"],"----",pdb_done # if not isArcimboldo and rotaz["name"] in pdb_done: # print "Not inserted!!" 
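# NOTE: the merge loop above relaxes the comparison threshold in 3-degree
# steps (numpy.arange(startT, thresholdCompare + 3, 3)) and, at each step,
# keeps re-scanning the surviving cluster representatives until no further
# pair can be merged, i.e. it iterates each threshold to a fixed point.
# Schematic of the control flow (merge_once is hypothetical, illustration
# only):
#
#   for thresh in numpy.arange(startT, thresholdCompare + 3, 3):
#       changed = True
#       while changed:
#           changed = merge_once(tomerge, subclu, thresh)  # True if a merge happened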
                # continue
                # pdb_done.append(rotaz["name"])
                dictio["heapSolutions"].push((-1 * rotaz["llg"], -1 * rotaz["zscore"]), rotaz)
        nuovo_clus.append(copy.deepcopy(dictio))
        indes += 1
        # print "================================================="
    tome = {}
    for kj in range(len(nuovo_clus)):
        tome[kj] = []
    print "Final:", tome
    # else:
    #     tomerge = {}
    #     for va in range(len(nuovo_clus)):
    #         tomerge[va] = []
    return tome, nuovo_clus


def __cmp_atom(a, b):
    """Compare two atoms by the residue identifier of their parents (cmp-style)."""
    ai = a.get_full_id()[3]
    bi = b.get_full_id()[3]
    return cmp(ai, bi)


def __cmp_statsrt(a, b):
    """Compare two tuples by their first element (cmp-style)."""
    ai = a[0]
    bi = b[0]
    return cmp(ai, bi)


def __cmp_cluster_zscore(a, b):
    """Compare two clusters by the ZSCORE of their best solution (cmp-style)."""
    li1 = sorted(a["heapSolutions"].asList(), __cmp_rota3, reverse=True)
    if len(li1) > 0:
        prioa, rota = li1[0]
    else:
        prioa = (-1, -1)
        rota = {"zscore": -10000000000000}
    li2 = sorted(b["heapSolutions"].asList(), __cmp_rota3, reverse=True)
    if len(li2) > 0:
        priob, rotb = li2[0]
    else:
        priob = (-1, -1)
        rotb = {"zscore": -10000000000000}
    ai = float(rota["zscore"])
    bi = float(rotb["zscore"])
    return cmp(ai, bi)


def __cmp_cluster(a, b):
    """Compare two clusters by the LLG of their best solution (cmp-style)."""
    li1 = sorted(a["heapSolutions"].asList(), __cmp_rota2, reverse=True)
    if len(li1) > 0:
        prioa, rota = li1[0]
    else:
        prioa = (-1, -1)
        rota = {"llg": -10000000000000}
    li2 = sorted(b["heapSolutions"].asList(), __cmp_rota2, reverse=True)
    if len(li2) > 0:
        priob, rotb = li2[0]
    else:
        priob = (-1, -1)
        rotb = {"llg": -10000000000000}
    ai = float(rota["llg"])
    bi = float(rotb["llg"])
    return cmp(ai, bi)


def __cmp_rota(a, b):
    """Compare two rotation dictionaries by LLG (cmp-style)."""
    ai = float(a["llg"])
    bi = float(b["llg"])
    return cmp(ai, bi)


def __cmp_rota2(a, b):
    """Compare two (priority, rotation) tuples by (LLG, ZSCORE) (cmp-style)."""
    ai = (float(a[1]["llg"]), float(a[1]["zscore"]))
    bi = (float(b[1]["llg"]), float(b[1]["zscore"]))
    return cmp(ai, bi)


def __cmp_rota3(a, b):
    """Compare two (priority, rotation) tuples by (ZSCORE, LLG) (cmp-style)."""
    ai = (float(a[1]["zscore"]), float(a[1]["llg"]))
    bi = (float(b[1]["zscore"]), float(b[1]["llg"]))
    return cmp(ai, bi)


def checkYOURoutput(myfile, conditioEND, testEND, sleep_ifnot_ready=True, failure_test=None):
    """
    Poll an output file until it signals normal termination or failure.

    :param myfile: path to the output file to check
    :type myfile: str
    :param conditioEND: string (or list of strings) whose occurrences mark normal termination
    :type conditioEND: str or list
    :param testEND: number of occurrences of conditioEND that signal completion
    :type testEND: int
    :param sleep_ifnot_ready: if True, sleep 3 seconds and retry until a condition is met
    :type sleep_ifnot_ready: bool
    :param failure_test: string (or list of strings) whose occurrences mark an aborted job
    :type failure_test: str or list
    :return: True once the end condition is met, False if not ready and not waiting, None on failure
    :rtype: bool or None
    """
    esegui = True
    correct = False
    while esegui:
        if not os.path.exists(myfile):
            if sleep_ifnot_ready:
                time.sleep(3)
                continue
            else:
                return False
        if failure_test != None and testEND != None and sleep_ifnot_ready:
            f = open(myfile)
            e = f.read()
            f.close()
            if isinstance(failure_test, str):
                numero = e.count(failure_test)
                # print "--------",myfile,failure_test,numero,testEND,conditioEND
                if numero == int(testEND):
                    return None
            elif isinstance(failure_test, list):
                # several failure markers: count them all
                numero = 0
                for uu in failure_test:
                    numero += e.count(uu)
                if numero == int(testEND):
                    return None
        if conditioEND != None and testEND != None:
            f = open(myfile)
            e = f.read()
            f.close()
            if isinstance(conditioEND, str):
                numero = e.count(conditioEND)
                # print "+++--------+++",myfile,numero,conditioEND,testEND,type(numero),type(testEND)
                if numero == int(testEND):
                    esegui = False
                    correct = True
                elif sleep_ifnot_ready:
                    time.sleep(3)
                else:
                    return False
            elif isinstance(conditioEND, list):
                numero = 0
                for uu in conditioEND:
                    numero += e.count(uu)
                if numero == int(testEND):
                    esegui
= False correct = True elif sleep_ifnot_ready: time.sleep(3) else: return False else: esegui = False correct = True return correct def evaluateExp(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, convNames, dirPath, method="fast", renamePDBs=True, insfile=None): """Function to evaluate results from SHELXE expansions with autotracing. :param DicParameters: :param cm: :param sym: :param DicGridConn: dictionary with information required for the connections. Keys: "username","host","port", "passkey","promptA","isnfs","remote_submitter_username","remote_submitter_host", "remote_submitter_port","promptB" :type DicGridConn: dict :param nameJob: :param outputDicr: :param nqueue: :param convNames: :param dirPath: :param method: :param renamePDBs: :param insfile: :return: """ status = "--" fromIn = 0 toIn = nqueue - 1 eseguitoUnaVolta = False startexp = ".pda" if insfile != None: startexp = ".phi" dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) ndir = 0 dirente2 = "" current_dir2 = "" while fromIn <= toIn: yetEvaluated = True if yetEvaluated: riprova = True if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 havetoskip = False while riprova: try: while 1: if hasattr(cm, "channel"): wse = cm.get_remote_file(str(fromIn) + ".lst", os.path.join(outputDic, str(fromIn) + ".lst"), conditioEND=SHELXE_LST_END_CONDITION, testEND=SHELXE_LST_END_TEST) if isinstance(wse, bool) and not wse: wser = cm.get_remote_file(str(fromIn) + '.lst', os.path.join(outputDic, str(fromIn) + '.lst'), conditioEND=SHELXE_LST_FAILURE_CONDITION, testEND=SHELXE_LST_END_TEST) if isinstance(wser, bool) and not wser: print 'File ' + str(fromIn) + '.lst not ready sleeping 3 seconds...' time.sleep(3) continue else: havetoskip = True break wse = cm.get_remote_file(str(fromIn) + ".pdb", os.path.join(outputDic, str(fromIn) + ".pdb"), conditioEND=SHELXE_PDB_PHS_END_CONDITION, testEND=SHELXE_PDB_PHS_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".pdb not ready sleeping 3 seconds..." time.sleep(3) continue wse = cm.get_remote_file(str(fromIn) + ".phs", os.path.join(outputDic, str(fromIn) + ".phs"), conditioEND=SHELXE_PDB_PHS_END_CONDITION, testEND=SHELXE_PDB_PHS_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".phs not ready sleeping 3 seconds..." 
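# NOTE: a SHELXE job has two terminal states: a normal trace prints the
# token in SHELXE_LST_END_CONDITION_LOCAL ("finished") while an abandoned
# one prints SHELXE_LST_FAILURE_CONDITION_LOCAL ("giving up").
# checkYOURoutput encodes both: it returns None once the failure token is
# counted testEND times (the caller then sets havetoskip) and True once the
# end token is. Illustrative call, mirroring the local branch below (the
# job index 7 is an arbitrary example):
#
#   outcome = checkYOURoutput("7.lst", SHELXE_LST_END_CONDITION_LOCAL,
#                             SHELXE_LST_END_TEST,
#                             failure_test=SHELXE_LST_FAILURE_CONDITION_LOCAL)
#   havetoskip = outcome is None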
time.sleep(3) continue break elif os.path.exists(os.path.join(outputDic, str(fromIn) + ".lst")): failure = checkYOURoutput(os.path.join(outputDic, str(fromIn) + ".lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST, failure_test=SHELXE_LST_FAILURE_CONDITION_LOCAL) if failure == None: havetoskip = True break while True: atest = os.path.exists(os.path.join(outputDic, str(fromIn) + ".pdb")) btest = os.path.exists(os.path.join(outputDic, str(fromIn) + ".phs")) if atest and btest: break break else: time.sleep(3) continue print "Evaluating " + str(fromIn) + " model corresponding to: " + str( convNames[str(fromIn) + startexp]) if not havetoskip and renamePDBs: shutil.move(outputDic + str(fromIn) + ".pdb", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".pdb")) if insfile == None: shutil.move(outputDic + str(fromIn) + ".pda", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".pda")) else: shutil.move(outputDic + str(fromIn) + ".phi", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".phi")) shutil.move(outputDic + str(fromIn) + ".ins", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".ins")) shutil.move(outputDic + str(fromIn) + ".lst", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".lst")) shutil.move(outputDic + str(fromIn) + ".hkl", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".hkl")) shutil.move(outputDic + str(fromIn) + ".phs", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".phs")) try: shutil.move(outputDic + str(fromIn) + ".pdo", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".pdo")) except: pass try: shutil.move(outputDic + str(fromIn) + ".ent", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".ent")) shutil.move(outputDic + str(fromIn) + "_trace.ps", outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( "_trace.ps")) except: pass if not havetoskip and method == "fast": pdbf = outputDic + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str(".pdb") shutil.copyfile(pdbf, dirPath + "/" + os.path.basename(convNames[str(fromIn) + startexp])[:-4] + str( ".pdb")) riprova = False if havetoskip: os.remove(outputDic + str(fromIn) + startexp) os.remove(outputDic + str(fromIn) + ".lst") os.remove(outputDic + str(fromIn) + ".hkl") try: os.remove(outputDic + str(fromIn) + ".ent") os.remove(outputDic + str(fromIn) + ".ins") os.remove(outputDic + str(fromIn) + ".phi") except: pass # if hasattr(cm,"channel"): # print cm.remove_remote_file(str(fromIn)+".pda") # print cm.remove_remote_file(str(fromIn)+".lst") # print cm.remove_remote_file(str(fromIn)+".pdb") # print cm.remove_remote_file(str(fromIn)+".phs") # print cm.remove_remote_file(str(fromIn)+".hkl") fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) except: print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True time.sleep(3) now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." + ti if not yetEvaluated: time.sleep(60) print "Now all models are performed." 
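# NOTE: renaming recap: convNames maps each sequential job name ("<n>.pda",
# or "<n>.phi" when expanding from phases) back to the originating model,
# so every numbered SHELXE output gets renamed to "<model>.<ext>".
# Illustration only ("ensemble5" is a made-up model name):
#
#   base = os.path.basename(convNames["7" + startexp])[:-4]   # e.g. "ensemble5"
#   # "7.pdb" -> "ensemble5.pdb", "7.lst" -> "ensemble5.lst", and so on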
if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file(PATH_NEW_SHELXE) cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) # NOTE CM: Temporary solution. Best CC is retrieved when writing the output but we rather get it from here #print 'SHERLOCK outputDic is ',outputDic #print 'SHERLOCK command is ',"grep -H CC "+ outputDic + '*.pdb' p = subprocess.Popen('grep -H CC ' + outputDic + '*.pdb', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, err = p.communicate() out = out.strip() #print 'SHERLOCK out',out #sys.exit(0) outlist = out.splitlines() finalcc = [] for lits in outlist: linlis = lits.split() #print 'SHERLOCK linlis',linlis fincc = float(linlis[6][:-1]) #print 'SHERLOCK fincc',fincc restr = int(linlis[7]) npdb = linlis[0][:-6] finalcc.append((fincc, restr, npdb)) finalcc = sorted(finalcc, reverse=True) backPDB = finalcc[0][2] backCC = finalcc[0][0] return backPDB,backCC def evaluateExp_CC(DicParameters, cm, sym, DicGridConn, nameJob, outputDicr, nqueue, convNames, isArcimboldo=False, usePDO=False, initcc_global=True, savePHS=False, archivingAsBigFile=True, phs_fom_statistics=False): """ :param DicParameters: :type DicParameters: :param cm: Instance of the grid object :type cm: Grid.Grid object :param sym: Instance of the SystemUtility object :type sym: SystemUtility.SystemUtility object :param DicGridConn: dictionary with information required for the connections. Keys: "username","host","port", "passkey","promptA","isnfs","remote_submitter_username","remote_submitter_host", "remote_submitter_port","promptB" :type DicGridConn: dict :param nameJob: name for the job :type nameJob: str :param outputDicr: :type outputDicr: :param nqueue: :type nqueue: :param convNames: :type convNames: :param isArcimboldo: :type isArcimboldo: :param usePDO: :type usePDO: :param initcc_global: :type initcc_global: :param savePHS: :type savePHS: :param archivingAsBigFile: :type archivingAsBigFile: :param phs_fom_statistics: :type phs_fom_statistics: :return: :rtype: """ status = "--" fromIn = 0 toIn = nqueue - 1 # rotas = (CLUDATA["heapSolutions"]).asList() ClustPDB = [] eseguitoUnaVolta = False hp = ADT.Heap() dirente = outputDicr if dirente[-1] == "/": dirente = dirente[:-1] if hasattr(cm, "channel"): current_dir = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente)) ndir = 0 dirente2 = "" current_dir2 = "" tarro = None outputDic = "" while fromIn <= toIn: yetEvaluated = True if yetEvaluated: if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: outputDic = os.path.join(outputDicr, "./" + str(ndir) + "/") dirente2 = outputDic if dirente2[-1] == "/": dirente2 = dirente2[:-1] if archivingAsBigFile: if tarro != None: tarro.close() tarro = tarfile.open(os.path.join(outputDic, "" + str(ndir) + ".tar.gz"), "w:gz") if hasattr(cm, "channel"): SystemUtility.remote_reconnection(cm.get_remote_pwd()) current_dir2 = cm.get_remote_pwd() print cm.change_remote_dir(os.path.basename(dirente2)) ndir += 1 riprova = True while riprova: try: while 1: print "Evaluating " + str(fromIn) + " model corresponding to: " + str( convNames[str(fromIn) + ".pda"]) if hasattr(cm, "channel"): wse = cm.get_remote_file(str(fromIn) + ".lst", os.path.join(outputDic, str(fromIn) + ".lst"), conditioEND=SHELXE_LST_END_CONDITION, testEND=SHELXE_LST_END_TEST) if isinstance(wse, bool) and not wse: print "File " + str(fromIn) + ".lst not ready sleeping 3 seconds..." 
time.sleep(3) continue if savePHS: wse = cm.get_remote_file(str(fromIn) + ".phs", os.path.join(outputDic, str(fromIn) + ".phs")) if usePDO: wse = cm.get_remote_file(str(fromIn) + ".pdo", os.path.join(outputDic, str(fromIn) + ".pdo")) break elif os.path.exists(os.path.join(outputDic, str(fromIn) + ".lst")): checkYOURoutput(os.path.join(outputDic, str(fromIn) + ".lst"), SHELXE_LST_END_CONDITION_LOCAL, SHELXE_LST_END_TEST) break else: time.sleep(3) continue f = open(outputDic + str(fromIn) + ".lst", "r") lines = f.readlines() f.close() nAtoms = -1 nER = -1 initCC = -1 finalCC = -1 wMPEa = -1 wMPEb = -1 wMPEc = -1 wMPEd = -1 shx = 0.0 shy = 0.0 shz = 0.0 p = subprocess.Popen('grep wMPE ' + outputDic + str(fromIn) + ".lst", shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) pout, perr = p.communicate() pout = pout.strip() outlist = pout.splitlines() if len(outlist) > 0: wMPEa = float("".join(outlist[0].split("/")).split()[-2]) wMPEb = float("".join(outlist[0].split("/")).split()[-1]) wMPEc = float("".join(outlist[-1].split("/")).split()[-2]) wMPEd = float("".join(outlist[-1].split("/")).split()[-1]) contrast = [-1] connect = [-1] for up in range(len(lines)): line = lines[up] liloline = (line.strip()).split() if (line.strip()).startswith("Overall CC between"): line3 = line.strip() line2 = (lines[up - 2]).strip() line2L = line2.split() line3L = line3.split() try: nER = int(line2L[0]) except Exception: nER = 0 initCC = float((line3L[-1])[:-1]) # print "INITCC is",initcc continue elif len(liloline) == 8 and liloline[1] == "Atoms" and liloline[2] == "read" and liloline[ 3] == "from" and liloline[4] == "PDB": nAtoms = int(liloline[0]) # print "NATOMS",nAtoms continue if (line.strip()).startswith("Estimated mean FOM ="): line = line.strip() lineL = line.split() finalCC = float(lineL[-2]) # print "FINAL CC", finalCC if (line.strip()).startswith("Shift from model"): line = line.strip() lineL = line.split(" ") shx = float(lineL[-3][3:]) shy = float(lineL[-2][3:]) shz = float(lineL[-1][3:]) # print "SHELXE origin shift",shx,shy,shz continue if (line.strip()).startswith("") and len(line.split()) > 9: line = line.strip() lineL = line.split() contrast.append(float(lineL[5][:-1])) connect.append(float(lineL[8])) # if initCC == -1: # continue riprova = False fe = open(outputDic + str(fromIn) + ".pda", "r") fer = fe.readlines() fe.close() clus = None for luc in fer: if luc.startswith("REMARK CLUSTER"): clus = luc.split()[2] break if clus is None: clus = "-1" if not hasattr(cm, "channel") and phs_fom_statistics: f = open(outputDic + str(fromIn) + ".phs", "r") allphs = f.readlines() f.close() summa_fom = [] for line in allphs: # lista = line.strip().split() summa_fom.append(float(line[23:29])) summa_fom = sorted(summa_fom) summa_fom = summa_fom[-500:] mean_fom = numpy.mean(summa_fom) std_fom = numpy.std(summa_fom) else: mean_fom = -1 std_fom = -1 f = open(outputDic + "../tempSolCC.sum", "a") if "_" in clus: clus = "_".join(map(lambda x: str(x), sorted(map(lambda x: int(x), clus.split("_"))))) f.write("===========\n") f.write("MODEL: " + str(fromIn) + "\tCORRESP.: " + os.path.abspath( outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])) + "\tCLUSTER: " + str( clus) + "\n") f.write("NATOMS: " + str(nAtoms) + "\t" + "NER: " + str(nER) + "\t" + "INITCC: " + str( initCC) + "\t" + "PSEUDO-FREE: " + str(finalCC) + "\t" + "wMPE: " + str(wMPEa) + " / " + str( wMPEb) + " -- " + str(wMPEc) + " / " + str(wMPEd) + "\n") f.write("SHIFT-ORIG: " + str(shx) + " " + str(shy) + " " + 
str(shz) + "\tMAX-CONTRAST: " + str( max(contrast)) + "\tMAX-CONNECT: " + str(max(connect)) + "\tmfom: " + str( mean_fom) + "\tsfom: " + str(std_fom) + " \n") f.write("===========\n") f.close() shutil.move(outputDic + str(fromIn) + ".pda", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])) dizio = {"model": fromIn, "corresp": outputDic + os.path.basename(convNames[str(fromIn) + ".pda"]), "natoms": nAtoms, "ner": nER, "initcc": initCC, "finalcc": finalCC, "cluster": clus, "wMPE_init": [wMPEa, wMPEb], "wMPE_end": [wMPEc, wMPEd], "shift_origin": [shx, shy, shz], "contrast": max(contrast), "connect": max(connect), "mfom": mean_fom, "sfom": std_fom} ClustPDB.append(dizio) if initcc_global: hp.push(-1 * dizio["initcc"], dizio) else: # initcc per residue hp.push(-1 * (dizio["initcc"] / dizio["ner"]), dizio) # PULIZIA FILES INUTILI # os.remove(outputDic+str(fromIn)+".pda") if not savePHS: os.remove(outputDic + str(fromIn) + ".lst") try: os.remove(outputDic + str(fromIn) + ".phs") except: pass else: shutil.move(outputDic + str(fromIn) + ".lst", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") shutil.move(outputDic + str(fromIn) + ".phs", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") if archivingAsBigFile: tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") try: os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") except: pass else: tarro = tarfile.open( outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".tar.gz", "w:gz") tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") tarro.add(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs", arcname="./" + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") tarro.close() os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".lst") try: os.remove(outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".phs") except: pass os.remove(outputDic + str(fromIn) + ".hkl") try: if not usePDO: os.remove(outputDic + str(fromIn) + ".pdo") else: shutil.move(outputDic + str(fromIn) + ".pdo", outputDic + os.path.basename(convNames[str(fromIn) + ".pda"])[:-4] + ".pdo") except: pass try: os.remove(outputDic + str(fromIn) + ".ent") os.remove(outputDic + str(fromIn) + "_trace.ps") except: pass # if hasattr(cm,"channel"): # print cm.remove_remote_file(str(fromIn)+".pda") # print cm.remove_remote_file(str(fromIn)+".lst") # print cm.remove_remote_file(str(fromIn)+".pdb") # print cm.remove_remote_file(str(fromIn)+".phs") # print cm.remove_remote_file(str(fromIn)+".hkl") fromIn += 1 if fromIn % NUMBER_OF_FILES_PER_DIRECTORY == 0: if hasattr(cm, "channel"): cm.change_remote_dir(current_dir2) except: # print "Error...Trying to read again output files..." print sys.exc_info() traceback.print_exc(file=sys.stdout) riprova = True time.sleep(1) now = datetime.datetime.now() ti = now.strftime("%Y-%m-%d %H:%M") print "Not all models are yet performed..." 
+ ti if not yetEvaluated: time.sleep(60) print "Now all models are performed." if archivingAsBigFile: if tarro != None: tarro.close() if hasattr(cm, "channel"): # print cm.remove_remote_file(nameJob+".cmd") # print cm.remove_remote_file(PATH_NEW_SHELXE) cm.change_remote_dir(current_dir) print cm.remove_remote_dir(os.path.basename(dirente)) SystemUtility.close_connection(DicGridConn, DicParameters, cm) f = open(outputDic + "../solCC.sum", "w") visited = [] for pla in range(hp.len()): item = hp.pop() prio, ele = item leden = os.path.basename(str(ele["corresp"])).split("_") if not isArcimboldo and len(leden) == 5 and "_".join(leden[:-1]) in visited: os.remove(ele["corresp"]) continue elif not isArcimboldo and len(leden) == 5: visited.append("_".join(leden[:-1])) npath = os.path.join(os.path.dirname(ele["corresp"]), "_".join(leden[:-1]) + ".pdb") shutil.move(ele["corresp"], npath) ele["corresp"] = npath f.write("===========\n") f.write("MODEL: " + str(pla + 1) + "\tCORRESP.: " + os.path.abspath(str(ele["corresp"])) + "\tCLUSTER: " + str( ele["cluster"]) + "\n") f.write("NATOMS: " + str(ele["natoms"]) + "\t" + "NER: " + str(ele["ner"]) + "\t" + "INITCC: " + str( ele["initcc"]) + "\t" + "PSEUDO-FREE: " + str(ele["finalcc"]) + "\t" + "wMPE: " + str( ele["wMPE_init"][0]) + " / " + str(ele["wMPE_init"][1]) + " -- " + str(ele["wMPE_end"][0]) + " / " + str( ele["wMPE_end"][1]) + "\n") f.write("SHIFT-ORIG: " + str(ele["shift_origin"][0]) + " " + str(ele["shift_origin"][1]) + " " + str( ele["shift_origin"][2]) + "\tMAX-CONTRAST: " + str(ele["contrast"]) + "\tMAX-CONNECT: " + str( ele["connect"]) + "\tmfom: " + str(ele["mfom"]) + "\tsfom: " + str(ele["sfom"]) + "\t" + str( ele["wMPE_init"][0]) + " \n") f.write("===========\n") f.close() os.remove(outputDic + "../tempSolCC.sum") CC_Val1, con = readCCValFromSUM(outputDic + "../solCC.sum") return CC_Val1 def selectCC(CC_Val3): """ :param CC_Val3: :type CC_Val3: :return: :rtype: """ hp = ADT.Heap() for item in c1: prio = -1 * (item["initcc"] / item["ner"]) hp.push(prio, item) done = [] lio = [] for tup in hp: prio, item = tup if os.path.basename(item["corresp"]) not in done: done.append(os.path.basename(item["corresp"])) dire = os.path.split(item["corresp"])[0] li = os.path.basename(item["corresp"]).split("_")[:-1] filename = os.path.join(dire, "_".join(li)[:-1] + ".pdb") shutil.move(item["corresp"], filename) item["corresp"] = filename lio.append(item) print "", os.path.basename(filename), item["ner"], item["initcc"], item["initcc"] / item["ner"], \ item["wMPE_init"][0] return lio def unifyCC2(c1, c2, c3, convNames, CluAll, suffixA="", suffixB="", suffixC="", llgn=5, solution_sorting_scheme="AUTO", mode="ARCIMBOLDO_LITE"): """ :param c1: :type c1: :param c2: :type c2: :param c3: :type c3: :param convNames: :type convNames: :param CluAll: :type CluAll: :param suffixA: :type suffixA: :param suffixB: :type suffixB: :param suffixC: :type suffixC: :param llgn: :type llgn: :param solution_sorting_scheme: :type solution_sorting_scheme: :param mode: :type mode: :return: :rtype: """ if solution_sorting_scheme not in ["AUTO", "LLG", "ZSCORE", "INITCC", "COMBINED"]: solution_sorting_scheme = "AUTO" isminuso = all(map(lambda x: int(x["ner"]) != 0, c1 + c2 + c3)) c1 = [dict(dizio, **{"suffix": suffixA}) for dizio in c1] c2 = [dict(dizio, **{"suffix": suffixB}) for dizio in c2] c3 = [dict(dizio, **{"suffix": suffixC}) for dizio in c3] if solution_sorting_scheme == "INITCC" or (isminuso and solution_sorting_scheme == "AUTO"): initccl = sorted(c1 + c2 + c3, key=lambda x: 
x["initcc"], reverse=True) done = [] initcclo = [] print "SORTING BY INITCC" for dil in range(len(initccl)): if os.path.basename(initccl[dil]["corresp"]) not in done: print "NAME:", initccl[dil]["corresp"], "INITCC:", initccl[dil]["initcc"], "NER:", initccl[dil]["ner"] done.append(os.path.basename(initccl[dil]["corresp"])) initcclo.append(initccl[dil]) return initcclo inter = [item[1] for sublist in map(lambda y: y["heapSolutions"].asList(), CluAll) for item in sublist] # all solutions among all clusters # previ = [item for sublist in [y["fixed_frags"] for y in inter if "fixed_frags" in y] for item in sublist] previ = [{"name": os.path.basename(dizio["corresp"])[:-4], "llg": dizio["llg"], "zscore": dizio["zscore"]} for dizio in c1 + c2 + c3 if "llg" in dizio] # print "ANTES" # print "DESPUES" # condition = "ensembleID" if mode == "ARCIMBOLDO_LITE": llgd = { dizio["name"] if len(dizio["name"].split("-")) > 1 else dizio["name"] + "-1": [dizio["llg"], dizio["zscore"]] for dizio in inter + previ} else: # traceback.print_exc(file=sys.stdout) llgd = {os.path.basename(convNames[dizio["name"]]): [dizio["llg"], dizio["zscore"]] for dizio in inter + previ} # for na in llgd: # print(na,llgd[na]) # print("===========================================") if solution_sorting_scheme == "LLG" or (not isminuso and solution_sorting_scheme == "AUTO"): ###for x in c1+c2+c3: ### print "SHERLOCK",os.path.basename(x["corresp"]) ### print "SHERLOCK",llgd[os.path.basename(x["corresp"])[:-4]] try: # ARCIMBOLDO_LITE case # for x in c1+c2+c3: # print os.path.basename(x["corresp"]) # print llgd[os.path.basename(x["corresp"])[:-4]] llgl = sorted(c1 + c2 + c3, key=lambda x: llgd[os.path.basename(x["corresp"])[:-4]][0], reverse=True) llgl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])[:-4]][0], "zscore": llgd[os.path.basename(dizio["corresp"])[:-4]][1]}) for dizio in llgl] except: # ARCIMBOLDO_BORGES case print sys.exc_info() traceback.print_exc(file=sys.stdout) for x in c1+c2+c3: print os.path.basename(x["corresp"]) for nu in llgd: print nu, llgd[nu], llgl = sorted(c1 + c2 + c3, key=lambda x: llgd[os.path.basename(x["corresp"])][0], reverse=True) llgl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])][0], "zscore": llgd[os.path.basename(dizio["corresp"])][1]}) for dizio in llgl] print "SORTING BY LLG" done = [] llglo = [] for dil in range(len(llgl)): if os.path.basename(llgl[dil]["corresp"]) not in done: print "NAME:", llgl[dil]["corresp"], "LLG:", llgl[dil][ "llg"] # d[os.path.basename(llgl[dil]["corresp"]).split("xx")[1][:-4]][0] done.append(os.path.basename(llgl[dil]["corresp"])) llglo.append(llgl[dil]) return llglo if solution_sorting_scheme == "ZSCORE": try: zscl = sorted(c1 + c2 + c3, key=lambda x: llgd[(os.path.basename(x["corresp"])[:-4])][1], reverse=True) zscl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])[:-4]][0], "zscore": llgd[os.path.basename(dizio["corresp"])[:-4]][1]}) for dizio in zscl] except: zscl = sorted(c1 + c2 + c3, key=lambda x: llgd[os.path.basename(x["corresp"])][1], reverse=True) zscl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])][0], "zscore": llgd[os.path.basename(dizio["corresp"])][1]}) for dizio in zscl] print "SORTING BY ZSCORE" done = [] zsclo = [] for dil in range(len(zscl)): if os.path.basename(zscl[dil]["corresp"]) not in done: print "NAME:", zscl[dil]["corresp"], "ZSCORE:", zscl[dil][ "zscore"] # llgd[os.path.basename(zscl[dil]["corresp"]).split("xx")[1][:-4]][1] done.append(os.path.basename(zscl[dil]["corresp"])) 
zsclo.append(zscl[dil]) return zsclo if solution_sorting_scheme == "COMBINED": maxinitcc = max(float(max(map(lambda x: x["initcc"], c1 + c2 + c3))), 1.0) maxllg = max(float(max(map(lambda x: x[0], llgd.values()))), 1.0) maxzscore = max(float(max(map(lambda x: x[1], llgd.values()))), 1.0) try: # combl = sorted(c1+c2+c3, key=lambda x: (llgd[os.path.basename(x["corresp"]).split("xx")[1][:-4]][0]/maxllg)+(llgd[os.path.basename(x["corresp"]).split("xx")[1][:-4]][1]/maxzscore)+(x["initcc"]/maxinitcc), reverse=True) # combl = [dict(dizio, **{"llg":llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][0],"zscore":llgd[os.path.basename(dizio["corresp"]).split("xx")[1][:-4]][1]}) for dizio in combl] combl = [dict(dizio, **{"llg": llgd[(os.path.basename(dizio["corresp"])[:-4])][0], "zscore": llgd[(os.path.basename(dizio["corresp"])[:-4])][1], "combined": (llgd[(os.path.basename(dizio["corresp"])[:-4])][ 0] / maxllg) + ( llgd[(os.path.basename(dizio["corresp"])[:-4])][ 1] / maxzscore) + (dizio["initcc"] / maxinitcc)}) for dizio in c1 + c2 + c3] combl = sorted(combl, key=lambda x: x["combined"], reverse=True) except: combl = [dict(dizio, **{"llg": llgd[os.path.basename(dizio["corresp"])][0], "zscore": llgd[os.path.basename(dizio["corresp"])][1], "combined": (llgd[os.path.basename(dizio["corresp"])][0] / maxllg) + ( llgd[os.path.basename(dizio["corresp"])][1] / maxzscore) + ( dizio["initcc"] / maxinitcc)}) for dizio in c1 + c2 + c3] combl = sorted(combl, key=lambda x: x["combined"], reverse=True) print "SORTING BY COMBINED FOM of LLG, SCORE, INITCC" done = [] comblo = [] for dil in range(len(combl)): if os.path.basename(combl[dil]["corresp"]) not in done: print "NAME:", combl[dil]["corresp"], "LLG:", combl[dil]["llg"], "ZSCORE:", combl[dil][ "zscore"], "INITCC:", combl[dil]["initcc"], "NER", combl[dil]["ner"], "COMBINED:", combl[dil][ "combined"] done.append(os.path.basename(combl[dil]["corresp"])) comblo.append(combl[dil]) return comblo def unifyCC(c1, c2, c3, convNames, CluAll, suffixA="", suffixB="", suffixC="", llgn=5, solution_sorting_scheme="AUTO"): """ :param c1: :type c1: :param c2: :type c2: :param c3: :type c3: :param convNames: :type convNames: :param CluAll: :type CluAll: :param suffixA: :type suffixA: :param suffixB: :type suffixB: :param suffixC: :type suffixC: :param llgn: :type llgn: :param solution_sorting_scheme: :type solution_sorting_scheme: :return: :rtype: """ if solution_sorting_scheme not in ["AUTO", "LLG", "ZSCORE", "INITCC", "COMBINED"]: solution_sorting_scheme = "AUTO" hp = ADT.Heap() for item in c1: prio = -1 * item["initcc"] item["suffix"] = suffixA hp.push(prio, item) for item in c2: prio = -1 * item["initcc"] item["suffix"] = suffixB hp.push(prio, item) for item in c3: prio = -1 * item["initcc"] item["suffix"] = suffixC hp.push(prio, item) lio = [] done = [] hwn = 0 hl = ADT.Heap() for clu in CluAll: for prio, rotaz in sorted(clu["heapSolutions"].asList(), __cmp_rota2, reverse=True): if hwn >= llgn: hwn = 0 break else: hl.push(prio, rotaz) hwn += 1 hwn = 0 if len(hl.asList()) > 0: for sol in hl: if hwn >= llgn: break pin, itn = sol bestllg = os.path.basename(convNames[itn["name"]]) for pri, itm in hp.asList(): if os.path.basename(itm["corresp"]) == bestllg: lio.append(itm) done.append(bestllg) print "Adding top solution:", bestllg, "with LLG:", itn["llg"], "and ZSCORE:", itn["zscore"] hwn += 1 break for tup in hp: prio, item = tup if os.path.basename(item["corresp"]) not in done: lio.append(item) done.append(os.path.basename(item["corresp"])) return lio def 
def shelxe_cycle_BORGES(lock, DicParameters, ClusAll, cm, sym, DicGridConn, i, current_directory, nameOutput, dirPathPart, fromNcycles, toNcycles, spaceGroup, hkl, ent, cell_dim, nice, shlxLineaB, shlxLineaLast, traceShelxe, fixed_frags, USE_PACKING, USE_TRANSLA, USE_REFINEMENT, NUMBER_REF_CYCLES, USE_RGR, isSHREDDER, tuple_phi=None):
    """
    Performs the shelxe autotracing expansions for an ARCIMBOLDO_BORGES run

    :param lock: reentrant lock to avoid simultaneous writing access to the output file
    :type lock: threading.Condition(RLock)
    :param DicParameters: dictionary with key-value pairs required at different steps. Keys: "nameExecution",
    :type DicParameters: dict
    :param ClusAll:
    :type ClusAll:
    :param cm: Instance of the grid object
    :type cm: Grid.Grid object
    :param sym: Instance of the SystemUtility object
    :type sym: SystemUtility.SystemUtility object
    :param DicGridConn: dictionary with information required for the connections. Keys: "username","host","port","passkey","promptA","isnfs","remote_submitter_username","remote_submitter_host","remote_submitter_port","promptB"
    :type DicGridConn: dict
    :param i: current rotation cluster being evaluated
    :type i: int
    :param current_directory: current working directory, where all the folders for the BORGES steps are found
    :type current_directory: str
    :param nameOutput: path without extension for the output html and xml files
    :type nameOutput: str
    :param dirPathPart: path to the folder with the prepared input files
    :type dirPathPart: str
    :param fromNcycles: starting autotracing cycle number
    :type fromNcycles: int
    :param toNcycles: last autotracing cycle number
    :type toNcycles: int
    :param spaceGroup: space group symbol
    :type spaceGroup: str
    :param hkl: path to the hkl reflection file
    :type hkl: str
    :param ent: path to the ent final model file for post-mortem analysis
    :type ent: str
    :param cell_dim: list with the unit cell parameters
    :type cell_dim: list
    :param shlxLineaB: shelxe argument line for all but the last cycle of autotracing
    :type shlxLineaB: str
    :param shlxLineaLast: shelxe argument line for the last cycle of autotracing
    :type shlxLineaLast: str
    :param traceShelxe:
    :type traceShelxe:
    :param fixed_frags:
    :type fixed_frags:
    :param USE_PACKING: boolean to indicate if packing must be performed
    :type USE_PACKING: bool
    :param USE_TRANSLA: boolean to indicate if the translation must be performed
    :type USE_TRANSLA: bool
    :param USE_REFINEMENT:
    :type USE_REFINEMENT:
    :param NUMBER_REF_CYCLES:
    :type NUMBER_REF_CYCLES:
    :param USE_RGR:
    :param isSHREDDER:
    :param tuple_phi:
    :return:
    """
    fromphis = False
    is_alixe_exp = False
    insfile = None
    if isinstance(tuple_phi, tuple):
        print 'This expansion is going to be performed starting from phi files'
        fromphis = True
        is_alixe_exp = True
        # tuple_phi = (name.ins, {"name.phi": {...}})
        insfile = tuple_phi[0]
        lista_phis = tuple_phi[1].keys()
        if not os.path.exists(dirPathPart):
            os.makedirs(dirPathPart)
        for phi in lista_phis:
            try:
                os.symlink(phi, os.path.join(dirPathPart, os.path.basename(phi)))
            except:
                pass
    else:
        print 'This expansion is going to be performed starting from pda files'
        if not os.path.exists(dirPathPart):
            shutil.copytree(os.path.join(current_directory, "10_PREPARED/" + str(i) + "/"), dirPathPart)
    if fromphis:
        shlxLineaB = shlxLineaB + " -v0"
    for t in range(fromNcycles, toNcycles):
        if t == toNcycles - 1:
            # last cycle of expansions: switch to the final shelxe line
            shlxLineaB = shlxLineaLast
        SystemUtility.open_connection(DicGridConn, DicParameters, cm)
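# On the second cycle the block below rewrites the shelxe line: the autotracing
# flag is forced to -a1 and any -K flag is dropped, each together with a
# detached value token, if present. Doctest-style sketch of the effect (the
# flag values are invented for illustration):
# >>> rewrite("-m15 -a8 -K 2 -s0.5")   # hypothetical helper applying the loop below
# '-m15 -a1  -s0.5'                    # (modulo the collapsed whitespace)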
if t == 2:
            shlx = shlxLineaB.split()
            for toc in range(len(shlx)):
                param = shlx[toc]
                if param.startswith("-a"):
                    param = "-a1"
                    if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"):
                        shlx[toc + 1] = ""
                    shlx[toc] = param
                if param.startswith("-K"):
                    if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"):
                        shlx[toc + 1] = ""
                    shlx[toc] = ""
            shlxLineaB = " ".join(shlx)
        directoexp = "./11_EXP/"
        jobexp = "11_EXP_"
        fromdirexp = "11_EXP"
        if is_alixe_exp:
            directoexp = "./11_EXP_alixe/"
            jobexp = "11_EXP_alixe_"
            fromdirexp = "11_EXP_alixe"
        if traceShelxe:
            todel = False
            if os.path.exists(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")):
                for r, subF, fi in os.walk(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")):
                    for fileu in fi:
                        pdbf = os.path.join(r, fileu)
                        if len(fileu.split("_")) < 3:
                            todel = True
            if todel:
                print 'Removing folder ', os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")
                shutil.rmtree(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/"))
                print 'Removing folder ', dirPathPart
                shutil.rmtree(dirPathPart)
                if t == 1:
                    print 'First cycle of expansions was not completed'
                    if fromphis:
                        if not os.path.exists(dirPathPart):
                            os.makedirs(dirPathPart)
                        for phi in lista_phis:
                            os.symlink(phi, os.path.join(dirPathPart, os.path.basename(phi)))
                    else:
                        shutil.copytree(os.path.join(current_directory, "10_PREPARED/" + str(i) + "/"), dirPathPart)
                else:
                    print 'Cycle', t, 'of expansions was not completed; getting files from the previous cycle and starting'
                    os.makedirs(dirPathPart)
                    for r, subF, fi in os.walk(os.path.join(current_directory, directoexp + str(i) + "/" + str(t - 1) + "/")):
                        for fileu in fi:
                            pdbf = os.path.join(r, fileu)
                            if pdbf.endswith(".pdb"):
                                shutil.copyfile(pdbf, os.path.join(dirPathPart, os.path.basename(pdbf)))
        if not os.path.exists(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")):
            if t > 1:
                insfile = None
                fromphis = False
            (nqueue24, convNames_4) = startExpansion(cm, sym, jobexp + str(i) + "_" + str(t), os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/"), hkl, ent, nice, cell_dim, spaceGroup, shlxLineaB, dirPathPart, single=True, insfile=insfile)
            shutil.rmtree(dirPathPart)
            os.makedirs(dirPathPart)
            evaluateExp(DicParameters, cm, sym, DicGridConn, jobexp + str(i) + "_" + str(t), os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/"), nqueue24, convNames_4, dirPathPart, method="fast", insfile=insfile)
        else:
            convNames_4 = {}
            shutil.rmtree(dirPathPart)
            os.makedirs(dirPathPart)
            for r, subF, fi in os.walk(os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/")):
                for fileu in fi:
                    pdbf = os.path.join(r, fileu)
                    if pdbf.endswith(".pdb"):
                        shutil.copyfile(pdbf, os.path.join(dirPathPart, os.path.basename(pdbf)))
        mode = "ARCIMBOLDO-BORGES"
        if isSHREDDER:
            mode = "ARCIMBOLDO-SHREDDER"
        # NOTE CM: fromphis is true for the first cycle, but then changes to the normal pda mode
        if t > 1:
            insfile = None
            fromphis = False
        writeOutputFile(lock=lock, DicParameters=DicParameters, ClusAll=ClusAll, outputDir=current_directory, filename=nameOutput, mode=mode, step="FAST_" + str(t) + "_" + str(i), ensembles=convNames_4, frag_fixed=fixed_frags, path1=os.path.join(current_directory, directoexp + str(i) + "/" + str(t) + "/*/"), useRefP1=USE_REFINEMENT, numberCyclesRef=NUMBER_REF_CYCLES, useRGR=USE_RGR, usePacking=USE_PACKING, useTransla=USE_TRANSLA, fromphis=fromphis, fromdirexp=fromdirexp)
arci_output.generateHTML(lock, current_directory, nameOutput)
        SystemUtility.close_connection(DicGridConn, DicParameters, cm)
    shutil.rmtree(dirPathPart)
    SystemUtility.endCheckQueue()
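# Both expansion drivers share the same cycle bookkeeping: at least one tracing
# cycle plus a final one is always run, and the last cycle switches to the
# "last" shelxe argument line. A minimal sketch of that logic; the helper name
# '_cycle_lines' is illustrative only and is not used by the pipeline:
def _cycle_lines(fromNcycles, toNcycles, lineaB, lineaLast):
    if toNcycles <= 1:  # mirrors the bump applied in shelxe_cycles below
        toNcycles = 2
    lines = {}
    for t in range(fromNcycles, toNcycles):
        lines[t] = lineaLast if t == toNcycles - 1 else lineaB
    return lines
# _cycle_lines(1, 3, "-m15 -a8", "-m15 -a8 -e1.0") == {1: '-m15 -a8', 2: '-m15 -a8 -e1.0'}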
def shelxe_cycles(lock, DicParameters, cm, sym, DicGridConn, output_directory, nameOutput, dirPathPart, fragdirectory, fromNcycles, toNcycles, mtz, MW, NC, F, SIGF, res_refin, Intensities, Aniso, normfactors, tncsfactors, nice, RMSD, quate, laue, ncs, spaceGroup, hkl, ent, cell_dim, shlxLinea0, shlxLineaB, shlxLineaLast, USE_PACKING, USE_TRANSLA, USE_TNCS, solution_verification, solution_sorting_scheme, topExp, topNextFragment, write_output=True):
    """
    Starts shelxe expansions for ARCIMBOLDO_LITE

    :param lock: reentrant lock to avoid simultaneous writing access to the output file
    :type lock: threading.Condition(RLock)
    :param DicParameters: dictionary with key-value pairs required at different steps. Keys: "nameExecution",
    :type DicParameters: dict
    :param cm: Instance of the grid object
    :type cm: Grid.Grid object
    :param sym: Instance of the SystemUtility object
    :type sym: SystemUtility.SystemUtility object
    :param DicGridConn: dictionary with information required for the connections. Keys: "username","host","port","passkey","promptA","isnfs","remote_submitter_username","remote_submitter_host","remote_submitter_port","promptB"
    :type DicGridConn: dict
    :param output_directory: path to save output files
    :type output_directory: str
    :param nameOutput: path without extension for the output html and xml files
    :type nameOutput: str
    :param dirPathPart: path to the folder with the prepared input files
    :type dirPathPart: str
    :param fragdirectory: current directory, where the ens1_fragn folders are found
    :type fragdirectory: str
    :param fromNcycles: starting autotracing cycle number
    :type fromNcycles: int
    :param toNcycles: last autotracing cycle number
    :type toNcycles: int
    :param mtz: path to the mtz file with the original data
    :type mtz: str
    :param MW: molecular weight
    :type MW: float
    :param NC: number of components
    :type NC: int
    :param F: label for the intensities (I) or amplitudes (F)
    :type F: str
    :param SIGF: label for the sigmas of the intensities (SIGI) or the amplitudes (SIGF)
    :type SIGF: str
    :param res_refin: resolution to be used in the refinement phaser jobs
    :type res_refin: float
    :param Intensities: boolean to be removed, currently not used
    :param Aniso: boolean to be removed, currently not used
    :param normfactors: path to the binary file with the anisotropy correction for the data
    :type normfactors: str
    :param tncsfactors: path to the binary file with the tNCS correction for the data
    :type tncsfactors: str
    :param nice: in multiprocessing, priority of the jobs. -20 is the highest priority and 19 is the lowest
    :type nice: int
    :param RMSD: rmsd value to be used in the phaser job
    :type RMSD: float
    :param quate: static reference to the Quaternions class
    :type quate: instance of Quaternions.Quaternions()
    :param laue: rotation matrix for the laue group of the space group set in the job
    :type laue: list
    :param ncs: non crystallographic symmetry matrix
    :type ncs: list
    :param spaceGroup: space group symbol
    :type spaceGroup: str
    :param hkl: path to the hkl reflection file
    :type hkl: str
    :param ent: path to the ent final model file for post-mortem analysis
    :type ent: str
    :param cell_dim: list with the unit cell parameters
    :type cell_dim: list
    :param shlxLinea0: shelxe argument line for a job without autotracing
    :type shlxLinea0: str
    :param shlxLineaB: shelxe argument line for all but the last cycle of autotracing
    :type shlxLineaB: str
    :param shlxLineaLast: shelxe argument line for the last cycle of autotracing
    :type shlxLineaLast: str
    :param USE_PACKING: boolean to indicate if packing must be performed
    :type USE_PACKING: bool
    :param USE_TRANSLA: boolean to indicate if the translation must be performed
    :type USE_TRANSLA: bool
    :param USE_TNCS: boolean to indicate if the tNCS correction must be used in the phaser jobs
    :type USE_TNCS: bool
    :param solution_verification: boolean to activate or deactivate the verification of the expansions
    :type solution_verification: bool
    :param solution_sorting_scheme: scheme to select solutions. Can be AUTO, LLG, ZSCORE, INITCC, COMBINED
    :type solution_sorting_scheme: str
    :param topExp: maximum number of solutions to be sent for expansion
    :type topExp: int
    :param topNextFragment:
    :type topNextFragment:
    :param write_output: whether to write the html/xml output after each cycle
    :type write_output: bool
    :return:
    """
    SystemUtility.open_connection(DicGridConn, DicParameters, cm)
    # NOTE: The following allows starting one cycle with no autotracing to get the final correlation coefficient
    if toNcycles <= 1:
        toNcycles = 2
    original_shlxLineaB = shlxLineaB
    for t in range(fromNcycles, toNcycles):
        if t == toNcycles - 1:
            # We are in the last cycle of expansion: change to shlxLineaLast for the last cycle
            shlxLineaB = shlxLineaLast
        if t == 2:
            shlx = shlxLineaB.split()
            for toc in range(len(shlx)):
                param = shlx[toc]
                if param.startswith("-a"):
                    param = "-a1"
                    if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"):
                        shlx[toc + 1] = ""
                    shlx[toc] = param
                if param.startswith("-K"):
                    if toc + 1 < len(shlx) and not shlx[toc + 1].startswith("-"):
                        shlx[toc + 1] = ""
                    shlx[toc] = ""
            shlxLineaB = " ".join(shlx)
        # This block checks whether this cycle completed fully or must be removed and recomputed
        todel = False
        if os.path.exists(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")):
            for r, subF, fi in os.walk(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")):
                for fileu in fi:
                    pdbf = os.path.join(r, fileu)
                    if not (fileu.startswith("ensemble") and len(pdbf.split("xx")) > 1 and len(pdbf.split("FR")) > 1):
                        todel = True
            if todel:
                shutil.rmtree(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/"))
                print 'This cycle', t, 'was not finished; recomputing will be necessary!'
# This block computes the expansions if required; otherwise it reads the files and prepares
        # the input for the next cycle of autotracing
        if not os.path.exists(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")):
            print "\nComputing the expansions of cycle ", t, '\n'
            (nqueue24, convNames_4) = startExpansion(cm=cm, sym=sym, nameJob="8_EXP_LIBRARY" + "_" + str(t), outputDire=os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/"), hkl=hkl, ent=ent, nice=nice, cell_dim=cell_dim, spaceGroup=spaceGroup, shlxLine=shlxLineaB, dirBase=dirPathPart, single=True)
            shutil.rmtree(dirPathPart)  # remove the preparation folder
            os.makedirs(dirPathPart)  # regenerate the preparation folder
            backPDB, backCC = evaluateExp(DicParameters=DicParameters, cm=cm, sym=sym, DicGridConn=DicGridConn, nameJob="8_EXP_LIBRARY" + "_" + str(t), outputDicr=os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/"), nqueue=nqueue24, convNames=convNames_4, dirPath=dirPathPart, method="fast", renamePDBs=True)
        else:
            print "\nReading expansions of cycle ", t, '\n'
            convNames_4 = {}
            shutil.rmtree(dirPathPart)
            os.makedirs(dirPathPart)
            for r, subF, fi in os.walk(os.path.join(fragdirectory, "./8_EXP_LIBRARY/" + str(t) + "/")):
                for fileu in fi:
                    pdbf = os.path.join(r, fileu)
                    if pdbf.endswith(".pdb"):
                        shutil.copyfile(pdbf, os.path.join(dirPathPart, os.path.basename(pdbf)))
        if write_output:
            writeOutputFile(lock=lock, DicParameters=DicParameters, ClusAll=None, outputDir=output_directory, filename=nameOutput, mode="ARCIMBOLDO", step="FAST_" + str(t), ensembles=convNames_4, frag_fixed=1, path1=os.path.join(fragdirectory, "8_EXP_LIBRARY/" + str(t) + "/*/"), usePacking=USE_PACKING, useTransla=USE_TRANSLA)
            arci_output.generateHTML(lock, output_directory, nameOutput)
    shutil.rmtree(dirPathPart)
    SystemUtility.endCheckQueue()
    lock.acquire()
    nomexml = os.path.join(output_directory, nameOutput + ".xml")
    if not os.path.exists(nomexml):
        lock.release()
        return
    # 1. Check best.pdb --> best.pda --> solCC.sum
    tree = ET.parse(nomexml)
    root = tree.getroot()
    name_model = root.find('backtracing/model').text
    name_model = name_model[:-4]
    best_cc = float(root.find('backtracing/finalcc').text)
    best_nres = int(root.find('backtracing/restraced').text)
    best_file = root.find('backtracing/file').text
    lock.release()
    if solution_verification and best_cc >= 25.0:  # verification is only needed when the CC is already high
        print "Best model is", name_model, "located at", best_file, "with CC", best_cc, "and", best_nres, "traced"
        # 2. Generate RNP .sh with all possible combinations of reversed helices for the selected solution
        if not os.path.exists(os.path.join(output_directory, "9_VERIFICATION/PERTURBATIONS/clusters.sum")):
            fragment = int(name_model.split("FR")[1].split("_")[0]) + 1
            pathPACK = os.path.join(os.path.join(os.path.join(output_directory, "ens1_frag" + str(fragment)), "4_PACK_LIBRARY"), "clusters.sum")
            pathPACKINV = os.path.join(os.path.join(os.path.join(output_directory, "ens1_frag" + str(fragment)), "4.5_INVERTED_LIBRARY"), "clusters.sum")
            if os.path.exists(pathPACKINV):
                pathPACK = pathPACKINV
            print "SUM to read", pathPACK
            # ense1, clu1, rt1, et1 = readClustersFromSUMToDB(DicParameters, sum, "ROTSOL")
            rop, topLLG, topZSCORE, posRank, ensem = __getStatFromSumAndModel(pathPACK, name_model, name_model, None, "ARCIMBOLDO", getEnsemble=True)
            Clud = [{"heapSolutions": ADT.Heap()}]
            Clud[0]["heapSolutions"].push((rop["llg"], rop["zscore"]), rop)
            writeSumClusters(Clud, os.path.join(output_directory, "9_VERIFICATION/"), "before_clusters", ensem)
            ensem, Clud = generateInvertedHelices(ensem, Clud, 1000, skipFirst=False, add_a_random_solution=True, spaceGroup=spaceGroup)
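# The backtracing block above pulls the best-model summary out of the run's
# XML report. Doctest-style illustration with a fabricated snippet (the real
# file contains many more elements):
# >>> root = ET.fromstring("<root><backtracing><model>ensarci0FR0_0-1.pdb</model>"
# ...                      "<finalcc>37.2</finalcc><restraced>120</restraced>"
# ...                      "<file>/tmp/best.pdb</file></backtracing></root>")
# >>> root.find('backtracing/model').text, float(root.find('backtracing/finalcc').text)
# ('ensarci0FR0_0-1.pdb', 37.2)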
writeSumClusters(Clud, os.path.join(output_directory, "9_VERIFICATION/"), "clusters", ensem)
            # 3. Start Phaser with that .sh and score by LLG and ZSCORE, eliminating all bad solutions
            (nqueue5, convino) = startRNPOnePerCluster(DicParameters=DicParameters, cm=cm, sym=sym, nameJob="PERTURBATIONS", ClusAll=Clud, ensembles=ensem, outputDire=os.path.join(output_directory, "9_VERIFICATION/PERTURBATIONS/"), mtz=mtz, MW=MW, NC=NC, F=F, SIGF=SIGF, Intensities=Intensities, Aniso=Aniso, normfactors=normfactors, tncsfactors=tncsfactors, nice=nice, RMSD=0.5, lowR=99, highR=res_refin, spaceGroup=spaceGroup, frag_fixed=fragment, tops=None, usePDO=False, sampl=-1, USE_TNCS=USE_TNCS, USE_RGR=False, BFAC=False, VRMS=True, BULK_FSOL=-1, BULK_BSOL=-1, RNP_GYRE=False, consider_inverted_helix=True)
            SystemUtility.endCheckQueue()
            CluAll, ensembles, tolose = evaluateFTF(DicParameters=DicParameters, cm=cm, sym=sym, DicGridConn=DicGridConn, nameJob="PERTURBATIONS", outputDicr=os.path.join(output_directory, "9_VERIFICATION/PERTURBATIONS/"), nqueue=nqueue5, ensembles=ensem, excludeZscore=0, fixed_frags=fragment, quate=quate, mode="RNP", laue=laue, listNCS=ncs, clusteringMode="rot_matrices", cell_dim=cell_dim, thresholdCompare=15.0, evaLLONG=False, applyNameFilter=False, convNames=convino, tops=1000, isArcimboldo=True, renamePDBs=True, is_verification=True)
            writeSumClusters(CluAll, os.path.join(output_directory, "9_VERIFICATION/PERTURBATIONS"), "clusters", ensembles)
        else:
            ensembles, CluAll, rt, et = readClustersFromSUMToDB(DicParameters, os.path.join(output_directory, "9_VERIFICATION/PERTURBATIONS/clusters.sum"), "ROTSOL")
        ent = best_file
        if "-x" not in shlxLinea0:
            shlxLinea0 += " -x"
        if "-x" not in original_shlxLineaB:
            original_shlxLineaB += " -x"
        if "-x" not in shlxLineaLast:
            shlxLineaLast += " -x"
        if not os.path.exists(os.path.join(output_directory, "9_VERIFICATION/EVAL_PERTURBATIONS/solCC.sum")):
            SystemUtility.open_connection(DicGridConn, DicParameters, cm)
            (nqueue6, convNames6) = startExpansion(cm, sym, "EVAL_PERTURBATIONS", os.path.join(output_directory, "9_VERIFICATION/EVAL_PERTURBATIONS/"), hkl, ent, nice, cell_dim, spaceGroup, shlxLinea0, os.path.join(output_directory, "9_VERIFICATION/PERTURBATIONS/"))
            SystemUtility.endCheckQueue()
            CC_Val1 = evaluateExp_CC(DicParameters, cm, sym, DicGridConn, "EVAL_PERTURBATIONS", os.path.join(output_directory, "9_VERIFICATION/EVAL_PERTURBATIONS/"), nqueue6, convNames6, isArcimboldo=True, usePDO=False, savePHS=False, archivingAsBigFile=True, phs_fom_statistics=False)
        else:
            CC_Val1, con = readCCValFromSUM(os.path.join(output_directory, "9_VERIFICATION/EVAL_PERTURBATIONS/solCC.sum"))
        # 4. Select the top n (number of cores) solutions and re-run them through the same shelxe line and autotracing cycles
        enst, clur, arp, frap = readClustersFromSUMToDB(DicParameters, os.path.join(output_directory, "9_VERIFICATION/PERTURBATIONS/clusters.sum"), "ROTSOL")
        CC_Vals = []
        CC_Vals = unifyCC2(CC_Vals, CC_Val1, [], enst, clur, suffixA="", suffixB="", suffixC="", llgn=2, solution_sorting_scheme=solution_sorting_scheme)
        convNames14 = startPREPARE(cm, sym, "9_7_PREPARED_ALL", CC_Vals, os.path.join(output_directory, "9_VERIFICATION/EXP_PREPARE/"), cell_dim, spaceGroup, topExp, topNext=topNextFragment, always_include_names_with_key="WRO")
        shelxe_cycles(lock=lock, DicParameters=DicParameters, cm=cm, sym=sym, DicGridConn=DicGridConn, output_directory=os.path.join(output_directory, "9_VERIFICATION/"), nameOutput=nameOutput, dirPathPart=os.path.join(output_directory, "9_VERIFICATION/EXP_PREPARE"), fragdirectory=os.path.join(output_directory, "9_VERIFICATION/"), fromNcycles=fromNcycles, toNcycles=toNcycles, mtz=mtz, MW=MW, NC=NC, F=F, SIGF=SIGF, res_refin=res_refin, Intensities=Intensities, Aniso=Aniso, normfactors=normfactors, tncsfactors=tncsfactors, nice=nice, RMSD=RMSD, quate=quate, laue=laue, ncs=ncs, spaceGroup=spaceGroup, hkl=hkl, ent=ent, cell_dim=cell_dim, shlxLinea0=shlxLinea0, shlxLineaB=original_shlxLineaB, shlxLineaLast=shlxLineaLast, USE_PACKING=USE_PACKING, USE_TRANSLA=USE_TRANSLA, USE_TNCS=USE_TNCS, solution_verification=False, solution_sorting_scheme=solution_sorting_scheme, topExp=topExp, topNextFragment=topNextFragment, write_output=False)
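# The statistics below compare the re-expanded perturbations of the best
# solution against a deliberately wrong, random ("WRO") solution: the
# coefficient of variation of the wMPE values decides whether the runs form
# one tight cluster or split in two. Doctest-style sketch of that dispersion
# test (data invented for illustration):
# >>> arr = numpy.array([78.0, 81.5, 80.2])
# >>> (arr.std() / arr.mean()) * 100.0   # CV in percent
# 1.8...  (a tight, inconclusive cluster; CV >= 10 triggers the two-cluster check)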
"9_VERIFICATION/"), nameOutput=nameOutput, dirPathPart=os.path.join(output_directory, "9_VERIFICATION/EXP_PREPARE"), fragdirectory=os.path.join(output_directory, "9_VERIFICATION/"),fromNcycles=fromNcycles, toNcycles=toNcycles, mtz=mtz, MW=MW, NC=NC, F=F, SIGF=SIGF, res_refin=res_refin, Intensities=Intensities, Aniso=Aniso, normfactors=normfactors, tncsfactors=tncsfactors,nice=nice, RMSD=RMSD, quate=quate, laue=laue, ncs=ncs, spaceGroup=spaceGroup, hkl=hkl, ent=ent, cell_dim=cell_dim, shlxLinea0=shlxLinea0, shlxLineaB=original_shlxLineaB, shlxLineaLast=shlxLineaLast, USE_PACKING=USE_PACKING, USE_TRANSLA=USE_TRANSLA, USE_TNCS=USE_TNCS, solution_verification=False, solution_sorting_scheme=solution_sorting_scheme, topExp=topExp, topNextFragment=topNextFragment, write_output=False) # lock.acquire() # nomexml = os.path.join(output_directory, nameOutput + ".xml") # # if not os.path.exists(nomexml): # lock.release() # return # # tree = ET.parse(nomexml) # root = tree.getroot() # name_model_after = root.find('backtracing/model').text # name_model_after = name_model[:-4] # best_cc_after = root.find('backtracing/finalcc').text # best_nres_after = root.find('backtracing/restraced').text # best_file_after = root.find('backtracing/file').text # lock.release() results_verification = read_all_data_from_expansion_directory(os.path.join(output_directory, "9_VERIFICATION/8_EXP_LIBRARY")) #NOTE: Based on results_verification dictionary do what Iracema commands and then write the messagge and display a graph into the html message = "" solv = True wMPDr = numpy.array([results_verification[w]["wMPE"] for w in results_verification if "WRO" not in w]) CCr = numpy.array([results_verification[w]["cc"] for w in results_verification if "WRO" not in w]) wMPDw = [results_verification[w]["wMPE"] for w in results_verification if "WRO" in w][0] CCw = [results_verification[w]["cc"] for w in results_verification if "WRO" in w][0] CV = (wMPDr.std()/wMPDr.mean())*100.0 twoclusters = False if CV >= 10: rx = max([abs(CCr[p]-CCr[p+1]) for p,u in enumerate(sorted(CCr)) if p < len(CCr)-1]) ry = max([abs(wMPDr[p]-wMPDr[p+1]) for p,u in enumerate(sorted(wMPDr)) if p < len(wMPDr)-1]) if rx > 10 and ry > 10: twoclusters = True else: solv = False message = "WARNING: Verification reaches no conclusive distinction between the best solution and a random set." if not twoclusters and wMPDr.mean() >= 80.0: solv = False message = "Verification shows the structure is not solved despite the high CC.
This is not uncommon of Coiled Coils at this resolution" elif not twoclusters and abs(CCr.mean() - best_cc) > 9: solv = False message = "Verification has determined the structure is not solved despite the high CC.
Best starting solution is producing completely different traces." elif wMPDw < 80: solv = False message = "WARNING: Verification reaches no conclusive distinction between the best solution and a random set.
Generation of a random solution failed." elif not twoclusters and CCr.mean() - CCw < 9: solv = False message = "Verification has determined that the structure is not solved despite the high CC.
Equivalent CCs are obtained from random solution." elif twoclusters and best_cc - CCw < 9: solv = False message = "Verification has determined that the structure is not solved despite the high CC.
Equivalent CCs are obtained from random solution." else: solv = True message = "Verification has determined that the structure is solved.
The best solution is clearly distinguised from a random one." writeOutputFile(lock=lock, DicParameters=DicParameters, ClusAll=None, outputDir=output_directory, filename=nameOutput, mode="ARCIMBOLDO", step="VERIFICATION", ensembles=None, frag_fixed=None, dizio_verification=results_verification, message=message, solved=solv) arci_output.generateHTML(lock, output_directory, nameOutput) #print "Best original model was", name_model, "located at", best_file, "with CC", best_cc, "and", best_nres, "traced" #print "After verification the Best resulted model was",name_model_after, "located at", best_file_after, "with CC", best_cc_after, "and", best_nres_after, "traced" # # if best_cc >= best_cc_after: # print "It seems that the original model was correct and indeed it is a solution: ",best_file # else: # print "It looks like the original model has no better CC compared to its perturbations" # print "It is likely that solution is wrong or indistinguishable." def readSpaceGroupFromOut(out): """ :param out: complete out from a phaser job :type out: str :return: the spaceGroup symbol string :rtype: str """ sg_match = re.search(r'Space-Group Name \(Hall Symbol\):.*\(', out) spaceGroup = ((sg_match.group()).split(':')[1]).split('(')[0].strip() return spaceGroup def detect_tNCS_from_phaser_out(out): """ :param out: complete out from a phaser job :type out: str :return: tNCS_bool indicating if tNCS has been found :rtype: bool """ tncs_match = re.search(r'No tNCS found in Patterson',out) if tncs_match is not None: tNCS_bool = False else: tNCS_bool = True return tNCS_bool def uniqueReflectionsFromOut(out): """ :param out: :type out: :return: :rtype: """ # READING THE NUMBER OF UNIQUE REFLECTION FROM OUT oi = out.splitlines() ref = None for lu in range(len(oi)): uu = (oi[lu]).strip() if uu.startswith("Number of Reflections in Selected Resolution Range:"): ref = uu.split()[7] break return int(ref) def resolutionFromOut(out): """ :param out: :type out: :return: :rtype: """ # READING THE RESOLUTION FROM OUT oi = out.splitlines() res = None for lu in range(len(oi)): uu = (oi[lu]).strip() if uu.startswith("Resolution Selected:"): res = uu.split()[2] break return float(res) def cellDimensionFromOut(out): """ :param out: :type out: :return: :rtype: """ # READING THE CELL DIMENSIONS FROM MTZ oi = out.splitlines() cell_dim = None for lu in range(len(oi)): uu = (oi[lu]).strip() if uu.startswith("Unit Cell:"): cell_dim = uu.split()[2:] break return cell_dim def mergePDBfiles(listPDB, outputDir="", nameOutput="mergedPDB.pdb", cell_dim=[0, 0, 0, 0, 0, 0], spaceGroup="P1", pda=True): # NSADD merge PDB files """ Merge pdb files (provided as a in a single file). It should also add a CRYSTCARD. 
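# Doctest-style illustration of the phaser .out parsers above (the snippets are
# fabricated single lines; real .out files contain the full phaser log):
# >>> readSpaceGroupFromOut("Space-Group Name (Hall Symbol): P 21 21 21 (P 2ac 2ab)")
# 'P 21 21 21'
# >>> detect_tNCS_from_phaser_out("   No tNCS found in Patterson")
# False
# >>> uniqueReflectionsFromOut("Number of Reflections in Selected Resolution Range: 12345")
# 12345
# >>> resolutionFromOut("Resolution Selected: 2.10")
# 2.1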
def mergePDBfiles(listPDB, outputDir="", nameOutput="mergedPDB.pdb", cell_dim=[0, 0, 0, 0, 0, 0], spaceGroup="P1", pda=True):  # NSADD merge PDB files
    """
    Merge pdb files (provided as a list of paths) into a single file, adding a CRYST1 record.

    :param listPDB: list of paths
    :type listPDB: list
    :param outputDir: directory where the merged file is written
    :type outputDir: str
    :param nameOutput: name of the output file, without extension
    :type nameOutput: str
    :param cell_dim: list with the unit cell parameters
    :type cell_dim: list
    :param spaceGroup: space group symbol
    :type spaceGroup: str
    :param pda: if True, extension of the output file is .pda
    :type pda: bool
    :return: path of the merged file
    :rtype: str
    """
    chainID = ("A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "0", "1", "2", "3", "4", "5", "6", "7", "8", "9")
    expr = re.compile("(ATOM |HETATM).{5}(.{10}).(.*)")
    ext = ".pda" if pda else ".pdb"  # extension of the output file
    nameOutput = os.path.join(outputDir, nameOutput + ext)
    out = open(nameOutput, 'w')
    # making the CRYST1 record from the unit cell parameters and spaceGroup, with the number of fixed fragments as Z value
    Zvalue = len(listPDB)
    cryst1 = "CRYST1"
    cryst1 += '{:>9}{:>9}{:>9}{:>7}{:>7}{:>7} {:<10}{:>5}'.format(cell_dim[0], cell_dim[1], cell_dim[2], cell_dim[3], cell_dim[4], cell_dim[5], spaceGroup, Zvalue)  # Unit cell parameters + spacegroup
    cryst1 += "\n"
    out.write(cryst1)
    i = 0
    atomnum = 1  # to renumber atoms from 1 in the final merged pdb
    for pdb in listPDB:
        towrite = ""
        for line in open(pdb):
            result = expr.search(line)
            if result:
                debut = result.group(1)
                milieu = result.group(2)
                fin = result.group(3)
                towrite += debut
                numero = str(atomnum)
                towrite += " " * (5 - len(numero))  # pad so that the final atom number is 5 characters long
                towrite += numero  # new atom number
                towrite += milieu
                towrite += chainID[i] + fin + "\n"
                atomnum += 1  # increment the atom number before the next line
        out.write(towrite)
        if i != len(listPDB) - 1:
            out.write("TER\n")
        else:
            out.write("END")
        i += 1  # next pdb in the list
    out.close()
    return nameOutput


def rototranslateAndMergePDBs(listEulerFrac, outputDir, nameOutput, cell_dim, spaceGroup, i=0):  # NS ADD
    """
    Rototranslate all pdbs from listEulerFrac according to the euler angles and frac translations
    stored with them, then merge the rototranslated pdbs into a single pdb file.

    :param listEulerFrac: list of tuples ("pdbfile", [euler1, euler2, euler3, frac1, frac2, frac3])
    :type listEulerFrac: list
    :param outputDir: directory where the merged file is written
    :type outputDir: str
    :param nameOutput: name of the output file, without extension
    :type nameOutput: str
    :param cell_dim: list with the unit cell parameters
    :type cell_dim: list
    :param spaceGroup: space group symbol
    :type spaceGroup: str
    :param i: index used to tag the intermediate rottra files
    :type i: int
    :return: path of the merged pdb
    :rtype: str
    """
    pdblistOut = []  # To store the rototranslated pdb file paths before merging them
    p = PDBParser(PERMISSIVE=1)
    j = 0
    for pdbTuple in listEulerFrac:  # listEulerFrac contains tuples ("pdbfile",[FracEuler])
        pdbfile = pdbTuple[0]  # pdb file name to be opened
        structure = p.get_structure('s' + str(i), pdbfile)
        euler = pdbTuple[1][0:3]  # euler angles for that pdb
        frac = pdbTuple[1][3:6]
        rotmat = matrixFromEulerAngles(euler[0], euler[1], euler[2])
        # First, rototranslate all pdb files with the provided euler and frac:
        structureRotated = rotateStructureByMatrix(1, 1, structure, [rotmat], outputDir, cell_dim, writePDB=False)  # returns a structure object
        pdbfileRototranslated = translateStructurebyFrac(i, j, structureRotated, frac, outputDir, cell_dim)  # path to the i_j_rottra.pdb file
        pdblistOut.append(pdbfileRototranslated)  # Populate pdblistOut with the rototranslated pdb files
        j += 1  # next pdb in the list (i must stay fixed so that the cleanup below matches the intermediate file names)
    mergedPDBfromPreviousSol = mergePDBfiles(pdblistOut, outputDir, nameOutput, cell_dim, spaceGroup)  # Merge all the rototranslated pdbs in a single file, nameOutput will be rota["name"]
    # Removing intermediate rottra files
    for k in range(j):
os.remove(os.path.join(outputDir, str(i) + "_" + str(k) + "_rottra.pdb")) return mergedPDBfromPreviousSol # string with the path of the merged pdb def generatePhasesFromModel(mergedpdb,mtzexp,HKLfile,FP,SIGFP,SG,outputDir,cell_dim,resolution="1.0",shelxebeta="shelxebetafast2",intensities=True,nDMcycles="9",solventContent="0.5",protocol=0): #NS ADD """Performs SHELXE DM on a model, then converts the output PHS into MTZ to calculate a map from the phs F, SIGF and PHI. This map is used in turn to produce Fcalcs that will be scaled to the original Fobs, everything is merged into a single MTZ""" # #########SPACE GROUP CORRESPONDENCE XDS->SHELXC obtained with parseSG.pl sgDic = {'P 61 2 2': '178', 'I 2 2 2': '23', 'P 42 2 2': '93', 'P 3 2 1': '150', 'P 63 2 2': '182', 'P 43 2 2': '95', 'A 1 2 1': 'A2', 'I 2 3': '197', 'I 21 3': '199', 'P 6 2 2': '177', 'F 2 3': '196', 'F 2 2 2': '22', 'I 4 3 2': '211', 'P 62 2 2': '180', 'I 21 21 21': '24', 'P 41 21 2': '92', 'C 1 2 1': '5', 'C 2 2 21': '20', 'P 31 2 1': '152', 'H 3 2': '155', 'I 41 2 2': '98', 'P 31 1 2': '151', 'P 21 21 2': '18', 'R 3 2': 'R 3 2', 'P 65 2 2': '179', 'P 4 21 2': '90', 'R 3': 'R 3', 'P 4 2 2': '89', 'P 41 2 2': '91', 'P 42 21 2': '94', 'P 42 3 2': '208', 'I 41': '80', 'P 42': '77', 'P 41 3 2': '213', 'P 21 3': '198', 'P 64 2 2': '181', 'I 41 3 2': '214', 'P 3': '143', 'P 1': '1', 'P 6': '168', 'I 4': '79', 'P 4': '75', 'P 32 1 2': '153', 'I 4 2 2': '97', 'C 2 2 2': '21', 'P 2 2 21': '17', 'P 32 2 1': '154', 'P 2 2 2': '16', 'P 21 21 21': '19', 'H 3': '146', 'P 3 1 2': '149', 'P 4 3 2': '207', 'P 43 3 2': '212', 'P 63': '173', 'P 64': '172', 'P 65': '170', 'P 62': '171', 'P 1 2 1': '3', 'F 41 3 2': '210', 'P 61': '169', 'P 31': '144', 'P 41': '76', 'I 1 2 1': 'I2', 'P 32': '145', 'P 1 21 1': '4', 'P 2 3': '195', 'P 43': '78', 'F 4 3 2': '209', 'P 43 21 2': '96'} sgDicShort = {'F222': '22', 'P6222': '180', 'P6322': '182', 'P4212': '90', 'H3': '146', 'P432': '207', 'F23': '196', 'P213': '198', 'P622': '177', 'P6': '168', 'P4332': '212', 'P312': '149', 'P43212': '96', 'P6522': '179', 'P21212': '18', 'I213': '199', 'I2': 'I2', 'P4232': '208', 'I4': '79', 'H32': '155', 'I212121': '24', 'A2': 'A2', 'P6422': '181', 'P32': '145', 'C2': '5', 'P4132': '213', 'P212121': '19', 'P3221': '154', 'R32': 'R 3 2', 'C222': '21', 'P2': '3', 'P3': '143', 'P1': '1', 'P321': '150', 'C2221': '20', 'P4': '75', 'R3': 'R 3', 'P422': '89', 'I432': '211', 'I4132': '214', 'P41212': '92', 'P42212': '94', 'P4122': '91', 'F4132': '210', 'P4322': '95', 'P41': '76', 'I4122': '98', 'P61': '169', 'P63': '173', 'P62': '171', 'P65': '170', 'P64': '172', 'P6122': '178', 'P3112': '151', 'P222': '16', 'I222': '23', 'P4222': '93', 'P31': '144', 'I422': '97', 'P21': '4', 'F432': '209', 'P23': '195', 'P3212': '153', 'I23': '197', 'P3121': '152', 'P43': '78', 'P42': '77', 'P2221': '17', 'I41': '80'} SGnumber = "1" # Get the space group number for input into f2mtz if SG in sgDic: SGnumber = sgDic[SG] elif SG in sgDicShort: SGnumber = sgDicShort[SG] # Get the pdb file name without extension to set the output mtz file xyzin = os.path.basename(mergedpdb) # just the local name myfile.pdb (no absolute path) filename, _ = os.path.splitext(mergedpdb) # myfile #protocol (here only influences the labels to choose for anisotropy correction) #default value (protocol=0) if protocol==1: Fphaser="FCalc" SIGFphaser="SIGFobs_scaled2FC" elif protocol==2: Fphaser="F_shxe" SIGFphaser="SIGF_shxe" else: Fphaser="F_trunc" SIGFphaser="SIGF_trunc" #HKL file, must be present for SHELX to run with the 
same name as the pda file (create a symbolic link in the same directory as the pda file)
    namehkl = os.path.join(outputDir, filename + ".hkl")
    if not os.path.exists(namehkl):
        os.symlink(HKLfile, namehkl)
    # Local names for input into F2MTZ
    PHSfile = filename + ".phs"  # PHS file generated by SHELXE
    mtztmp = filename + "tmp.mtz"  # from the PHS file
    maptmp = filename + "tmp.map"
    mtztmp2 = filename + "tmp2.mtz"  # from the map
    mtzout = filename + ".mtz"  # will be used for Fcalc SIGFobsMR
    normfactorsF = filename + '_anisF.norm'
    tncsfactorsF = filename + '_anisF.tncs'
    # Changing directory to perform the SHELXE, F2MTZ and CAD jobs
    scriptDirectory = os.path.dirname(os.path.realpath(__file__))  # directory from which the script is launched
    os.chdir(outputDir)  # changing to the local directory to run SHELXE (it does not like absolute paths as arguments if they are too long)
    # SHELXE DM run on the fixed fragment model
    shxopt = "" if intensities else "-f"
    # First perform the shelxe DM run, setting the Sphere of Influence radius to SphereRadius
    SphereRadius = resolution if float(resolution) > 2.5 else '2.42'
    shelxeCMD = shelxebeta + " -m%s -s%s -A%s %s %s" % (nDMcycles, solventContent, SphereRadius, shxopt, xyzin)
    p1 = subprocess.Popen([shelxeCMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    print "Performing %s cycle(s) of density modification with %s on the fixed fragments to generate phases (Sphere of Influence radius: %s Ang.)\n" % (nDMcycles, shelxebeta, SphereRadius)
    p1.stdout.read()
    # CONVERSION PHS TO MTZ (called mtztmp here)
    f2mtzCMD = """f2mtz hklin %s hklout %s << eof
CELL %s %s %s %s %s %s
SYMM %s
TITLE Conversion from phs to mtz
LABOUT H K L F_shxe FOM PHI_shxe SIGF_shxe
CTYPOUT H H H F W P Q
PNAME Arcimboldo
DNAME phs to mtz conversion
END
eof
""" % (PHSfile, mtztmp, cell_dim[0], cell_dim[1], cell_dim[2], cell_dim[3], cell_dim[4], cell_dim[5], SGnumber)
    p2 = subprocess.Popen([f2mtzCMD], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    print "Now converting the PHS file into an MTZ file\n"
    p2.stdout.read()
    # NOW generate a map using FFT, with F_shxe, SIGF_shxe, PHI_shxe, FOM.
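# The CCP4 utilities here are driven through the shell with here-doc keyword
# input, captured via pipes. Note that communicate() (used elsewhere in this
# module) drains stdout and stderr together and avoids the deadlock that
# reading a single pipe can cause on very verbose jobs; sketch:
# >>> proc = subprocess.Popen(["f2mtz hklin in.phs hklout out.mtz << eof\nEND\neof\n"],
# ...                         stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
# >>> out, err = proc.communicate()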
fftCMD = """fft hklin %s MAPOUT %s << eof


def replaceATOMrec(inputLine, replaceDic):
    """
    Rewrites fields of a PDB ATOM/HETATM record line. replaceDic maps field names
    from possibleArguments below to their new values; all other fields are kept.
    """
    # NOTE: the original definition of regexprATOM was lost together with the end of
    # generatePhasesFromModel above; this pattern is rebuilt from the standard PDB ATOM
    # column widths mirrored by the format specifications in possibleArguments.
    regexprATOM = re.compile("^(.{6})(.{5})(.{1})(.{4})(.{1})(.{3})(.{1})(.{1})(.{4})(.{1})(.{3})(.{8})(.{8})(.{8})(.{6})(.{6})(.{10})(.{2})")
    possibleArguments = (("recordName", 1, "{:<6}"), ("serial", 2, "{:>5}"), ("dummy", 3, ""), ("atomName", 4, "{:>4}"), ("altLoc", 5, "{:>1}"), ("resName", 6, "{:>3}"), ("dummy", 7, ""), ("chainID", 8, "{:>1}"), ("seqnum", 9, "{:4d}"), ("insertionCode", 10, "{:>1}"), ("dummy", 11, ""), ("x", 12, "{:8.3f}"), ("y", 13, "{:8.3f}"), ("z", 14, "{:8.3f}"), ("occupancy", 15, "{:6.2f}"), ("bfact", 16, "{:6.2f}"), ("dummy", 17, ""), ("element", 18, "{:>2}"))
    # generating the substitution line
    subLine = ""
    for arg in possibleArguments:
        if arg[0] in replaceDic.keys():
            subLine += arg[2].format(replaceDic[arg[0]])
        else:
            subLine += "\\g<" + str(arg[1]) + ">"
    # Now performing the substitution
    out = regexprATOM.sub(subLine, inputLine)
    return out


def transformInputPDB(pdbfile, cell_dim=[0, 0, 0, 0, 0, 0], spaceGroup="P1", Zvalue="1", transform="none", Bfact=0):
    """Modify the input pdb file in place; by default only the CRYST1 record is rewritten according to the input data"""
    if transform.lower() in ("polya", "pseudos", "polys", "arom"):
        print "TRANSFORMING THE INPUT PDB IN " + transform
    # OPTIONS:
    # transform (string), can be either:
    #   polyA: changes the input pdb to poly-alanine
    #   pseudoS: transforms into a poly-alanine in which the C-beta is an oxygen
    #   polyS: poly-serine-like transformation (captures the C-delta as well)
    #   arom: leaves aromatic residues only
    # Bfact (float): if not 0, set all B-factors to the provided number (not done yet)
    # making the CRYST1 record from the unit cell parameters and spaceGroup, with the number of fixed fragments as Z value
    cryst1 = "CRYST1"
    cryst1 += '{:>9}{:>9}{:>9}{:>7}{:>7}{:>7} {:<10}{:>5}'.format(cell_dim[0], cell_dim[1], cell_dim[2], cell_dim[3], cell_dim[4], cell_dim[5], spaceGroup, Zvalue)  # Unit cell parameters + spacegroup
    cryst1 += "\n"
    # Regexps to select the lines to keep in the output pdb; our CRYST1 record replaces the original header
    if transform.lower() in ("polya", "pseudos"):
        expression = "^(ATOM |HETATM|ANISO )[\d\s]{5} .(N |CA|C |O |CB)"
    elif transform.lower() == "polys":
        expression = "^(ATOM |HETATM|ANISO )[\d\s]{5} .(N |CA |C |O |CB |CD |CD1)"  # captures C delta as well
    elif transform.lower() == "arom":
        expression = "^(ATOM |HETATM|ANISO )[\d\s]{5} .(.{3}) (\w{3})"  # captures the residue type as well
    else:
        expression = "^(ATOM |HETATM|ANISO )"  # only select ATOM and ANISO records if no transformation is requested
    selectiveREGEXP = re.compile(expression)
    terendREGEXP = re.compile("(^TER|END)")
    # replacing the lines in the input PDB
    towrite = cryst1  # Start by writing the CRYST1 record
    for line in open(pdbfile):
        m = selectiveREGEXP.match(line)
        m2 = terendREGEXP.match(line)
        if m:
            if transform.lower() == "polya":
                line = replaceATOMrec(line, {"resName": "ALA"})
            elif transform.lower() == "pseudos" and m.group(2).strip(' ') == "CB":
                line = replaceATOMrec(line, {"atomName": "OB ", "element": "O"})
            elif transform.lower() == "polys" and m.group(2).strip(' ') == "CD":
                line = replaceATOMrec(line, {"atomName": "OD ", "resName": "SER", "element": "O"})
            elif transform.lower() == "arom":
                if not (m.group(3) in ("PHE", "TYR", "HIS", "TRP", "LEU", "ILE", "VAL")):
                    if m.group(2).strip(' ') in ('N', 'CA', 'C', 'O', 'CB'):
                        pass
                    else:
                        line = ""
            towrite += line
        elif m2:
            towrite += line
        elif line.startswith("MODEL") or line.startswith("ENDM"):
            towrite += line
    out = open(pdbfile, 'w')
    out.write(towrite)
    out.close()
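# Illustration of the replaceATOMrec call pattern used above (the ATOM record is
# fabricated but column-aligned to the standard PDB format):
# >>> line = "ATOM      1  CB  LEU A  12      11.111  22.222  33.333  1.00 20.00           C "
# >>> replaceATOMrec(line, {"resName": "ALA"})
# returns the same record with LEU replaced by ALA in columns 18-20, which is
# exactly how the polyA transformation above strips side-chain identity.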