#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python imports
from __future__ import division

import ConfigParser
import cPickle
import copy
import datetime
import math
import operator
import os
import sys
import time

import numpy

import alixe_library as al

# Get a copy of the space group dictionary
dictio_space_groups = al.get_spacegroup_dictionary()  # NOTE: Maybe directly access from the module to avoid having it in memory?


def fishing_round_by_prio_list(dict_first_round_by_rotclu, reference_hkl, sg_symbol, f_fom, phstat_version, path_phstat,
                               ncores, clust_fold, path_ins, cell, resolution, cycles, tolerance):
    list_prio = []
    list_to_remove = []
    sg_number = al.get_space_group_number_from_symbol(sg_symbol)
    seed = 0  # We give the input sorted so in the fortran phstat we want it to use the first one as ref
    for rotclu in dict_first_round_by_rotclu.keys():
        topllg = 0
        name_clust_topllg = ''
        topzscore = 0
        name_clust_topzscore = ''
        for cluster in dict_first_round_by_rotclu[rotclu].keys():
            if dict_first_round_by_rotclu[rotclu][cluster]['dict_FOMs']['llg'] > topllg:
                topllg = dict_first_round_by_rotclu[rotclu][cluster]['dict_FOMs']['llg']
                name_clust_topllg = cluster
            if dict_first_round_by_rotclu[rotclu][cluster]['dict_FOMs']['zscore'] > topzscore:
                topzscore = dict_first_round_by_rotclu[rotclu][cluster]['dict_FOMs']['zscore']
                name_clust_topzscore = cluster
        if name_clust_topllg not in list_prio:
            list_prio.append(name_clust_topllg)
        if name_clust_topzscore not in list_prio:
            list_prio.append(name_clust_topzscore)
    list_rest = []
    for rotclu in dict_first_round_by_rotclu.keys():
        for cluster in dict_first_round_by_rotclu[rotclu]:
            if cluster not in list_prio:
                list_rest.append((cluster, dict_first_round_by_rotclu[rotclu][cluster]['dict_FOMs']['llg']))
    sorted_list_rest = sorted(list_rest, key=lambda x: x[1])
    for i in range(len(sorted_list_rest)):
        sorted_list_rest[i] = sorted_list_rest[i][0]
    list_prio.extend(sorted_list_rest)
    number_of_trials = int(ncores)
    max_number_of_trials = len(list_prio)
    dict_result_by_trial = {}
    raw_clust_second_round = open(os.path.join(clust_fold, "info_clust_second_round_raw"), 'w')
    del raw_clust_second_round
    table_clust_second_round = open(os.path.join(clust_fold, "info_clust_second_round_table"), 'w')
    table_clust_second_round.write('%-40s %-5s %-10s %-10s\n' % ('Cluster', 'n_phs', 'wmpd_max', 'wmpd_min'))
    del table_clust_second_round
    for i in xrange(number_of_trials):
        # if i==1:
        #    print "list_to_remove",list_to_remove
        if i >= max_number_of_trials:
            break  # We can't use more references, we finished the list!
        reference = list_prio[i]
        if (os.path.split(reference))[1] in list_to_remove:
            print "This reference, ", reference, " was already fished with another reference. Skipping this cycle"
            continue  # We go to the next iteration, because this reference has been fished already
        # print "CHECK: len(list_prio) ",len(list_prio)
        # print "CHECK: len(list_to_remove)",len(list_to_remove)
        name_ref = os.path.split(reference)[1]
        name_phi = reference[:-4] + "_ref.phi"
        path_phi = os.path.join(clust_fold, reference[:-4] + "_ref.phi")
        if phstat_version == 'python':
            dict_phs = {}
            dict_phs[reference] = True
            for j in xrange(len(list_prio)):
                name_file = (os.path.split(list_prio[j]))[1]
                if (list_prio[j] not in dict_phs.keys()) and (name_file not in list_to_remove):
                    dict_phs[list_prio[j]] = False
            dict_result_by_trial[list_prio[i]] = startALIXEasPHSTAT(clust_fold, reference_hkl, dict_phs, cell,
                                                                    sg_number, tolerance=tolerance,
                                                                    resolution=resolution, cycles=cycles, f_fom=f_fom)
        elif phstat_version == 'fortran':
            # NOTE: I need to use a relative path because fortran does not accept such long paths
            path = os.path.normpath(clust_fold)
            list_path = path.split(os.sep)
            relative_path_clust_fold = './' + list_path[-1]
            # 1) Write an ls file with the list of phs, putting first the reference
            path_ls = os.path.join(clust_fold, reference[:-4] + "_ref.ls")
            relative_ref = os.path.join(relative_path_clust_fold, name_ref)
            lsrotfile = open(path_ls, 'w')
            lsrotfile.write(relative_ref + '\n')
            for j in xrange(len(list_prio)):
                phs_namefile = (os.path.split(list_prio[j]))[1]
                phs_relative_path = os.path.join(relative_path_clust_fold, phs_namefile)
                # if phs_namefile in list_to_remove:
                #    print 'phs_namefile ',phs_namefile,' is in list_to_remove'
                if phs_relative_path != relative_ref and (phs_namefile not in list_to_remove):
                    lsrotfile.write(phs_relative_path + '\n')
            lsrotfile.close()
            # 2) Link the ins file
            os.link(path_ins, os.path.join(clust_fold, path_ls[:-3] + ".ins"))
            # 3) Launch the function in alixe_library
            complete_output = al.call_phstat_print_for_clustering(path_ls[:-3], relative_path_clust_fold, path_phstat,
                                                                  resolution, seed, tolerance, cycles)
            print complete_output
            ls_file = open(path_ls, 'r')
            ls_content = ls_file.read()
            del ls_file
            dict_result_by_trial[reference] = {name_phi: None}
            dict_result_by_trial[reference][name_phi] = al.read_phstat_print_clusterization_output(ls_content,
                                                                                                   complete_output,
                                                                                                   cycles)
        # NOTE: Because of the relative paths, in the fortran casse I need to modify the dictionary now to contain the full paths
        raw_clust_second_round = open(os.path.join(clust_fold, "info_clust_second_round_raw"), 'a')
        table_clust_second_round = open(os.path.join(clust_fold, "info_clust_second_round_table"), 'a')
        raw_clust_second_round.write(
            "*********************************************************************************************************************************************\n")
        n_phs = len(dict_result_by_trial[list_prio[i]][name_phi].keys())
        raw_clust_second_round.write("CLUSTER fishing with reference " + name_ref + ' , found in file ' + name_ref[
                                                                                                          :-4] + '_ref.phi, containing ' + str(
            n_phs) + ' phase files' + '\n')
        for key1 in dict_result_by_trial[list_prio[i]].keys():
            wmpe_max = 0.0
            wmpe_min = 90.0
            for key2 in dict_result_by_trial[list_prio[i]][key1].keys():
                if phstat_version == 'fortran':
                    new_key2 = os.path.join(clust_fold, os.path.split(key2)[1])
                    dict_result_by_trial[list_prio[i]][key1][new_key2] = copy.deepcopy(
                        dict_result_by_trial[list_prio[i]][key1][key2])
                    del dict_result_by_trial[list_prio[i]][key1][key2]
                    raw_clust_second_round.write(
                        new_key2 + "\t" + str(dict_result_by_trial[list_prio[i]][key1][new_key2]) + '\n')
                    wmpe = dict_result_by_trial[list_prio[i]][key1][new_key2]['wMPE']
                else:
                    raw_clust_second_round.write(
                        key2 + "\t" + str(dict_result_by_trial[list_prio[i]][key1][key2]) + '\n')
                    wmpe = dict_result_by_trial[list_prio[i]][key1][key2]['wMPE']
                list_to_remove.append(os.path.split(key2)[1])
                if wmpe > wmpe_max:
                    wmpe_max = wmpe
                if wmpe < wmpe_min and wmpe != 0.0:  # Because 0.0 is to itself, the reference
                    wmpe_min = wmpe
            name_cluster = (os.path.split(key1))[1]
            if wmpe_max == 0.0 and wmpe_min == 90.0:  # Then we have not clustered them
                wmpe_max = 0.0
                wmpe_min = 0.0
            table_clust_second_round.write('%-40s %-5i %-10f %-10f \n' % (name_cluster, n_phs, wmpe_max, wmpe_min))
        del raw_clust_second_round
        del table_clust_second_round
    return dict_result_by_trial


def startALIXEasPHSTAT(clust_fold, reference_hkl, dict_phs, cell, sg_number, tolerance=75.0, resolution=2.0, cycles=3,
                       f_fom=True):
    '''Minimal version of the ALIXE program that performs the same as phstat but with the possibility of testing more than one
    reference agains the same pool of solutions.
    Parameters:
    clust_fold = directory to perform the clustering
    reference_hkl = the path for the hkl given for shelxe, to filter by evalues the phs in case there is extrapolated data
    dict_phs = phs dictionary with values that are the phs and keys set to True if they have to be tested as references and False otherwhise
    cell = the unit cell parameters, in the form of a list of floats
    sg_number = integer with the space group number
    tolerance = float with the tolerance for the MPE value between the phase sets
    cycles = integer, number of cycles of phase combination to be performed
    f_fom = boolean. If true, mpe are weighted by Fvalues, otherwhise, by E values
    '''

    print '\n ALIXE started: ', time.strftime("%c")
    start = datetime.datetime.now()
    dictio_clusters = {}  # Dictionary to return the results

    # Get the symmetry information thanks to the sg_number
    symops = al.get_symops_from_sg_dictionary(sg_number)
    polar, origins = al.get_origins_from_sg_dictionary(sg_number)
    latt = al.get_latt_from_sg_dictionary(sg_number)

    # Calculate the coefficients needed for other calculations
    unit = cell[0] * cell[1] * cell[2]  # Sides of the cell
    squared_sides = []
    coef1 = []
    coef2 = []
    coef3 = []
    list_cos = []
    list_sin = []
    for n in range(0, 3):
        angle_rad = 1.74533 * math.pow(10, -2) * (cell[n + 3])  # Convert to radian units the angles
        cos_angle = numpy.cos(angle_rad)
        list_cos.append(cos_angle)
        sin_angle = numpy.sin(angle_rad)
        list_sin.append(sin_angle)
        exp1 = 2.0 * unit * (cos_angle / cell[n])  # 2.0 * volume * (cos_angle/side)
        coef1.append(exp1)
        sside = cell[n] ** 2
        squared_sides.append(sside)
    volume = unit * math.sqrt(1 - (list_cos[0]) ** 2 - (list_cos[1]) ** 2 - (list_cos[2]) ** 2 + (
    2 * list_cos[0] * list_cos[1] * list_cos[2]))  # V = abc * (1-cos²α-cos²β-cos²γ+2cosαcosβcosγ)^(-1/2)
    unit = unit / volume
    for n in range(0, 3):
        exp2 = 0.25 * unit * ((list_sin[n] / cell[n]) ** 2)
        coef2.append(exp2)
    unit = unit / volume
    coef3.append(unit * cell[0] * (list_cos[1] * list_cos[2] - list_cos[0]))
    coef3.append(unit * cell[1] * (list_cos[0] * list_cos[2] - list_cos[1]))
    coef3.append(unit * cell[2] * (list_cos[0] * list_cos[1] - list_cos[2]))

    # TODO: Revise this part with Isabel. Should be filtering the phs according to what phases are not extrapolated
    ##    # Set the reference for e-values calculation
    ##    array_hkl=al.read_hkl_file(reference_hkl)
    ##    print "BEGIN len(array_hkl)",len(array_hkl)
    ##    array_filter=al.check_resolution_limit(array_hkl,resolution,coef2,coef3)
    ##    file_hkl=open(reference_hkl[:-4]+'_filter_res.hkl','w')
    ##    print "RESOLUTION len(array_hkl)",len(array_filter)
    ##    array_filter_symm=al.find_equivalent_reflections(array_filter,symops,True) # Merge by symmetry
    ##    print "SYMMETRY len(array_hkl)",len(array_filter_symm)
    ##    array_filter_symm_sorted=al.sort_reflections_phs(array_filter_symm)
    ##    for r in range(len(array_filter_symm_sorted)):
    ##        # 3I4,2F8.2,I4
    ##        file_hkl.write('%4i%4i%4i%8.2f%8.2f\n' % (array_filter_symm_sorted[r][0],array_filter_symm_sorted[r][1],array_filter_symm_sorted[r][2],array_filter_symm_sorted[r][3],array_filter_symm_sorted[r][4]))
    ##    del file_hkl
    ##    sys.exit()

    # Generate a list with the phs from the dictionary that are references
    list_references = []
    for _, phs in enumerate(dict_phs.keys()):
        if dict_phs[phs] == True:
            list_references.append(phs)

    # TEMPORARY: Use the first phs file to calculate the evalues
    ref_for_evalues = list_references[0]
    array_ref = al.read_phs_file(ref_for_evalues)  # NOTE: My phi files are the same as phs files NOW
    array_ref = al.check_resolution_limit(array_ref, resolution, coef2,
                                          coef3)  # We will use the coefficients saved in coef2 and coef3
    array_ref = al.find_equivalent_reflections(array_ref, symops, False)
    array_ref = al.sort_reflections_phs(array_ref)
    # Find epsilon factor and 1/dsquared 
    epsilon, onedsquared, res_max = al.get_epsilon_1overdsquared_and_resmax(array_ref, symops, coef2, coef3)
    # Estimate E-values (whats changes in phs is phases, not structure factors)
    array_evalues, array_aux = al.get_evalues_and_array_aux(epsilon, onedsquared, res_max,
                                                            array_ref)  # e-values only in the array_evalues
    # Save the information that is common to all phs to avoid overloading the memory
    array_miller_indices = []
    array_f_sigf = []
    for i, _ in enumerate(array_ref):
        array_miller_indices.append([array_ref[i][0], array_ref[i][1], array_ref[i][2]])
        array_f_sigf.append([array_ref[i][3], array_ref[i][6]])

    array_ref = None

    n_phs = len(dict_phs.keys())
    print "\n Processing ", str(n_phs), " phase files"
    # I need two copies of the arrays because if there is a list with more than one reference
    # I do need to have the original values
    list_arrays_va_to_modify = []
    list_arrays_vb_to_modify = []
    list_arrays_to_modify = []
    list_names_phs = []
    for _, phs in enumerate(dict_phs.keys()):
        # TODO: Encapsulate this processing of the files in a function and then call threads.
        # TODO: be careful of either locking at the step of saving or changing the data structure where to save them
        print "\n Reading phs ", phs
        if phs[-4:] == ".phi":  # NOTE: My phi files are the same as phs files NOW
            array_phs = al.read_phs_file(phs)
        elif phs[-4:] == ".phs":
            array_phs = al.read_phs_file(phs)
        else:
            print "ALIXE only supports clustering of SHELXE phi or phs files"
            sys.exit()
        # Save the name so that we have the correct order
        list_names_phs.append(phs)  # phs is the full path name of the phs
        # Check resolution limit
        filter_res_array = al.check_resolution_limit(array_phs, resolution, coef2,
                                                     coef3)  # We will use the coefficients saved in coef2 and coef3
        # Find equivalent reflections with standard indices and transform phases
        array_merged = al.find_equivalent_reflections(filter_res_array, symops, False)
        # Sort reflections in order of higher h,k and l indexes
        sorted_array = al.sort_reflections_phs(array_merged)
        ##        # Filter by e-values of the hkl reference, so that we do not have extrapolated reflections
        ##        evalue_sorted_array=al.filter_array_by_evalues(sorted_array,array_evalues)
        # Reduce the array to the phi and FOM values
        reduced_array = al.reduce_array_phs_to_PHI_and_FOM(sorted_array)  # [FOM,PHI]
        list_arrays_to_modify.append(reduced_array)
        # For all, including the reference, obtain VA and VB
        arrva, arrvb = al.get_VA_and_VB(reduced_array)
        list_arrays_va_to_modify.append(arrva)
        list_arrays_vb_to_modify.append(arrvb)

    # Save the bigger arrays as pickle objects
    save_double_array = open(os.path.join(clust_fold, 'double_array.pkl'), 'wb')
    cPickle.dump(list_arrays_to_modify, save_double_array)
    save_double_array.close()
    save_arrva = open(os.path.join(clust_fold, 'arrva.pkl'), 'wb')
    cPickle.dump(list_arrays_va_to_modify, save_arrva)
    save_arrva.close()
    save_arrvb = open(os.path.join(clust_fold, 'arrvb.pkl'), 'wb')
    cPickle.dump(list_arrays_vb_to_modify, save_arrvb)
    save_arrvb.close()

    # Now test all references in the list    
    for _, reference in enumerate(list_references):
        print "\n Testing reference", reference
        name_phi = (os.path.split(reference)[1])[:-4] + '_ref.phi'
        # print 'name_phi',name_phi
        path_phi = os.path.join(clust_fold, name_phi)
        # print 'path_phi',path_phi
        position = list_names_phs.index(reference)
        dictio_clusters[path_phi] = {}
        # We load the arrays again for each reference
        back_double_array = open(os.path.join(clust_fold, 'double_array.pkl'), 'rb')
        list_arrays_to_modify = cPickle.load(back_double_array)
        back_double_array.close()
        # print "position",position
        # print 'list_names_phs[position]',list_names_phs[position]
        array_ref = copy.deepcopy(list_arrays_to_modify[position])
        back_arrva = open(os.path.join(clust_fold, 'arrva.pkl'), 'rb')
        list_arrays_va_to_modify = cPickle.load(back_arrva)
        back_arrva.close()
        back_arrvb = open(os.path.join(clust_fold, 'arrvb.pkl'), 'rb')
        list_arrays_vb_to_modify = cPickle.load(back_arrvb)
        back_arrvb.close()
        # Phase combination macrocycles
        for c in range(cycles):
            print '\n Cluster analysis cycle ', str(c + 1)
            print '\n N' + '\t' + 'wMPE' + '\t' + 'Dif' + '\t' + 'MapCC' + '\t' + 'Phase file'
            # print 'array_ref[0] beginning cycle',array_ref[0]
            dictio_clusters[path_phi][c + 1] = {}  # We will save the results cycle by cycle
            # Find and apply origin shifts relative to reference phases
            dict_wmpes_sel = {}
            list_wmpes_sel = []
            for pos, _ in enumerate(list_arrays_to_modify):
                name_current_phs = list_names_phs[pos]
                dict_wmpes_sel[name_current_phs] = {"wMPE": 0.0, "diff_wMPE": 0.0, "mapcc": 0.0}
                s = 0.0
                t = 0.0
                nreflections = len(list_arrays_to_modify[pos])
                for r, _ in enumerate(list_arrays_to_modify[pos]):
                    list_arrays_to_modify[pos][r][1] = list_arrays_vb_to_modify[pos][r]  # Phases
                    # Check the weight to apply to the structure factors
                    # Now we have the hkl and f info in other arrays: array_miller_indices and array_f_sigf 
                    if f_fom == True:
                        list_arrays_to_modify[pos][r][0] = array_f_sigf[r][0] * list_arrays_va_to_modify[pos][
                            r]  # StructureFactor * weight current phs
                    elif f_fom == False:
                        list_arrays_to_modify[pos][r][0] = array_evalues[r] * list_arrays_va_to_modify[pos][
                            r]  # E-value * weight current phs
                    s = s + list_arrays_to_modify[pos][r][0]
                    t = t + list_arrays_to_modify[pos][r][0] * abs(
                        ((900.0 + list_arrays_to_modify[pos][r][1] - list_arrays_vb_to_modify[pos][r]) % 360.0) - 180.0)
                    # Here is where when we have to apply the origin shifts and calculate the wMPE and the mapCC
                current_weights = list_arrays_va_to_modify[
                    pos]  # array with the ORIGINAL weights of the current phs. Does NOT change with every cycle
                sorted_list_wmpe, sorted_list_mapcc = al.apply_origin_shift_and_compute_wMPE_and_CC(nreflections,
                                                                                                    symops, array_ref,
                                                                                                    array_miller_indices,
                                                                                                    array_f_sigf,
                                                                                                    list_arrays_to_modify[
                                                                                                        pos],
                                                                                                    current_weights,
                                                                                                    array_evalues,
                                                                                                    array_aux,
                                                                                                    sg_number, f_fom)
                shift_to_apply = sorted_list_wmpe[0][1]
                wmpe_shift = sorted_list_wmpe[0][0]
                for i in range(len(sorted_list_mapcc)):  # Get the mapCC that corresponds to that shift
                    if sorted_list_mapcc[i][1] == shift_to_apply:
                        map_CC = sorted_list_mapcc[i][0]
                # Get difference top best/second best for the wMPE if we are not in a polar sg
                if polar == False:
                    diff_mpe = sorted_list_wmpe[1][0] - sorted_list_wmpe[0][0]
                elif polar == True:
                    # NOTE: Maybe I can find another indicator,or search the way to produce sth similar
                    diff_mpe = 0
                dict_wmpes_sel[name_current_phs] = {"wMPE": wmpe_shift, "diff_wMPE": diff_mpe, "mapcc": map_CC,
                                                    'shift': shift_to_apply}
                list_wmpes_sel.append((name_current_phs, wmpe_shift))
                # print 'name_current_phs ',name_current_phs,'sorted_list_wmpe ',sorted_list_wmpe
                for r, _ in enumerate(list_arrays_vb_to_modify[pos]):  # Modify the phases saved
                    list_arrays_vb_to_modify[pos][r] = ((720.0 + (360.0 * (((
                                                                            shift_to_apply[0] * array_miller_indices[r][
                                                                                0]) + (
                                                                            shift_to_apply[1] * array_miller_indices[r][
                                                                                1]) + (
                                                                            shift_to_apply[2] * array_miller_indices[r][
                                                                                2])) % 1.0)) +
                                                         list_arrays_vb_to_modify[pos][r]) % 360.0)
                    # Sort the phase sets according to their mean phase error
            sorted_by_mpe = sorted(list_wmpes_sel, key=lambda x: x[1], reverse=False)
            # print 'sorted_by_mpe',sorted_by_mpe
            # Check if anything will cluster under the tolerance we set
            if len(sorted_by_mpe) > 1:
                # print "Entered len(sorted_by_mpe)>1"
                if sorted_by_mpe[1][1] > tolerance:
                    # print "Entered sorted_by_mpe[1][1]>tolerance"
                    print 'There are not solutions to cluster under the tolerance set'
                    # print "path_phi",path_phi
                    dictio_clusters[path_phi] = {sorted_by_mpe[0][0]: {'wMPE': sorted_by_mpe[0][1],
                                                                       'diff_wMPE': dict_wmpes_sel[sorted_by_mpe[0][0]][
                                                                           'diff_wMPE'],
                                                                       'mapcc': dict_wmpes_sel[sorted_by_mpe[0][0]][
                                                                           'mapcc'], 'shift': [0.0, 0.0, 0.0]}}
                    # Write the phases with the resolution cut to a file
                    file_phi = open(path_phi, 'w')
                    pos = list_names_phs.index(sorted_by_mpe[0][0])  # Position of the reference array in the reading
                    nreflections = len(list_arrays_va_to_modify[pos])
                    for r in xrange(nreflections):
                        # 3I4,F9.2,F8.4,F8.1,F8.2
                        file_phi.write('%4i%4i%4i%9.2f%8.4f%8.1f%8.2f\n' % (
                        array_miller_indices[r][0], array_miller_indices[r][1], array_miller_indices[r][2],
                        array_f_sigf[r][0], list_arrays_va_to_modify[pos][r], list_arrays_vb_to_modify[pos][r],
                        array_f_sigf[r][1]))
                    del file_phi
                    return dictio_clusters
                    # else:
                    #   print "Hurra! We have things to cluster"
            # Combine phases for cluster
            # 1) Generate clean arrays of contributions VA and VB and for the combination
            va = [0.0 for _ in range(len(array_ref))]  # fom
            vb = [0.0 for _ in range(len(array_ref))]  # phases
            wa_combi = [0.0 for _ in range(len(array_ref))]
            # fa_combi=[0.0 for _ in range(len(array_ref))] FORTRAN: FA(I)=WA(I)*ET(I) (no consistente)
            # 2) Iterate over the phs files (consider sorting)
            for ind in range(len(sorted_by_mpe)):
                current_phs = sorted_by_mpe[ind][0]
                current_wmpe = sorted_by_mpe[ind][1]
                current_dif_wmpe = dict_wmpes_sel[current_phs]['diff_wMPE']
                current_mapcc = dict_wmpes_sel[current_phs]['mapcc']
                # print "current_phs",current_phs, "current_wmpe",current_wmpe
                # I need to find which is the position of this phs in the arrays
                pos = list_names_phs.index(current_phs)
                if current_wmpe < tolerance:
                    print ' ', ind + 1, '\t', "{0:.1f}".format(current_wmpe), '\t', "{0:.1f}".format(
                        current_dif_wmpe), '\t', "{0:.2f}".format(current_mapcc), '\t', current_phs
                    dictio_clusters[path_phi][c + 1][current_phs] = copy.deepcopy(dict_wmpes_sel[current_phs])
                    for r in xrange(nreflections):
                        list_arrays_to_modify[pos][r][1] = list_arrays_vb_to_modify[pos][r]
                        t = 0.0174533 * list_arrays_to_modify[pos][r][1]  # 0.0174533 is 2pi/360
                        s = list_arrays_va_to_modify[pos][r]
                        va[r] = va[r] + (s * (numpy.cos(t)))
                        vb[r] = vb[r] + (s * (numpy.sin(t)))
                        # print t,s,numpy.cos(t),numpy.sin(t)
                    t = numpy.sqrt(1.0 / (ind + 1))
                    for r in xrange(nreflections):
                        wa_combi[r] = min(1.0, numpy.sqrt(math.pow(va[r], 2) + math.pow(vb[r], 2)) * t)
                        # Use either structure factors or normalized structure factors for the combined phases
                        list_arrays_to_modify[pos][r][0] = wa_combi[r] * array_evalues[
                            r]  # e-weighted,as it is in the fortran prototype
                        ##                        if f_fom==True:
                        ##                            list_arrays_to_modify[pos][r][0]=array_f_sigf[r][0]*wa_combi[r] #  StructureFactor * new_weight
                        ##                        elif f_fom==False:
                        ##                            alist_arrays_to_modify[pos][r][0]=array_evalues[r]*wa_combi[r] # E-value * new_weight
                        if wa_combi[r] > 0.0001:
                            list_arrays_to_modify[pos][r][1] = (720.0 + 57.29578 * numpy.arctan2(vb[r], va[r])) % 360
                    # Compute again the shift, because now we changed the phases
                    sorted_list_wmpe2, sorted_list_mapcc2 = al.apply_origin_shift_and_compute_wMPE_and_CC(nreflections,
                                                                                                          symops,
                                                                                                          array_ref,
                                                                                                          array_miller_indices,
                                                                                                          array_f_sigf,
                                                                                                          list_arrays_to_modify[
                                                                                                              pos],
                                                                                                          current_weights,
                                                                                                          array_evalues,
                                                                                                          array_aux,
                                                                                                          sg_number,
                                                                                                          f_fom)
                    shift_to_apply2 = sorted_list_wmpe2[0][1]
                    wmpe_shift2 = sorted_list_wmpe2[0][0]
                    if shift_to_apply2 != [0.0, 0.0, 0.0]:  # Check if there ever is another shift after
                        if not (abs(shift_to_apply2[0]) < 0.01 and abs(shift_to_apply2[1]) < 0.01 and abs(
                                shift_to_apply2[2]) < 0.01):
                            print "There is another shift to apply after phase merging. CHECK!", shift_to_apply2
                            for r in xrange(nreflections):
                                list_arrays_to_modify[pos][r][1] = ((720.0 + (360.0 * (((shift_to_apply2[0] *
                                                                                         array_miller_indices[r][0]) + (
                                                                                        shift_to_apply2[1] *
                                                                                        array_miller_indices[r][1]) + (
                                                                                        shift_to_apply2[2] *
                                                                                        array_miller_indices[r][
                                                                                            2])) % 1.0)) +
                                                                     list_arrays_to_modify[pos][r][1]) % 360.0)
                    for r in xrange(
                            nreflections):  # Change the phases in the reference array so that they correspond to the combined map
                        array_ref[r][1] = list_arrays_to_modify[pos][r][1]
                        # print 'array_ref[0] ending cycle',array_ref[0]
                        # print "POST list_arrays_to_modify[0]",list_arrays_to_modify[0][0]
                        # print "POST list_arrays_va_to_modify[0]",list_arrays_va_to_modify[0][0]
                        # print "POST list_arrays_vb_to_modify[0]",list_arrays_vb_to_modify[0][0]
        # Write the cumulative phases to a file
        file_phi = open(path_phi, 'w')
        # print 'path_phi',path_phi
        for r in xrange(len(array_ref)):
            # 3I4,F9.2,F8.4,F8.1,F8.2
            file_phi.write('%4i%4i%4i%9.2f%8.4f%8.1f%8.2f\n' % (
            array_miller_indices[r][0], array_miller_indices[r][1], array_miller_indices[r][2], array_f_sigf[r][0],
            wa_combi[r], array_ref[r][1], array_f_sigf[r][1]))
        del file_phi

    print '\n Cleaning up....'
    for fich in os.listdir(clust_fold):
        if fich.endswith('.pkl'):
            os.remove(os.path.join(clust_fold, fich))
    # Return just the last cycle results
    dictio_clusters_third = {}
    for key in dictio_clusters.keys():
        dictio_clusters_third[key] = dictio_clusters[key][cycles]
    return dictio_clusters_third
    end = datetime.datetime.now()
    print " end - start time ", end - start


def startALIXEforARCIMBOLDO(clust_fold, cluster_id, reference_hkl, cell, sg_symbol, tolerance_list=[60.0, 88.0],
                            resolution=2.0, cycles=3, f_fom=True, run='SHREDDER', mode='one_step', fishing_folder=None,
                            confibor=None, ncores=1):
    """ Function to call ALIXE internally from other program. The arguments needed are:
    clust_fold = directory to perform the clustering
    cluster_id = integer with the rotation cluster to evaluate
    reference_hkl = the path for the hkl given for shelxe, to filter by evalues the phs in case there is extrapolated data
    cell = the unit cell parameters, in the form of a list (STR! CONVERT TO FLOAT)
    sg_symbol = string with the space group symbol
    tolerance = list of floats with tolerances for mpe 
    cycles = integer, number of cycles of phase combination to be performed
    f_fom = boolean. If true, mpe are weighted by Fvalues, otherwhise, by E values
    run = string. Can be 'ARCIMBOLDO', 'BORGES', 'SHREDDER'
    mode = string. Can be 'fish','full','one_step','two_steps'
    fishing_folder = string. Path to the folder in which ALIXE has to fish in case of that mode. 
    confibor = ConfigParser object
    ncores = number of real cores in the machine, as read by ARCIMBOLDO or ARCIMBOLDO_BORGES"""

    print '\n ALIXE started: ', time.strftime("%c")
    dictio_clusters = {}  # Dictionary to return the results
    skip_FOM_reading = False
    ent_present = False
    hard_limit_phs = 100
    skip_copyfiles = False
    skip_ins_generation = False

    # Check if alixe has been already performed on that folder
    basepath_clustfold = clust_fold
    print "Root folder for clustering", basepath_clustfold
    clust_fold = os.path.join(basepath_clustfold, str(cluster_id))
    print "Currrently processing folder ", clust_fold
    if not os.path.exists(clust_fold):
        os.makedirs(clust_fold)  # Generate the directory if it is not already there
    path_for_tuple = os.path.join(clust_fold, 'final_alixe_tuple.pkl')
    if os.path.exists(path_for_tuple):  # Then first round was completed
        if mode == 'one_step':
            back_final_tuple = open(path_for_tuple, 'rb')
            (path_ins, final_dict) = cPickle.load(back_final_tuple)
            print "Alixe has been succesfully run already. Returning the tuple with the results"
            return (path_ins, final_dict)
        elif mode == 'two_steps':
            # In any case we do not need to reperform steps
            print "We are in the level of setting the skips to True"
            skip_FOM_reading = True
            skip_copyfiles = True
            skip_ins_generation = True
            # Then, the first round was finished but we need to do the second
            path_list_clusters = os.path.join(basepath_clustfold, 'list_clustered.pkl')
            if os.path.exists(path_list_clusters):
                print "We did the second step already on some folder"
                back_list = open(path_list_clusters, 'rb')
                list_to_remove = cPickle.load(
                    back_list)  # This list contains the phi files that have been already clustered at some point in the second step
            elif not os.path.exists(path_list_clusters):
                # Then let's create it because we are going to append things to it after we cluster using the references of this rotation cluster
                list_to_remove = []

    if mode not in ['fish', 'full', 'one_step', 'two_steps']:
        print "Sorry, you need to provide a valid mode for alixe: either 'fish','full','one_step' or 'two_steps' "
        sys.exit(0)

    # Generate a fake ins that SHELXE can read for expansions
    if not skip_ins_generation:
        for i in range(len(cell)):
            cell[i] = float(cell[i])
        sg_number = al.get_space_group_number_from_symbol(sg_symbol)
        path_ins = os.path.join(clust_fold, 'name.ins')
        al.generate_fake_ins_for_shelxe(path_ins, cell, sg_number)

    if run == 'SHREDDER' or run == 'BORGES':
        if not isinstance(confibor, ConfigParser.ConfigParser):
            print "Sorry, you need to provide a valid bor object in order to run ALIXE on ", run  # Check the bor
            sys.exit()
        topexp = confibor.get('ARCIMBOLDO-BORGES', 'topexp')
        basepath = confibor.get('GENERAL', 'working_directory')
        if not skip_copyfiles:
            list_chosen = al.get_files_from_9_EXP_BORGES(basepath, clust_fold, cluster_id=cluster_id, mode=9,
                                                         hard_limit_phs=hard_limit_phs)
        subrun = 'BORGES'  # To know how to read the solutions. Spherical is generating a BORGESARCI job
        if confibor.has_option('GENERAL', 'ent_path'):  # therefore the lst will have post mortem info
            ent_path = confibor.get('GENERAL', 'ent_path')
            if os.path.exists(ent_path):
                ent_present = True
    elif run == 'ARCIMBOLDO':
        if not confibor.isinstance(ConfigParser.ConfigParser):
            print "Sorry, you need to provide a valid bor object in order to run ALIXE on ", run  # Check the bor
            sys.exit()
            # TODO: Get the link to the files and save them in the CLUSTERING folder
            # TODO: Check if there is an ent file provided in the bor and therefore the lst will have post mortem info
    else:
        print "Sorry, ALIXE can just run on SHREDDER, ARCIMBOLDO or BORGES"
        sys.exit()

    # Read the FOMs of the PHS files in the phaser and shelxe steps
    if not skip_FOM_reading:
        phs_files = al.list_files_by_extension(clust_fold, 'phs')
        dictio_fragments = {}
        for phs in phs_files:
            dictio_fragments[phs[:-4]] = {'rot_cluster': None, 'llg': None, 'zscore': None, 'initcc': None,
                                          'efom': None, 'pseudocc': None, 'list_MPE': None}
        dictio_fragments = al.get_FOMs_from_lst_files_in_folder(clust_fold, dictio_fragments, ent_present)
        dictio_fragments = al.get_FOMs_from_sum_files_in_folder(basepath, clust_fold, dictio_fragments, ent_present,
                                                                subrun)

    # First round

    if mode == 'one_step':
        tolerance = tolerance_list[0]
        dict_first_round = {}  # To save the results per rotation cluster
        # Call the function clustering_all_in_ALIXE_under_a_tolerance per each of them and save the results
        dict_first_round = clustering_all_in_ALIXE_under_a_tolerance(clust_fold, reference_hkl, list_chosen, cell,
                                                                     sg_symbol, tolerance, resolution, cycles, f_fom,
                                                                     ncores=topexp)
        # Save the FOM information for priorization in second round if there is
        for cluster in dict_first_round.keys():
            if dict_first_round[cluster]['n_phs'] > 1:
                list_llg = []
                list_zscore = []
                for phs in dict_first_round[cluster]['dictio_result'].keys():  # For each phs in the cluster
                    list_llg.append(dictio_fragments[phs[:-4]]['llg'])
                    list_zscore.append(dictio_fragments[phs[:-4]]['zscore'])
                # We add the top LLG or ZSCORE as representative of the cluster
                dict_first_round[cluster]['dict_FOMs']['llg'] = (sorted(list_llg, reverse=True))[0]
                dict_first_round[cluster]['dict_FOMs']['zscore'] = (sorted(list_zscore, reverse=True))[0]
            elif dict_first_round[cluster]['n_phs'] == 1:
                name_file = ((dict_first_round[cluster]['dictio_result'].keys())[0])[:-4]
                dict_first_round[cluster]['dict_FOMs']['llg'] = dictio_fragments[name_file]['llg']
                dict_first_round[cluster]['dict_FOMs']['zscore'] = dictio_fragments[name_file]['zscore']
        save_final_tuple = open(os.path.join(clust_fold, 'final_alixe_tuple.pkl'), 'wb')
        cPickle.dump((path_ins, dict_first_round), save_final_tuple)
        save_final_tuple.close()
        # Return only the clusters that contain more than one phase set so that they are expanded
        print "len(dict_first_round) before cleaning the single clusters", len(dict_first_round)
        for cluster in dict_first_round.keys():
            if dict_first_round[cluster]['n_phs'] == 1:
                del dict_first_round[cluster]
        print "len(dict_first_round) after cleaning the single clusters", len(dict_first_round)
        return (path_ins, dict_first_round)

    elif mode == 'full':
        print "full mode is still not implemented"
        sys.exit(0)

    # Second round
    if mode == 'two_steps':
        print "Two steps mode... Not yet implemented"
        sys.exit(0)
        tolerance = tolerance_list[1]
        # Read all dictionaries from all rotation clusters
        dictio_clust_by_rotclu = {}
        limit_ref = 0
        list_phis_to_evaluate = []  # List with tuples (rotclu,cc,phi) for sorting the files according to FOM (topLLG?) and to remove the files we have already fished
        cluster_folders = [d for d in os.listdir(basepath_clustfold) if
                           os.path.isdir(os.path.join(basepath_clustfold, d))]
        for cluster_folder in cluster_folders:
            path_cluster = os.path.join(basepath_clustfold, cluster_folder)
            path_pkl = os.path.join(path_cluster, 'final_alixe_tuple.pkl')
            pkl_first_round = open(path_pkl, 'rb')
            path_ins, dictio_clust_first_round = cPickle.load(pkl_first_round)
            dictio_clust_by_rotclu[cluster_folder] = dictio_clust_first_round
            for phi_file in dictio_clust_first_round.keys():
                list_phis_to_evaluate.append(
                    (phi_file, cluster_folder, dictio_clust_first_round[phi_file]['dict_FOMs']['llg']))
                if cluster_folder == str(cluster_id):
                    limit_ref = limit_ref + 1
        sorted_list_phis_to_evaluate = sorted(list_phis_to_evaluate, key=operator.itemgetter(2, 1, 0))
        final_list_to_evaluate = []
        for i in range(len(sorted_list_phis_to_evaluate)):
            if sorted_list_phis_to_evaluate[i][0] not in list_to_remove:
                final_list_to_evaluate.append(sorted_list_phis_to_evaluate[i][0])
        print "****** Lista a evaluar *******"
        for i in range(len(final_list_to_evaluate)):
            print i, final_list_to_evaluate[i]
        sys.exit(0)
        # dict_second_round=clustering_all_in_ALIXE_under_a_tolerance(clust_fold,reference_hkl,final_list_to_evaluate,cell,sg_symbol,tolerance,resolution,cycles,f_fom,ncores=limit_ref)
        print 'dict_second_round', dict_second_round
        sys.exit(0)
        # We need to generate a list of the things we do not need to cluster anymore because were already tried    
        list_to_remove_file = open(path_list_clusters,
                                   'wb')  # We overwrite the pickle file with a new one that contains the current list_to_remove
        cPickle.dump(list_to_remove, list_to_remove_file)
        sys.exit(0)


# TODO: Function to call recursively startALIXE in order to get all clusters under a tolerance

def clustering_all_in_ALIXE_under_a_tolerance(clust_fold, reference_hkl, list_phs, cell, sg_symbol, tolerance,
                                              resolution, cycles, fom_weigth, ncores):
    '''This function calls recursively to startALIXE in order to generate all possible clusters under a certain tolerance
    It does it by discarding the phs that had already been clustered with a previous reference'''
    dictio_clusters = {}
    sg_number = al.get_space_group_number_from_symbol(sg_symbol)
    # Convert the cell to float
    for i in range(len(cell)):
        cell[i] = float(cell[i])
    while (len(list_phs) > 0) and (len(dictio_clusters.keys()) <= ncores):
        dict_phs = {}
        for i in range(len(list_phs)):
            if i == 0:
                dict_phs[list_phs[i]] = True
            else:
                dict_phs[list_phs[i]] = False
        results = startALIXEasPHSTAT(clust_fold, reference_hkl, dict_phs, cell, sg_number, tolerance, resolution,
                                     cycles, fom_weigth)
        # I can safely assume that if I just give one reference,
        # the results dictionary keys list will have just one phi name
        phi_name = results.keys()[0]
        phs_to_remove = results[phi_name].keys()
        n_phs = len(phs_to_remove)
        dictio_clusters[phi_name] = {'dictio_result': results[phi_name], 'n_phs': n_phs, 'dict_FOMs': {}}
        file_debug_first_round = open(os.path.join(clust_fold, "clusters_dictionary.txt"), 'a')
        file_debug_first_round.write(
            phi_name + '\t' + 'dictio_result ' + str(results[phi_name]) + 'n_phs ' + str(n_phs) + "\n")
        file_debug_first_round.write("**********************************************")
        file_debug_first_round.close()
        new_list_phs = []
        for phs in list_phs:
            if phs in phs_to_remove:
                continue
            else:
                new_list_phs.append(phs)
        list_phs = new_list_phs
    return dictio_clusters


# Main module
if __name__ == "__main__":
    if len(sys.argv) == 1:
        print "\nUsage: ALIXE.py name.ls [options]"
        print "\nOptions:"
        print "-s=name_reference Use the name_reference phase file in the ls file as reference for clustering. It also accepts a list separated by commas."
        print "-t= Use a tolerance of N degrees for the mean phase difference between the phase sets compared"
        print "-r=N Use data to a resolution of N angstroms. Default is 2.0"
        print "-c=N Do N macrocycles of phase clustering between the sets"
        # print "-h=hkl_filename Reference hkl to calculate evalues"
        # TODO: it is better that symmetry is given either as a number of the sg or a line in the ls (if maps are given)
        print "-p=pdb_filename Read symmetry information from a pdb file"
        print "-w=f or -w=e Use f-weights or e-weights for the calculation of mean phase errors. Default is f"
        print "-d=working_directory Indicate the folder were to put the results. Default is the current working directory"
        sys.exit()
    print "\n Starting date & time " + time.strftime("%c")  # Print initial time
    # Read ls file and save the files in a list
    name_ls = sys.argv[1]
    fich_ls = open(name_ls, 'r')
    list_phs = fich_ls.read().splitlines()
    dictio_phs = {}
    for phs in list_phs:
        dictio_phs[phs] = False
    n_phs = len(list_phs)
    print "\n ", str(n_phs), " phase files found in ", name_ls
    # Read the arguments or set the defaults
    list_options = sys.argv[2:]
    # Defaults
    reference_set = list_phs[0]  # The first one in the ls
    position_ref = 1
    tolerance = 60
    resolution = 2.0
    dictio_phs[reference_set] = True  # Default is to use the first phs in the list
    cycles = 3
    f_fom = True
    reference_hkl = None  # TEMPORARY, NEED TO CHECK WITH ISABEL
    clust_fold = os.getcwd()
    cell = None

    for option in list_options:
        if option.startswith("-s"):
            # Check if they have given a single reference or a list separated by comma
            # if len(option[3:].split(','))>1:
            #    list_references=option[3:].split(',')
            #    for reference in list_references:
            #        dictio_phs[reference]=True
            # elif len(option[3:].split(','))==1: # Then we have a single reference
            # print 'option[3:]',option[3:]
            ref = option[3:]
            dictio_phs[ref] = True
            # If it is not the same one, remove te True from the default
            if ref != reference_set:
                dictio_phs[reference_set] = False
        elif option.startswith("-t"):
            tolerance = float(option[3:])
        elif option.startswith("-r"):
            resolution = float(option[3:])
        elif option.startswith("-h"):
            reference_hkl = option[3:]
        elif option.startswith("-c"):
            cycles = int(option[3:])
        elif option.startswith("-w"):
            weight_fom = option[3:]
            if weight_fom == 'e':
                f_fom = False
        elif option.startswith("-p"):  # Read the symmetry from a pdb file
            pdb_path = str(option[3:])
            cell, sg = al.read_cell_and_sg_from_pdb(pdb_path)  # Cell is a list of floats
            sg_number = al.get_space_group_number_from_symbol(sg)
        elif option.startswith("-d"):
            clust_fold = str(option[3:])

    # Check we have the symmetry info
    if cell == None:
        print "Sorry, you need to provide some pdb file to extract the symmetry information"
        sys.exit(0)

    # Generate a fake ins that SHELXE can read for expansions
    path_ins = os.path.join(clust_fold, 'name.ins')
    al.generate_fake_ins_for_shelxe(path_ins, cell, sg_number)

    # print 'dictio_phs before start alixe',dictio_phs
    # Call the startALIXE function with the appropiate input
    startALIXEasPHSTAT(clust_fold, reference_hkl, dictio_phs, cell, sg_number, tolerance, resolution, cycles, f_fom)
    print "\n Ending date & time " + time.strftime("%c")  # Print initial time