#Statistics.py

""" Authors: Rasa Muller | Barbara Caiffi | Matteo Sanguineti """

import os, sys, ROOT, uproot
from array import array
from math import *
import numpy as np


# Abbreviations:
# sd        = sin declination
# ty        = number of datataking years
# LLR       = Likelihood ratio
# TS        = test statistic
# 
# gr_<...>  = TGraph
# l_<...>   = list

# Setup:
# StatisticalAnalysis() is a composite class
# LLR_TS_distribution() is a component class
# A StatisticalAnalysis will have multiple LLR_TS_distributions 
# (at least one H0 LLR_TS_distribution, and one H1 LLR_TS_distribution are needed)
# 


class StatisticalAnalysis(): # composite class

    def __init__(self):
        # self.computingsystem = "lyon"
        self.run_identifier = "RunID"
        self.inputpath = "./input/path/"     #TODO: Buildin safety: "/" at the end
        self.outputpath = self.inputpath

        self.show_graphical_output = True
        if self.show_graphical_output == True:
            ROOT.gROOT.SetBatch(False)
        elif self.show_graphical_output == False:
            ROOT.gROOT.SetBatch(True)
        self.save_plots = True

        self.datatakingyears = ["ty1.0"] #[yr^-1] 
        self.sindecs = ["sd-0.9"]
        self.dict_of_LLR_TS_distributions = {}


    def set_LLR_TS_distributions()
        for YR in self.datatakingyears:
            for SD in self.sindecs:
                for Nsig in self.

                    dict_of_LLR_TS_distributions[SD+"_"+YR+"_"+Nsig] = LLR_TS_distribution(SD, YR, Nsig):  # component class


    def set_different_outputpath(self, outputpath_to_set):
        '''function to set outputpath do a different path than default (=inputpath)'''
        self.outputpath = outputpath_to_set

        if not os.path.exists(outputpath_to_set):  # create outputdir if it doesn't exist yet
            print("creating: ", outputpath_to_set)
            os.mkdir(outputpath_to_set)

        return


    def get_nsigindset(self, SD, YR, multiply_by_years = False):
        '''get all analysed signal events from existing files in input directory
           TODO? get info from #entries => will take more time because all files then should be opened
        '''

        x_NsigInDset = []
        float_YR = float(YR.split('ty')[-1])
        float_SD = float(SD.split('sd')[-1])
        
        for FILE in os.listdir(self.inputpath):
            if self.run_identifier in FILE and SD in FILE and YR in FILE and ".root" in FILE and "RESULTS" not in FILE:

                #get info from filename
                n_sig = str(FILE).split('_c')[1].split('_')[0]
                if multiply_by_years == True:
                    NsigInDset = round(float(n_sig) * float_YR, 3)  #CHECK round by 3 digits always ok?
                else:
                    NsigInDset = round(float(n_sig), 3)             #CHECK round by 3 digits always ok?
                x_NsigInDset.append( "c"+str(NsigInDset) )

        return x_NsigInDset


    # def determine_CL_value_from_array(a_h1, teststatistic_data):
    #     '''Funtion to determine the CL-value of the data with given test statistic: 
    #     determine P(teststatistic < teststatistic_data | H1) by integration
    #     '''
    #     a_h1 = np.sort(a_h1) #(just in case it's not done yet)

    #     # integral = "number of elements above median" / normfactor
    #     # so: count number of elements higher than test statistic, and norm by total number of entries
    #     CL_value = np.sum(a_h1 >= teststatistic_data) / len(a_h1)
        
    #     return CL_value


    def plot_flux_sindecl(l_years, ):

        #######################################################################
        # NOT SURE IF THIS IS NEEDED HERE IF IT'S ALREADY DEFINED IN THE INIT
        if self.show_graphical_output == False:
            ROOT.gROOT.SetBatch(True)
        elif self.show_graphical_output == True:
            ROOT.gROOT.SetBatch(False)
        #######################################################################

        c_plot_flux_sindecl = ROOT.TCanvas('c_plot_flux_sindecl', 'c_plot_flux_sindecl', 600, 600)

        for i, YR in enumerate(l_years):

            x_sd, y_fluxlimit = flux_sindecl(YR, p3s)#*[p3s/p5s/90CL/95CL]=>integral&H0/H1?*)

            n    = len(x_sd[YR])
            x    = array('d', x_sd[YR])
            y  = array('d', y_fluxlimit[YR])

            gr_p[YR] = ROOT.TGraph(n, x, y)

            gr_p[YR].SetMarkerStyle(4)
            gr_p[YR].SetMarkerColor(1)
            gr_p[YR].SetLineColor(1)
            gr_p[YR].SetLineStyle(i*2+1)
            gr_p[YR].Draw("C*")

            leg.AddEntry(gr_p[YR], str(float(YR.split("ty")[-1])/2.)+"yr datataking ARCA (2BB)", "l")

        leg.Draw("same");
        c_plot_flux_sindecl.SetLogy()
        c_plot_flux_sindecl.Draw()
        c_plot_flux_sindecl.Update()

        if self.save_plots == True:
            c_plot_flux_sindecl.SaveAs(self.outputpath+"plot_flux_sindecl.png")


        return c_plot_flux_sindecl



class LLR_TS_distribution():  # component class
    def __init__(self, SD, YR, Nsig): # TODO SD/YR/NSIG are string inputs => change to float
        self.SD = SD
        self.YR = YR
        self.Nsig = Nsig
        if Nsig == 'c0.0':
            self.H = 0 # H0
        else:
            self.H = 1 # H1
        self.name = 'H'+str(self.H)+"_"+YR+"_"+SD
        print(self.name)
        self.arrayform = self.get_LLR_TS_distrib_from_files(SD, YR, Nsig) #array_with_LLR_TS_values
        self.histform = self.array_to_hist(array_with_LLR_TS_values, True, self.name)
        if self.H == 0:
            self.fittotail = self.exp_fit_right_tail_of_hist( self.histform )
        self.median = self.get_median_of_array(array_with_LLR_TS_values)


    def get_LLR_TS_distrib_from_files(self, SD, YR, N):
        ''' function to get H0 or H1 LLR distribution (teststatistic distribution)
            if N == 0 we're dealing with the H0 distribution, if N>0 it's a H1 distributions
            input: sin(decl): SD, years: YR, N signal events in dataset
            returns: H0 or H1 LLR distribution (depending on N)
        '''
        count       = 0

        for FILE in os.listdir(self.inputpath):

            ROOT.gROOT.cd()

            if self.run_identifier in FILE and SD in FILE and YR in FILE and N+"_" in FILE and ".root" in FILE and "RESULTS" not in FILE:

                count+=1
                print( "%i -> %s" %(count, FILE) )
                if N == "c0.0":
                    tree_Hmodel      = uproot.open(self.inputpath+FILE+":bkg_model")
                else:
                    tree_Hmodel      = uproot.open(self.inputpath+FILE+":sigbkg_model")
                # Get the LLR values
                # --H0 model--
                np_array_model_flik_H0    = tree_Hmodel["fit_H0.fitted_likelihood"].array(library="np")
                np_array_model_flik_H1    = tree_Hmodel["fit_H1.fitted_likelihood"].array(library="np")
                LLR_TS_array = np_array_model_flik_H1 - np_array_model_flik_H0
                #     if (entry.fit_H1.fitted_likelihood-entry.fit_H0.fitted_likelihood) < -1e-3:
                #         print("!!! NEGATIVE LLR !!! \ncheck what's going on")
                print("-------->Total number of Entries in TS distribution: ", len(LLR_TS_array))

                return np.sort(LLR_TS_array)


    def get_median_of_array(self, inputarray):
        '''function to calculate the median of an array
        '''
        inputarray = np.sort(inputarray) #just in case
        return np.median(inputarray)


    def array_to_hist(self, inputarray, normalise = True, title= "array_to_hist"):
        '''function to turn an array into a root histogram
        '''
        nbins = int(( max(inputarray) - min(inputarray) ) * 2 )                           # PRIO TODO! flexible/fixed binning? should be fixed for full analysis and not dependent on values of array?
        hist = ROOT.TH1D(title, title, nbins, min(inputarray), max(inputarray))
        for i in range(len(inputarray)):
            hist.Fill(inputarray[i])
        if normalise == True:
            hist.Scale( 1 / hist.Integral(0, hist.GetNbinsX()+1, "width") )
        return


    # def plot_Hypothesis_test(LLR_teststatistic_H0, LLR_teststatistic_H1, f_fitH0tail_bool=True):
    #     ''' TODO plot_Hypothesis_test_2 / plot_Hypothesis_test_1 / plot_Hypothesis_test'''
    #     OPTION TO DO IT STACKED FOR GIVEN ARRAY OF H1!
    #     title="Hypothesis Testing"
    #     savingname="./Hypothesis_test.png"

    def exp_fit_right_tail_of_hist(hist_to_fit):
        '''function that fits an exponential to the right tail of given histogram
        '''
        ffit = ROOT.TF1("ffit","expo", hist_to_fit.GetMean()+hist_to_fit.GetStdDev(), hist_to_fit.GetXaxis().GetXmax() )  # range min, max
        
        hist_to_fit.Fit(ffit,"R")      # 'exp([Constant]+[Slope]*x)'

        # Get fit-parameter info
        f_fit_h0tail = hist_to_fit.GetFunction("ffit")
        f_fit_h0tail.SetLineColor(2)
        f_fit_h0tail.SetLineStyle(4)

        p0 = f_fit_h0tail.GetParameter(0)
        p0_err = f_fit_h0tail.GetParError(0)
        p1 = f_fit_h0tail.GetParameter(1)
        p1_err = f_fit_h0tail.GetParError(1)
        print('\nexp_fit_right_tail_of_hist FITTED FUNCTION\nFITTED FUNCTION = exp([Constant]+[Slope]*x)\n where: [Constant] = {0} +- {1}, \n and:   [Slope] = {2} +- {3}, \nso: exp({4}+{5}*x)\n'.format(p0, p0_err, p1, p1_err, round(p0, 2), round(p1, 2)))

        return f_fit_h0tail


    def GetThreshold(func, prob):
        '''Create a graph with the fit values to read 3sigma & 5sigma thresholds
        '''
        found = False

        x = 0.0                         #Initial value  --> TBD
        print("In GetThreshold : integral=",func.Integral(x,1000) )

        while (x < 80 and found == False): 
            #y = func.Eval(x)
            y = func.Integral(x,1000)
            if(y <=  prob ):
               found = True
               print("In GetThreshold : found y=",func.Integral(x,1000) )
          
        x_c = x
        x = x+0.001

        return (x_c + x)/2.