import ROOT from ROOT import TMath import aa import os import array import fluxes as flux import math from copy import copy aa.include(os.environ["AADIR"]+"/ana/dst/summarize_mc_trks.hh") aa.include(os.environ["AADIR"]+"/ana/dst/summarize_hits.hh") aa.include(os.environ["AADIR"]+"/ana/dst/summarize_rec_trks.hh") aa.include(os.environ["AADIR"]+"/ana/dst/summarize_crkv_hits.hh") aa.include(os.environ["AADIR"]+"/ana/dst/cascade_summary.hh") ####################################################################################################### ####################################################################################################### # list of DST files that will be used ####################################################################################################### ####################################################################################################### # paths pointing to files where the bdt_trk is already appended dirname = "/data/km3net/users/tjuanve/ARCA_MC/v6.2/bdt/2021-10-03_bdt_output_tracks_cascvar_LR_recovar/" dirname_mu = "/data/km3net/users/tjuanve/ARCA_MC/v6.2/bdt/2021-10-03_bdt_output_tracks_cascvar_LR_recovar/" allfiles= { "mu_10T" : ROOT.EventFile(dirname_mu+"mcv6.mupage_10T.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "mu_50T" : ROOT.EventFile(dirname_mu+"mcv6.mupage_50T.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "numuCC" : ROOT.EventFile(dirname+"mcv6.gsg_numu-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "numuNC" : ROOT.EventFile(dirname+"mcv6.gsg_numu-NCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinumuCC" : ROOT.EventFile(dirname+"mcv6.gsg_anumu-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinumuNC" : ROOT.EventFile(dirname+"mcv6.gsg_anumu-NCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "nueCC" : ROOT.EventFile(dirname+"mcv6.gsg_nue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "nueNC" : ROOT.EventFile(dirname+"mcv6.gsg_nue-NCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinueCC" : ROOT.EventFile(dirname+"mcv6.gsg_anue-CCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinueGLRES" : ROOT.EventFile(dirname+"mcv6.gsg_anue-GLRESAtomic_1e5-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinueNC" : ROOT.EventFile(dirname+"mcv6.gsg_anue-NCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "nutauCCshower" : ROOT.EventFile(dirname+"mcv6.gsg_nutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "nutauCCmuon" : ROOT.EventFile(dirname+"mcv6.gsg_nutau-CCHEDIS-muon_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "nutauNC" : ROOT.EventFile(dirname+"mcv6.gsg_nutau-NCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinutauCCshower" : ROOT.EventFile(dirname+"mcv6.gsg_anutau-CCHEDIS-shower_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinutauCCmuon" : ROOT.EventFile(dirname+"mcv6.gsg_anutau-CCHEDIS-muon_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root"), "antinutauNC" : ROOT.EventFile(dirname+"mcv6.gsg_anutau-NCHEDIS_1e2-1e8GeV.sirene.jte.jchain.aashower.dst.bdt_trk.root") } ####################################################################################################### ####################################################################################################### # list of variables from DST files that will be used ####################################################################################################### ####################################################################################################### class variable( object ) : def __init__( self, name , expr ) : self.name = name self.ptr = array.array('f',[0.0]) self.expr = expr self._f = eval( 'lambda trks,sum_jgandalf,crkv_hits,sum_casc,bdt_trk :' + self.expr ) def set( self, trks, sum_jgandalf, crkv_hits, sum_casc , bdt_trk) : self.ptr[0] = self._f( trks, sum_jgandalf, crkv_hits, sum_casc, bdt_trk ) if math.isinf(self.ptr[0]) or math.isnan(self.ptr[0]): if self.name == "inertia_ratio": self.ptr[0] = 5 else: self.ptr[0] = 1e10 def value( self ) : return self.ptr[0] vardesc = """ 0 cas_z trks[1].pos.z 1 cas_R TMath.Sqrt(TMath.Power(trks[1].pos.x,2)+TMath.Power(trks[1].pos.y,2)) 2 cas_dz TMath.ACos(trks[1].dir.z) 3 cas_e TMath.Log10(trks[1].E) 4 cas_lik trks[1].lik 5 cas_nhits TMath.Log10(trks[1].fitinf[1]) 6 crkv_aacasc crkv_hits[1].nhits[2] 7 crkv100_aacasc crkv_hits[1].nhits_100m[2] 8 goldP_aashower sum_casc.goldP_aashower 9 nhits_qstrat sum_casc.nhits_qstrat 10 ntot_qstrat TMath.Log10(sum_casc.ntot_qstrat) 11 goldP_qstrat sum_casc.goldP_qstrat 12 inertia_ratio sum_casc.inertia_ratio 13 trk_z trks[0].pos.z 14 trk_R TMath.Sqrt(TMath.Power(trks[0].pos.x,2)+TMath.Power(trks[0].pos.y,2)) 15 trk_dz TMath.ACos(trks[0].dir.z) 16 tnhits TMath.Log10(trks[0].fitinf[3]) 17 tnpe TMath.Log10(trks[0].fitinf[9]) 18 tlength trks[0].fitinf[10] 19 nhits_aashower_coin_early_20 sum_casc.nhits_aashower_coin_early_20 20 crkvtot_jpptrack crkv_hits[0].sumtot[1] 21 bdt_track bdt_trk[1] """ bdtvars = [ variable(l.split()[1],l.split()[2]) for l in vardesc.splitlines() if l.strip() != '' ] ####################################################################################################### ####################################################################################################### # this functions is use to select the events that will be used to train the bdt and to define the signal ####################################################################################################### ####################################################################################################### def not_nan(var): if math.isinf(var) or math.isnan(var): return False else: return True def preselection(trks,sum_jgandalf,crkv_hits) : # coding: # -1 : we have a shower # 0 : contained shower # 1 : we have a track # 2 : contained track vertex # 3 : shower cherenkov hits w.r.t. aashowerfit track # 4 : cut on #hits aashowerfit # 5 : 2D likelihood ratio cut vs aashowerfit zenith # 6 : jpp length cut precut = -2 if trks[1].rec_type != 0: # we have a shower precut = -1 if trks[1].pos.z < 650: # contained shower precut = 0 if trks[0].fitinf.size() > 10 and trks[0].fitinf[0] > 0 and trks[0].fitinf[10] > 0 and trks[0].fitinf[3] > 0 and trks[0].fitinf[9] > 0 and trks[0].E > 0: # we have a track reco precut = 1 if trks[0].pos.z < 650: # contained track vertex precut = 2 if crkv_hits[1].nhits[2] > 10 : precut = 3 if math.log10(trks[1].fitinf[1]) > 2.6: precut = 4 if (trks[1].dir.z > - (trks[0].lik - trks[1].lik)/1200 + 0.475): precut = 5 # f23 https://git.km3net.de/tvaneeden/cascade_selection/-/blob/master/2021-12-04_optimise_2d_cuts/likelihood_ratio_cut_et_havereco_cont_jpplen_crkvaa_aanhits/ratio_vs_zen/draw_new_cuts.py if trks[0].fitinf[10] < 300: # length jpp cut precut = 6 return precut def signal(trks,nu,sum_mc_trks) : if sum_mc_trks.tmuon.E==0 and nu.pos.z < 650 and (nu.pos.x**2 + nu.pos.y**2 < 250000) and ROOT.angle_between(nu.dir, trks[1].dir)*180/math.pi<10: return 1 else: return 0 ####################################################################################################### ####################################################################################################### # this function creates a summary tree that will be used to train the TMVA ####################################################################################################### ####################################################################################################### def prepare_tree(outfile) : fcos = flux.IsotropicFlux(0.600e-4,2.00,1e50) fconv = flux.AanetFlux("AanetConv") fprmt = flux.AanetFlux("AanetPrompt") f_out = ROOT.TFile(outfile,"RECREATE") flv = array.array('f',[0.]) chl = array.array('f',[0.]) wgt = array.array("f",[0.]) sgl = array.array("f",[0.]) data = ROOT.TTree("data","data") data.Branch("flv",flv,"flv/F") data.Branch("chl",chl,"chl/F") data.Branch("wgt",wgt,"wgt/F") data.Branch("sgl",sgl,"sgl/F") for v in bdtvars : data.Branch(v.name,v.ptr,v.name+"/F") sum_mc_trks = ROOT.MC_trks_summary() sum_jgandalf = ROOT.rec_trks_summary() crkv_hits = ROOT.vector('crkv_hits')() cascade_summary = ROOT.cascade_summary() bdt_trk = array.array("f",[0.,0.]) for ch,filename in allfiles.items() : file = ROOT.EventFile(filename) livetime = ROOT.get_livetime(file.header) if "mu_10T" in ch or "mu_50T" in ch: ngen = 0 else: ngen = file.header.ngen() true_int = 1 if "CC" in filename else 0 print(ch,livetime,ngen) T = file.rootfile().Get('T') T.SetBranchAddress( "sum_mc_trks", sum_mc_trks ) T.SetBranchAddress( "sum_jgandalf", sum_jgandalf ) T.SetBranchAddress( "crkv_hits", crkv_hits ) T.SetBranchAddress( "sum_casc", cascade_summary ) T.SetBranchAddress( "bdt_trk" , bdt_trk ) for e in file : if file.global_index()%100000==0 : print("Event",file.global_index(),"out of",file.total_events) if ch=="mu_10T" and e.mc_trks[0].E>50e3: continue trks = e.trks pre = preselection(trks,sum_jgandalf,crkv_hits) if pre>5 : pdg = e.mc_trks[0].type ene = e.mc_trks[0].E cth = e.mc_trks[0].dir.z flv[0] = pdg chl[0] = true_int wgt[0] = 0. if pdg==0 : wgt[0] = 1./livetime else : wgt[0] = e.w[1]/ngen*( fconv.ComputeFlux(pdg,ene,cth) + fprmt.ComputeFlux(pdg,ene,cth) + fcos.ComputeFlux(pdg,ene,cth) ) sgl[0] = signal(trks,e.mc_trks[0]) for v in bdtvars : v.set(trks,sum_jgandalf,crkv_hits,cascade_summary,bdt_trk) data.Fill() f_out.cd() data.Write("data") f_out.Close() ####################################################################################################### ####################################################################################################### # this function train your tmva. you have to input your summary file ####################################################################################################### ####################################################################################################### def trainTMVA(infile,bdtname) : f_in = ROOT.TFile(infile) f_out = ROOT.TFile("output.root","RECREATE") data = f_in.Get("data") # Create the TMVA factory ROOT.TMVA.Tools.Instance() factory = ROOT.TMVA.Factory("TMVAClassification", f_out,"AnalysisType=Classification") dataloader = ROOT.TMVA.DataLoader("train_tmva") for v in bdtvars : dataloader.AddVariable(v.name,"F") dataloader.AddTree(data,"Signal", 1.,ROOT.TCut("sgl==1")) dataloader.AddTree(data,"Background",1.,ROOT.TCut("sgl==0")) dataloader.SetWeightExpression("wgt") dataloader.PrepareTrainingAndTestTree(ROOT.TCut("1==1"), "NormMode=None:VerboseLevel=Debug" ); # Book the SVM method and train/test method = factory.BookMethod( dataloader, ROOT.TMVA.Types.kBDT, bdtname, "H:VerbosityLevel=Debug:NTrees=500:BoostType=Grad:Shrinkage=0.30:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20:MaxDepth=5" ) factory.TrainAllMethods() factory.TestAllMethods() factory.EvaluateAllMethods() ####################################################################################################### ####################################################################################################### # this function apply the tmva to your DST files ####################################################################################################### ####################################################################################################### def applyTMVA(bdtname) : reader = ROOT.TMVA.Reader("") for v in bdtvars : reader.AddVariable(v.name,v.ptr) reader.BookMVA( bdtname, "train_tmva/weights/TMVAClassification_"+bdtname+".weights.xml" ) for ch,file in allfiles.items() : sin = file.filenames[0].split(".")[:-1] outname = ".".join(sin)+".bdt_casc.root" print(outname) file.set_output( outname ) file.autosave = False file.output_file.mkdir("dst_history") file.output_file.cd("dst_history") for key in file.rootfile().dst_history.GetListOfKeys() : key.Write() file.output_file.cd() sum_mc_trks = ROOT.MC_trks_summary() sum_mc_hits = ROOT.hits_summary() sum_jgandalf = ROOT.rec_trks_summary() sum_hits = ROOT.hits_summary() sum_trig_hits = ROOT.hits_summary() crkv_hits = ROOT.vector('crkv_hits')() cascade_summary = ROOT.cascade_summary() bdt_trk = array.array("f",[0.,0.]) bdt_casc = array.array("f",[0.,0.]) outT = ROOT.TTree("T","summary variables") outT.Branch( "sum_mc_trks" , sum_mc_trks , 32000, 4 ) outT.Branch( "sum_mc_hits" , sum_mc_hits , 32000, 4 ) outT.Branch( "sum_jgandalf" , sum_jgandalf , 32000, 4 ) outT.Branch( "sum_hits" , sum_hits , 32000, 4 ) outT.Branch( "sum_trig_hits" , sum_trig_hits , 32000, 4 ) outT.Branch( "crkv_hits" , crkv_hits , 32000, 4 ) outT.Branch( "sum_casc" , cascade_summary , 32000, 4 ) outT.Branch( "bdt_trk" , bdt_trk , "bdt_trk[2]/F" ) outT.Branch( "bdt_casc" , bdt_casc , "bdt_casc[2]/F" ) file.output_tree.AddFriend(outT) for alias in file.roottree().GetListOfAliases() : file.output_tree.SetAlias(alias.GetName(),alias.GetTitle()) inT = file.rootfile().Get('T') inT.SetBranchAddress( "sum_mc_trks" , sum_mc_trks ) inT.SetBranchAddress( "sum_mc_hits" , sum_mc_hits ) inT.SetBranchAddress( "sum_jgandalf" , sum_jgandalf ) inT.SetBranchAddress( "sum_hits" , sum_hits ) inT.SetBranchAddress( "sum_trig_hits" , sum_trig_hits ) inT.SetBranchAddress( "crkv_hits" , crkv_hits ) inT.SetBranchAddress( "sum_casc" , cascade_summary ) inT.SetBranchAddress( "bdt_trk" , bdt_trk ) for e in file : if file.global_index()%100000==0 : print("Event",file.global_index(),"out of",file.total_events) if ch=="mu_10T" and e.mc_trks[0].E>50e3: continue trk = e.trks[0] bdt_casc[0] = preselection(e.trks,sum_jgandalf,crkv_hits) bdt_casc[1] = -2. if bdt_casc[0]>-1 : for v in bdtvars : v.set(e.trks,sum_jgandalf,crkv_hits,cascade_summary, bdt_trk) bdt_casc[1] = reader.EvaluateMVA(bdtname) outT.Fill() file.write() del file