#ifndef HISTOGRAMMER_INCLUCED #define HISTOGRAMMER_INCLUCED #include "Flux.hh" #include "Astro.hh" #include "Evt.hh" #include "Det.hh" #include "io_util.hh" #include #include #include #include using namespace std; #include "TCanvas.h" #include "Computable.hh" #include "Histogram.hh" #include "foreach.hh" #include "stringutil.hh" #include "EventFile.hh" #include "RunSet.hh" using namespace analysis; class Analysis; /*! An Histogrammer is a set of selections, weights, histograms -- for one flavor (data/numucc/mupage/...) */ struct HistSet : public TObject { string name =""; ///> the name of Analyis string period; ///> period of data-taking string flavor; ///> type of files we will be histogramming int batch; ///> if histogramming happens in batches, identical histograms can be added long total_n_done=0; ///> number of processed events Analysis *analysis =0; //Det *det =0; TFile *outputfile = 0; map config_map; bool config_done = false; vector code_store; ///> all pieces of code compiled int dump_every = 100000 ; int progress_every = 10000 ; TH1D hstyle; // histogram that defines the style of all histograms vector variants; vector selections; vector weights; vector observables; vector histograms; template< typename T > vector names_of( vector& vec ) { vector r; for(auto& e : vec) r.push_back( e.name ); return r; } vector names_of ( string what = "observables" ) { if (what == "obervables") return names_of( observables ); if (what == "selections") return names_of( selections ); if (what == "weights") return names_of( weights ); if (what == "variants") return names_of( variants ); if (what == "histograms") return names_of( histograms ); return {}; } map>>> _histmap; //! virtual ~HistSet() {} string key() { return name + "_" + flavor + "_" + str(batch); } void inject() { define("string flavor=\""+flavor+"\";"); define("Analysis* ana = (Analysis*) " + str((void *)analysis)+";"); //define("Det* det = (Det*) " + str((void *)det )+";"); define("FileSet* fileset;"); define("Head header;"); } void print_code_store() { for (auto &s : code_store) { print("-----------------------------------------------------------------"); print(s); } } void set_output_file( TFile* outfile ) { outputfile = outfile; for(auto& h : histograms ) h.H->SetDirectory(outputfile); } void init(string s) { // s can be file or the full config string if (file_exists(s)) { read_ana_file(s); } else { read_config(s); } dumpana(); } HistSet() : batch(0) {} HistSet(string s) { init(s); } void add(const HistSet &other) { for (auto &h : other.histograms) { Histogram *mine = get_by_name(h.name, histograms, true); if (!mine) { histograms.push_back(h); } else { print("adding histogram to existing ", mine->name); mine->H->Add(h.H); } } for (auto &o : other.observables) { Observable *mine = get_by_name(o.name, observables, true); if (mine) // check it is the same observable { if (*mine == o) { print("ignoring duplicate observable", o.name); } else { fatal("observable with same name, but different binning/something else"); } } else // add it { observables.push_back(o); } } } /*! return a flat list of all selections, weights and obserables */ vector computables() { vector r; for (auto &x : selections) r.push_back(&x); for (auto &x : weights) r.push_back(&x); for (auto &x : observables) r.push_back(&x); return r; } /*! call a function by its name -- your responsiblity to ensure the arguments are good */ template bool call_func_maybe(string function_name, Args... args) { auto gf = gROOT->GetGlobalFunction(function_name.c_str()); if (!gf) return false; auto f = gInterpreter->FindSym(gf->GetMangledName()); // try to call it -- fasten your seatbelts. typedef double (*fp)(Args...); auto func = (fp)f; double r = func(args...); (void)r; return true; } /*! Loop over all input files, fill the histograms */ bool run(RunSet &runset, int maxentries = -1) { long total_n_events = runset.total_nevents_of_batch( batch, flavor ); if (!config_done) init_config(); print ("About to process", total_n_events,"events from", runset.runs.size(),"runs for", period, flavor ); // check the version is the same int nrun =0; foreach_map(run_id, run, runset.runs) { nrun++; if (!run.have_fileset(flavor)) { print ("no files for ", run_id , flavor ); continue; } else { print ("----------------doing run ", run_id ); } // for now, each run/flavor must only have one fileset // (could change in future, when e.g. comparing MCs) FileSet& fs = run.filesets[flavor]; // todo: set weights corresponding to full fileset. define("fileset = (FileSet*)"+str(&fs)+";", false ); define("header = fileset->header", false ); call_func_maybe("on_new_run", run); for (File &ff : fs.files) { print ("opening", ff.filename ) ; EventFile f( ff.filename ); /// f = EventFile object // // load the detector (god knows which one... for now hardode) // string detfile = "/sps/km3net/repo/data_processing/tag/v6_ARCA6_run_by_run/prod/data/KM3NeT_00000075/v6.2/detectors/KM3NeT_00000075_20210715.detx" // det->doms.clear(); // // det->read( detfile ); call_func_maybe("on_new_eventfile", f); for (Evt &evt : f) { if (maxentries > 0 && f.global_index() >= maxentries) { print("requested number of events reached."); return true; } process(evt); total_n_done++; if (dump_every > 0 && total_n_done % dump_every == 0) { dump(evt); dump_histograms(); } if ( total_n_done % progress_every == 0) { print ( total_n_done ,"of", total_n_events, "done (", 100*total_n_done/double(total_n_events),"%) current run :", run_id, ",", nrun ,"of", runset.runs.size() ); } } } } finalize(); return true; } void finalize() { print("-- finalize --"); for (auto &h : histograms) { if (contains(h.options, string("binwidth"))) { // only support fixed bin histograms print("scaling", h.key(), " by 1/binwidth."); h.H->Scale(1.0 / h.H->GetBinWidth(1)); } } } void process(Evt &evt) { // print ("Process"); static int cache_id = 0; for (auto &variant : variants) { bool variant_exists = variant.eval(evt); if (!variant_exists) continue; // NB: a new variant should cause all the cached observables to be // invalidated. -> unique number cache_id cache_id += 1; for (auto &selection : selections) { bool pass = selection.eval(evt, cache_id); if (!pass) continue; for (auto &weight : weights) { double w = weight.eval(evt, cache_id); for (auto &h : get_histograms(variant.name, selection.name, weight.name)) { //print( h->key() ); h->slurp(evt, w, cache_id); } } } } } void _init_histmap() // only for caching. { // from now on, don't change histograms vector, otherwise _histmap will be invalid. for (auto &h : histograms) { _histmap[h.variant->name][h.selection->name][h.weight->name].push_back(&h); } } /*! return all the histograms pertaining to variant, selection and weight */ vector get_histograms(string variant, string selection, string weight) { if (_histmap.size() == 0) _init_histmap(); return _histmap[variant][selection][weight]; } /*! compile some c++ code */ bool define(string code, bool declare = true ) { bool okay = false; if (declare) okay = gInterpreter->Declare(code.c_str()); else okay = gInterpreter->ProcessLine( code.c_str() ); if (!okay) { cout << " ------- FAILED TO COMPILE THIS ------------------" << endl; cout << code << endl << endl; exit(1); } code_store.push_back(code); return true; } template bool _def(string S, vector &M) { for (auto line : split(S, "\n")) { T o; string remain = o.define(line); if (remain != "") { fatal("remaining string", remain) } // check if it already exists auto v = names_of( M ); if ( contains( v, o.name )) { print ("WARNING: replacing existing quantity", o.name ); int i = index_of( v, o.name ); M[i] = o; } else { M.push_back(o); } } return true; } bool define_selections(string S) { return _def(S, selections); } bool define_weights(string S) { return _def(S, weights); } bool define_observables(string S) { return _def(S, observables); } bool define_variants(string S) { return _def(S, variants); } bool add_histogram( Histogram& h ) { if ( contains( names_of( "histograms") , h.name ) ) { print ("warning: duplicate hsitogram requested",h.name ); print ("not adding second histogram."); return false; } else histograms.push_back(h); return true; } /*! Define histograms via a string. Each line defines a histogram. line must be like parameter(s) | selection(s) | weight(s) in case of more thatn 1 parameter, they must be space-separated */ bool define_histograms(string S) { cout << " define_histograms " << S << endl; if (trim(S) == "") return true; for (auto line_ : split(S, "\n")) { auto line = trim(line_); if (line == "") continue; auto v = split(line, "|"); //line must be like parameter(s) | selection(s) | weight(s) if (v.size() < 4) { cout << "malformed line in define_histograms" << endl; cout << S << endl; return false; } vector pars = split(trim(v[0])); vector wgts = split(trim(v[1])); vector sels = split(trim(v[2])); vector varts = split(trim(v[3])); vector opts; print("sels:", sels, selections, v); if (v.size() == 5) { opts = split(trim(v[4])); print("histgram opts", opts); } if (pars.size() < 1) { cout << " error in pars " << endl; return false; } if (pars.size() > 3) { cout << " too many observables" << endl; return false; } if (sels.size() < 1) { cout << " error in selections " << endl; return false; } if (wgts.size() < 1) { cout << " error in weights " << endl; return false; } if (varts.size() < 1) { cout << " error in variants " << endl; return false; } if (sels[0] == "*") { sels.clear(); for (auto s : selections) sels.push_back(s.name); } if (wgts[0] == "*") { wgts.clear(); for (auto s : weights) wgts.push_back(s.name); } if (varts[0] == "*") { varts.clear(); for (auto s : variants) varts.push_back(s.name); } auto sels_ = get_by_names(sels, selections); auto wgts_ = get_by_names(wgts, weights); auto varts_ = get_by_names(varts, variants); if (pars[0] != "*") { auto pars_ = get_by_names(pars, observables); for (auto &variant : varts_) { for (auto &selection : sels_) { for (auto &weight : wgts_) { Histogram h(name, flavor, pars_, *variant, *selection, *weight); h.H->SetDirectory(outputfile); h.set_style(hstyle); h.options = opts; print ("push custom"); add_histogram( h ); // push back with name check } } } } else //wildcard for observable name : construct all 1-d histograms { print ("wildcard histograms"); dumpana(); for (auto &variant : varts_) { for (auto &selection : sels_) { for (auto &weight : wgts_) { for (auto &obs : observables) { vector v = {&obs}; Histogram h(name, flavor, v, *variant, *selection, *weight); h.H->SetDirectory(outputfile); h.set_style(hstyle); h.options = opts; add_histogram( h ); // push back with name check } } } } } } return true; } /*! add all members of class object as observables */ bool add_observables(TClass *c, void *ptr_to_object, string objname) { auto dmems = get_datamembers(c, ptr_to_object); string classname = c->GetName(); for (auto d : dmems) { string typ = d[0]; string name = d[1]; string desc = d[3]; string expr = "((" + classname + "*) " + str(ptr_to_object) + " )->" + name; print(" adding observable expression ", expr); string s = objname + name + " | " + desc + " | " + expr + " | auto "; Observable o; o.define(s); observables.push_back(o); } return true; } /*! split the ana files in sections delimited by keywords */ map sections(std::istream &f) { map M; vector keywords = {"include:", "code:", "variants:", "selections:", "weights:", "observables:", "histograms:"}; string s_, current_keyword = "none"; while (getline(f, s_)) { string s = trim(s_); if (current_keyword != "code:") { s = strip_comment_line(s); } if (contains(keywords, s)) { current_keyword = s; continue; } M[current_keyword] += s + '\n'; } return M; } void read_ana_file(string filename, bool incld = false) { ifstream f(filename); read_config(f, incld); } void read_config(string cfg, bool incld = false) { istringstream f(cfg); read_config(f, incld); } void read_config(std::istream &f, bool incld = false) { auto M = sections(f); print(M); // include should weave the included file into the current one, // by including each section of the included file in front of the current one. string s = trim(M["include:"]); if (s != "") { std::ifstream ff(s); auto MM = sections(ff); // the following should also work when M does not have key yet foreach_map(key, val, MM) M[key] = val + "\n" + M[key]; print("after include:", M); } config_map = M; } void init_config() { inject(); define(config_map["code:"]); define_variants(config_map["variants:"]); define_selections(config_map["selections:"]); define_weights(config_map["weights:"]); define_observables(config_map["observables:"]); define_histograms(config_map["histograms:"]); config_done = true; } bool check_for_duplicates( bool is_fatal = false ) { map M; for (auto &h : histograms) M[h.name]++; foreach_map( name, n , M ) { if ( n>1 ) { print ("histogram",name,"occurs",n,"times for", period, flavor , "batch=", batch); print( histogram_table() ); if (is_fatal) fatal("aborting"); return false; } } return true; } /*! merge function intended for combining histograms from different batch jobs */ void merge( HistSet& other ) { if ( other.name != name ) fatal("cant merge Histsets with different name"); if ( other.flavor != flavor) fatal("cant merge Histsets with different flavor"); if ( other.period != period) fatal("cant merge Histsets with different period"); for (auto &h : other.histograms) { Histogram *mine = get_by_name(h.name, histograms, true); if (!mine) { fatal ("missing histogram while merging histogram sets:", h.name ); } else { print("adding histogram to existing ", mine->name); mine->H->Add(h.H); // maybe some histograms should not be added but averaged (?) } } } Table dumpana() { Table T("type", "name", "description"); //for( auto& v : input_files ) T << "input_files"<< v << " "; for (auto &v : variants) T << "variants" << v.name << v.desc; for (auto &s : selections) T << "selection" << s.name << s.desc; for (auto &w : weights) T << "weight" << w.name << w.desc; for (auto &o : observables) T << "observable" << o.name << o.desc; for (auto &h : histograms) T << "histograms" << h.vars << h.title(); T.title = "Summary of Histogrammer " + name; print(T); return T; } void dump(Evt &evt) { Table T("type", "name", "value"); for (auto &o : observables) { T << "observable" << o.name << o.eval(evt); } for (auto &o : weights) { T << "weight" << o.name << o.eval(evt); } for (auto &o : selections) { T << "selection" << o.name << o.eval(evt); } print(T); } Table histogram_table() { return tabulate(histograms); } void dump_histograms() { print(histogram_table()); } void set_style() { foreach (h, histograms) h.set_style(hstyle); } // void write() // { // // add info to listoffunctions, so that it is stored. // // for ( auto& h : histograms ) // // { // // string x = "INFO\n" + h.info(); // // //h.H->GetListOfFunctions()->Add( new TObjString( x.c_str() ) );// todo: this leaks // // } // print("key==", key()); // // the following failes, but will be possible if we rootcint this // outputfile->WriteObject(this, key().c_str()); // outputfile->Write(); // outputfile->Close(); // } ClassDef(HistSet, 1); }; #endif