#ifndef DATASET_INCLUDED #define DATASET_INCLUDED #include "../search/definitions.hh" #include "stringutil.hh" #include #include #include "Head.hh" #include "TObject.h" #include "TTimeStamp.h" using namespace std; inline string db_query_s(string what = "detectors.txt") { string s = "wget -qO- --user=kmcprod --load-cookies=/sps/km3net/users/kmcprod/mycookies.txt --prefer-family=IPv4 --no-check-certificate " "\"https://km3netdbweb.in2p3.fr/streamds/" + what + "\""; return exec(s); } inline vector> db_query(string what = "detectors.txt") { string s = db_query_s(what); vector v = split(s, "\n"); vector> r; for (auto ss : v){ r.push_back(split(ss,"\t")); } return r; } namespace analysis { struct File : public TObject { string filename; string version; string flav; Head header; map history; // jpp 'meta' data int run_id =0; int nevents =0; double daq_livetime =0; double mc_livetime =0; double ngen =0; bool ok = false; int events_in_file = 0; string status; // unprocessed, File(string flav, string filename) : filename(filename), flav(flav) { ok = init(); } File(string flav, string version, string filename ): filename(filename), version(version), flav(flav) { ok = init(); } File() {} bool init() { TFile f( filename.c_str() ); auto* h = ((Head*) f.Get("Head")); if (!h) return false; header = *h; TTree* E = (TTree*) f.Get("E"); nevents = E->GetEntriesFast(); E->Draw("run_id","","goff",1); if (!nevents) { print ("files has no entries",filename); return false; } run_id = int( E->GetV1()[0]+0.001 ); daq_livetime = header.daq_livetime(); try { mc_livetime = header.mc_livetime(); } catch (const std::out_of_range& ) {} try { ngen = header.ngen(); } catch ( const std::out_of_range& ) {} if ( TDirectory* dir = f.GetDirectory("META") ) { dir->ReadAll(); for (auto p : * ( dir->GetListOfKeys() ) ) { TKey* key = (TKey*) p; if (!key) break; TObject *obj = key->ReadObj(); history[ obj->GetName() ] = obj->GetTitle(); } } return true; } ClassDef(File, 1); }; struct FileSet : public TObject { string flav; string version; vector files; Head header; // combined header string name; int firstrun = 0; int lastrun = 0; FileSet() {} void add_file( File& f ) { bool ok = true; if (header.size() == 0) header = f.header; else { ok = add(header, f.header, /*rbrmode="*/ true); } if (!ok) fatal("incompatible headers in fileset."); if (firstrun == 0) firstrun = f.run_id; if (lastrun == 0) lastrun = f.run_id; firstrun = std::min(firstrun, f.run_id); lastrun = std::max(lastrun, f.run_id); files.push_back(f); } vector names() { vector r; for (auto &f : files) r.push_back(f.filename); return r; } ClassDef(FileSet, 1); }; struct Run : public TObject { int run_id = 0 ; string detector = "none"; string period = "none"; int batch = 0; map filesets; // files by string map attributes; Run() {} Run(vector& keys, vector& values) { for (unsigned i = 0; i < min(keys.size(), values.size() ); i++) { attributes[keys[i]] = values[i]; if ( startswith( keys[i], "UNIX") ) // make human-readible version { long t = to(values[i]); TTimeStamp p( t /1000 ,0 ); attributes[ replace_all( keys[i],"UNIX","")]= p.AsString("s"); } } run_id = to(attributes["RUN"]); } map infomap() { map r = { {"run_id", str(run_id) }, {"detector", detector } }; foreach_map( k, v, filesets ) { r[ "fileset:"+k ] = str( v.files.size() ); } foreach_map( k,v, attributes ) { r[k] = v; } return r; } bool have_fileset( string flav ) { if (filesets.find(flav) == filesets.end() ) return false; return true; } int nfiles( string flav ) { if ( !have_fileset(flav)) return 0; return filesets[flav].files.size(); } long total_nevents( string flav ) { long r = 0; if ( !have_fileset(flav)) return 0; for( auto& f : filesets[flav].files ) r+= f.nevents; return r; } double daq_livetime() { if (!have_fileset("data")) return 0; try { return filesets["data"].header.daq_livetime(); } catch ( const std::out_of_range& ) // no daq in header { return 0; } } double mc_livetime() { if (!have_fileset("muon")) return 0; return filesets["muon"].header.mc_livetime(); } vector all_flavors() { return keys( filesets ); } string __str__() { return str( Table( infomap() ) ); } Table files_table () { Table T("fileset","flavor","version", "filename", "nevents", "datalive" ,"mclive","ngen"); T.title = "files for run" +str(run_id); foreach_map( flavor, fs , filesets ) { for( auto& f : fs.files ) { T << fs.name << f.flav << f.version << f.filename << f.nevents << f.daq_livetime << f.mc_livetime << f.ngen ; } } return T; } ClassDef(Run, 1); }; struct RunSet : public TObject { string name = "D0ARCA006"; string detector = "D0ARCA006"; string description = ""; map runs; vector run_ids() { return keys(runs); }; TTimeStamp start_time; TTimeStamp end_time; RunSet(string detector = "D0ARCA006") : detector(detector) { } /*! keep only the runs for which predicate is true */ int filter( std::function< bool(Run&)> predicate ) { for( auto it = runs.begin(); it!=runs.end() ;) { bool keep = predicate( it->second ); if (!keep) runs.erase( it++ ); else it++; } return runs.size(); } bool have_run( int runid ) { return contains( keys(runs), runid ); } vector runids() { return keys( runs ); } int load_runs() { int n = 0; vector > result = db_query("runs.txt?detid=" + detector); for (unsigned line = 1; line < result.size(); line++) { Run r(result[0], result[line]); r.detector = detector; runs[r.run_id] = r; n++; } print("loaded",n,"runs from DB."); return n; } double version_helper( string v ) { auto vv = split( split(v, "-")[0], "." ); if (vv.size() !=3 ) throw std::range_error("dus"); return to(vv[0]) * 1000000. + to(vv[1]) * 1000 + to(vv[2]); } bool higher_version( string v1, string v2 ) { if (v1 == "" ) return false; if (v2 == "" ) return true; return version_helper(v1) > version_helper( v2); } void load_run_summary_numbers() { string minrun = str( min( run_ids() ) ); string maxrun = str( max( run_ids() ) ); string q = "runsummarynumbers.txt?detid="+detector+"&minrun="+minrun+"&maxrun="+maxrun; vector > result = db_query( q ); map > > M; // [run_id][parname] -> (version,value) for (unsigned line = 1; line < result.size(); line++) { int runnumber = to(result[line][1]); string parameter_name = result[line][2]; string jpp_version = result[line][3]; string parameter_value = result[line][4]; if (!have_run( runnumber )) { print ("found runsummarynubmers infor for unknown run", runnumber ); } auto& p = M[runnumber][parameter_name]; try { if ( higher_version( jpp_version, p.first) ) p = make_pair( jpp_version, parameter_value ); } catch ( std::range_error& ) { print ("weird db line", result[line]); // the ones I saw seem safe to skip } } foreach_map_map( runid, parameter, pair_ , M ) { runs[runid].attributes[parameter] = pair_.second; } } Table run_table() { auto cols = split("run_id det period batch nfiles_data daq_live nfiles_mu mc_live STARTTIME RUNSETUPNAME POS_CALIBSETID ROT_CALIBSETID T0_CALIBSETID"); Table T(cols); foreach_map( k,v, runs ) { T << v.run_id << v.detector << v.period << v.batch << v.nfiles("data") << v.daq_livetime() << v.nfiles("muon") << v.mc_livetime(); for (unsigned i = 8; i < cols.size() ; i++ ) T << v.attributes[cols[i]]; } return T; } /* flag all runs in the given run-range as belonging to period */ int flag_period(string period_name, string description, int first_run, int last_run) { (void)description; // todo: remember it somewhere int n = 0; foreach_value(r, runs) { print(r.run_id); if (r.run_id >= first_run && r.run_id <= last_run) { n++; if (r.period != "") print("warning: reassigning period of run", r.run_id, "from", r.period, "to", period_name); r.period = period_name; } } return n; } /* all defined periods */ vector all_periods() { set r; foreach_value(run, runs) r.insert(run.period); return vector(r.begin(), r.end()); } /* all defined periods */ vector all_batches() { set r; foreach_value(run, runs) r.insert(run.batch); return vector(r.begin(), r.end()); } /* all defined flavors */ vector all_flavors() { set r; foreach_value(run, runs) for( auto s : run.all_flavors() ) r.insert( s ); return vector(r.begin(), r.end()); } string get_period_of_batch (int batch ) { foreach_value(run, runs) if (run.batch == batch) return run.period; print ("no run found for batch",batch); return "none"; } long total_nevents(string period, string flavor) { long r=0; foreach_map( k,run, runs) { (void) k; if ( run.period == period ) r += run.total_nevents(flavor); } return r; } long total_nevents_of_batch(int batch, string flavor) { long r=0; foreach_map( k,run, runs) { (void) k; if ( run.batch == batch ) r += run.total_nevents(flavor); } return r; } bool add_files( string flav, string version, vector filenames ) { print( filenames , filenames.size() ); for( auto fn: filenames ) { print (fn); File f( flav, version, fn ); if ( !f.ok ) { print ("file is not okay -> not adding "); continue; } print(f.run_id); if (!have_run(f.run_id) ) { print("skipping file for unknown run", f.run_id ); } //print("add", f.run_id, flav, f.filename ); runs[ f.run_id ].filesets[ flav ].add_file( f ); //print ("ok"); } return true; } /*! Assign the run.batch number so that each batch contains less than events_per_batch. Batch numbers shall not cross periods. */ void set_batches(string flavor, int events_per_batch = 10000000 ) { // for this, each run should really belong to only one period int batch_counter = 1; long n = 0; for (auto period : all_periods() ) { foreach_map( run_id, run, runs ) { if ( run.period != period ) continue; long nn = run.total_nevents( flavor ); if (n > 0 && n + nn > events_per_batch ) { batch_counter++; n=0; } run.batch = batch_counter; n += nn; } } } ClassDef(RunSet, 1); }; } #endif