#!/usr/bin/env python """ Usage sparse_data_speed.py [-f input_file] Sometimes, you want to plot only a few variables from a large set of events. In this case, loading all events in full can be a wast of time. This example draws a 2-d histogram of the x and y position of the first reconstructed track in each event. It shows options to hand-pick which data are read from disk. The difference between python/pyroot and c++ is also illustrated. typical output: -------------------------------------------------------------- method time io-time histogram-entries -------------------------------------------------------------- fillhist_pyroot 16.3523 14.031 89409 fillhist_pyroot_pickbranch 3.46909 1.8777 89409 fillhist_ttree_draw 1.11243 - 89409 fillhist_cpp 13.8098 13.2574 89409 fillhist_cpp_pickbranch 2.37706 1.94749 89409 fillhist_uproot 1.88783 - 0 -------------------------------------------------------------- options: -f : input file default=/sps/km3net/users/jhofest/data_processing/prod_master/data/KM3NeT_00000049/v5.40/reco/datav5.40.jorcarec.aanet.00007274.root -h : this help message and exit default=False -i : Python interative mode (prompt when done) default=True """ import aa, time, sys from ROOT import TH2D, EventFile, Timer, cxx, Table options = aa.Options( __doc__, sys.argv[1:] ) fn = options.f f = EventFile(fn) def pickbranch ( evfile ) : ' select only the branches needed for this analysis ' evfile.roottree().SetBranchStatus("*",False) evfile.roottree().SetBranchStatus("trks.pos.y",True) evfile.roottree().SetBranchStatus("trks.pos.x",True) def allbranch( evfile ) : ' turn on all branches so that full Evt gets loaded' evfile.roottree().SetBranchStatus("*",True) def fillhist_pyroot ( histogram, evfile ) : 'Fill histogram using a loop over Evts in PyROOT' for evt in evfile : try : p = evt.trks[0].pos histogram.Fill( p.x , p.y ) except IndexError : pass def fillhist_pyroot_pickbranch( histogram, evfile ): 'Fill histogram using a loop over Evts in PyROOT, but only loading needed branches' pickbranch( evfile ) fillhist_pyroot( histogram, evfile ) allbranch( evfile ) def fillhist_ttree_draw (histgram, evfile ): 'Fill histogram using ROOTs TTree::Draw' E = evfile.roottree() E.Draw("trks[0].pos.x:trks[0].pos.y>>hxy","","goff") # as an asside, you can get the results of Draw now also as a numpy array, if False : n = E.GetPlayer().GetDimension() xarr = np.ndarray( (n,) ,dtype= np.float64, buffer= E.GetVal(0) ) yarr = np.ndarray( (n,) ,dtype= np.float64, buffer= E.GetVal(1) ) H = np.histogram2d(xarr, yarr, bins=bins)[0] print (xarr, H ) cxx(''' // Fill hisogram using Evt loop in C++ void fillhist_cpp( TH2D& histogram, EventFile& f ) { for (auto& evt : f ) { if ( evt.trks.size() == 0 ) continue; auto& p = evt.trks[0].pos; histogram.Fill( p.x, p.y ); } } ''') from ROOT import fillhist_cpp def fillhist_cpp_pickbranch( histogram, evfile ): 'Fill hisogram using Evt loop in C++, loading only needed brances' pickbranch( evfile ) fillhist_cpp( histogram, evfile ) allbranch( evfile ) import numpy as np import km3io bins = range(360, 660) H, xedges, yedges = np.histogram2d([np.nan], [np.nan], bins=bins); def fillhist_uproot( histogram, evfile ) : 'fill numpy histogram using uproot (loads only required branches)' global H fn = evfile.rootfile().GetName() try: tracks = km3io.OfflineReader(fn).tracks except KeyError: pass t0 = time.time() mask = tracks.len.counts > 0 pos_x = tracks.pos_x[mask, 0] pos_y = tracks.pos_y[mask, 0] H += np.histogram2d(pos_x, pos_y, bins=bins)[0] print (time.time() - t0 , sum(sum(H))) #------------------------------------------------------------------------------------ # run and time the above functions. #------------------------------------------------------------------------------------ T = Table("method","time","io-time","histogram-entries") def run ( fillfunc ) : f = EventFile( fn ) hxy = TH2D("hxy","hxy",100,360,560,100,480,660) t0 = time.time() fillfunc( hxy, f ) dt = time.time() - t0 io = f.iowatch.RealTime() if not 'draw' in fillfunc.__name__ and not \ 'uproot' in fillfunc.__name__ else "-" T.add( fillfunc.__name__ ).add( dt) T.add(io).add( hxy.GetEntries() ) del f funcs = [ v for k,v in globals().items() if k.startswith( 'fillhist') ] for fu in funcs : run(fu) print(T)