/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : OptimizeConfigParameters                                              *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description: The OptimizeConfigParameters takes care of "scanning/fitting"    *
 *              different tuning parameters in order to find the best set of     *
 *              tuning parameters which will be used in the end                  *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Helge Voss  - MPI-K Heidelberg, Germany                                   *
 *                                                                                *
 * Copyright (c) 2005:                                                            *
 *      CERN, Switzerland                                                         *
 *      MPI-K Heidelberg, Germany                                                 *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#include "TMVA/OptimizeConfigParameters.h"

#include <iostream>   // needed below for std::cout / std::endl
#include <cstdlib>    // needed below for std::exit

#include "TMath.h"
#include "TGraph.h"
#include "TH1.h"
#include "TH2.h"
#include "TDirectory.h"

#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/GeneticFitter.h"
#include "TMVA/MinuitFitter.h"
#include "TMVA/Interval.h"
#include "TMVA/PDF.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/Tools.h"

ClassImp(TMVA::OptimizeConfigParameters)

//_______________________________________________________________________
TMVA::OptimizeConfigParameters::OptimizeConfigParameters(MethodBase * const method, std::map<TString,TMVA::Interval> tuneParameters, TString fomType, TString optimizationFitType)
:  fMethod(method),
   fTuneParameters(tuneParameters),
   fFOMType(fomType),
   fOptimizationFitType(optimizationFitType),
   fMvaSig(NULL),
   fMvaBkg(NULL),
   fMvaSigFineBin(NULL),
   fMvaBkgFineBin(NULL)
{
   // Constructor which sets either "Classification or Regression"
   std::string name = "OptimizeConfigParameters_";
   name += std::string(GetMethod()->GetName());
   fLogger = new MsgLogger(name);

   if (fMethod->DoRegression()){
      Log() << kFATAL << " ERROR: Sorry, Regression is not yet implemented for automatic parameter optimization"
            << " --> exit" << Endl;
   }

   Log() << kINFO << "Automatic optimisation of tuning parameters in "
         << GetMethod()->GetName() << " uses:" << Endl;

   std::map<TString,TMVA::Interval>::iterator it;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++) {
      Log() << kINFO << it->first
            << " in range from: " << it->second.GetMin()
            << " to: "            << it->second.GetMax()
            << " in : "           << it->second.GetNbins() << " steps"
            << Endl;
   }
   Log() << kINFO << " using the options: " << fFOMType << " and " << fOptimizationFitType << Endl;
}
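// Illustrative usage sketch (not part of this file's interface): a method typically fills a map
// of TMVA::Interval ranges for its tunable parameters and hands it to this class from within its
// own OptimizeTuningParameters(). The parameter names below ("MaxDepth", "NTrees") are examples
// in the style of MethodBDT; the actual set depends on the method:
//
//    std::map<TString,TMVA::Interval> tuneParameters;
//    tuneParameters.insert(std::pair<TString,TMVA::Interval>("MaxDepth", TMVA::Interval(2,5,4)));     // 4 steps: 2,3,4,5
//    tuneParameters.insert(std::pair<TString,TMVA::Interval>("NTrees",   TMVA::Interval(50,500,10))); // 10 steps
//    OptimizeConfigParameters optimize(this, tuneParameters, "ROCIntegral", "FitGA");
//    std::map<TString,Double_t> tunedParameters = optimize.optimize();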
//_______________________________________________________________________
TMVA::OptimizeConfigParameters::~OptimizeConfigParameters()
{
   // the destructor: store the graph of FOM vs. iteration in the method's base directory,
   // then clean up
   GetMethod()->BaseDir()->cd();
   Int_t n=Int_t(fFOMvsIter.size());
   Float_t *x = new Float_t[n];
   Float_t *y = new Float_t[n];
   Float_t ymin=+999999999;
   Float_t ymax=-999999999;

   for (Int_t i=0;i<n;i++){
      x[i] = Float_t(i);
      y[i] = fFOMvsIter[i];
      if (ymin>y[i]) ymin=y[i];
      if (ymax<y[i]) ymax=y[i];
   }

   TH2D   *h = new TH2D(TString(GetMethod()->GetName())+"_FOMvsIterFrame","",2,0,n,2,ymin*0.95,ymax*1.05);
   h->SetXTitle("#iteration "+fOptimizationFitType);
   h->SetYTitle(fFOMType);
   TGraph *gFOMvsIter = new TGraph(n,x,y);
   gFOMvsIter->SetName((TString(GetMethod()->GetName())+"_FOMvsIter").Data());
   gFOMvsIter->Write();
   h->Write();
   // delete fFOMvsIter;
}

//_______________________________________________________________________
std::map<TString,Double_t> TMVA::OptimizeConfigParameters::optimize()
{
   if      (fOptimizationFitType == "Scan"  ) this->optimizeScan();
   else if (fOptimizationFitType == "FitGA" || fOptimizationFitType == "Minuit" ) this->optimizeFit();
   else {
      Log() << kFATAL << "You have chosen as optimization type " << fOptimizationFitType
            << " that is not (yet) coded --> exit()" << Endl;
   }

   Log() << kINFO << "For " << GetMethod()->GetName() << " the optimized Parameters are: " << Endl;
   std::map<TString,Double_t>::iterator it;
   for (it=fTunedParameters.begin(); it!=fTunedParameters.end(); it++){
      Log() << kINFO << it->first << " = " << it->second << Endl;
   }
   return fTunedParameters;
}

//_______________________________________________________________________
std::vector< int > TMVA::OptimizeConfigParameters::GetScanIndices( int val, std::vector<int> base )
{
   // helper function to scan through all the combinations in the
   // parameter space
   std::vector< int > indices;
   for (UInt_t i=0; i< base.size(); i++){
      indices.push_back(val % base[i] );
      val = int( floor( float(val) / float(base[i]) ) );
   }
   return indices;
}
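// Worked example of the index decomposition above (for illustration only): with two tune
// parameters whose grids have base = {4, 3} values, the scan counter val runs from 0 to 11 and
//    val = 5  -->  indices = {5 % 4, floor(5/4) % 3} = {1, 1},
// i.e. the second value of the first parameter combined with the second value of the second
// parameter; every val in [0, 4*3) maps to exactly one parameter combination.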
//_______________________________________________________________________
void TMVA::OptimizeConfigParameters::optimizeScan()
{
   // do the actual optimization using a simple scan method, i.e. calculate the FOM for the
   // different tuning parameters and remember which combination gave the best FOM

   Double_t bestFOM=-1000000, currentFOM;

   std::map<TString,Double_t> currentParameters;
   std::map<TString,TMVA::Interval>::iterator it;

   // for the scan, start at the lower end of the interval and then "move upwards"
   // initialize all parameters in currentParameters
   currentParameters.clear();
   fTunedParameters.clear();

   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
      currentParameters.insert(std::pair<TString,Double_t>(it->first,it->second.GetMin()));
      fTunedParameters.insert(std::pair<TString,Double_t>(it->first,it->second.GetMin()));
   }

   // now loop over all the parameters and get for each combination the figure of merit.
   // in order to loop over all the parameters, first create an "array" (tune parameters)
   // of arrays (the different values of each tune parameter)
   std::vector< std::vector<Double_t> > v;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
      std::vector<Double_t> tmp;
      for (Int_t k=0; k<it->second.GetNbins(); k++){
         tmp.push_back(it->second.GetElement(k));
      }
      v.push_back(tmp);
   }

   Int_t Ntot = 1;
   std::vector<int> Nindividual;
   for (UInt_t i=0; i<v.size(); i++) {
      Ntot *= v[i].size();
      Nindividual.push_back(v[i].size());
   }

   // loop over the total number of different parameter combinations
   for (Int_t i=0; i<Ntot; i++){
      UInt_t index=0;
      std::vector<int> indices = GetScanIndices(i, Nindividual);
      for (it=fTuneParameters.begin(), index=0; index< indices.size(); index++, it++){
         currentParameters[it->first] = v[index][indices[index]];
      }
      Log() << kINFO << "--------------------------" << Endl;
      Log() << kINFO << "Settings being evaluated:" << Endl;
      for (std::map<TString,Double_t>::iterator it_print=currentParameters.begin();
           it_print!=currentParameters.end(); it_print++){
         Log() << kINFO << "  " << it_print->first << " = " << it_print->second << Endl;
      }

      GetMethod()->Reset();
      GetMethod()->SetTuneParameters(currentParameters);
      // now do the training for the current parameters:
      GetMethod()->BaseDir()->cd();
      GetMethod()->GetTransformationHandler().CalcTransformations(GetMethod()->Data()->GetEventCollection());
      GetMethod()->Train();
      currentFOM = GetFOM();
      Log() << kINFO << "FOM was found : " << currentFOM << "; current best is " << bestFOM << Endl;

      if (currentFOM > bestFOM) {
         bestFOM = currentFOM;
         for (std::map<TString,Double_t>::iterator iter=currentParameters.begin();
              iter != currentParameters.end(); iter++){
            fTunedParameters[iter->first]=iter->second;
         }
      }
   }

   GetMethod()->Reset();
   GetMethod()->SetTuneParameters(fTunedParameters);
}
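// Note on the scan cost (illustrative): the scan above retrains the method once per grid point,
// i.e. Ntot = product of GetNbins() over all tune parameters. Two parameters scanned in 4 and 3
// steps, for example, already require 4*3 = 12 full trainings, which is why the fit-based
// optimization below can be preferable for larger parameter spaces.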
//_______________________________________________________________________
void TMVA::OptimizeConfigParameters::optimizeFit()
{
   // ranges (intervals) in which the fit varies the parameters
   std::vector<TMVA::Interval*> ranges;            // intervals of the fit ranges
   std::map<TString,TMVA::Interval>::iterator it;
   std::vector<Double_t> pars;                     // current (starting) fit parameters

   for (it=fTuneParameters.begin(); it != fTuneParameters.end(); it++){
      ranges.push_back(new TMVA::Interval(it->second));
      pars.push_back( (it->second).GetMean() );  // like this the order is "right". Always keep the
                                                 // order in the vector "pars" the same as the iterator
                                                 // iterates through the tuneParameters !!!!
   }

   // create the fitter
   FitterBase* fitter = NULL;

   if ( fOptimizationFitType == "Minuit" ) {
      TString opt="";
      fitter = new MinuitFitter(  *this,
                                  "FitterMinuit_BDTOptimize",
                                  ranges, opt );
   } else if ( fOptimizationFitType == "FitGA" ) {
      TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
      fitter = new GeneticFitter( *this,
                                  "FitterGA_BDTOptimize",
                                  ranges, opt );
   } else {
      Log() << kWARNING << " you did not specify a valid OptimizationFitType "
            << " will use the default (FitGA) " << Endl;
      TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
      fitter = new GeneticFitter( *this,
                                  "FitterGA_BDTOptimize",
                                  ranges, opt );
   }

   fitter->CheckForUnusedOptions();

   // perform the fit
   fitter->Run(pars);

   // clean up
   for (UInt_t ipar=0; ipar<ranges.size(); ipar++) delete ranges[ipar];

   GetMethod()->Reset();

   fTunedParameters.clear();
   Int_t jcount=0;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
      fTunedParameters.insert(std::pair<TString,Double_t>(it->first,pars[jcount++]));
   }

   GetMethod()->SetTuneParameters(fTunedParameters);
}

//_______________________________________________________________________
Double_t TMVA::OptimizeConfigParameters::EstimatorFunction( std::vector<Double_t>& pars )
{
   // return the estimator (from the current FOM) for the fitting interface

   std::map< std::vector<Double_t>, Double_t >::const_iterator iter;
   iter = fAlreadyTrainedParCombination.find(pars);

   if (iter != fAlreadyTrainedParCombination.end()) {
      // this parameter combination has already been trained and evaluated --> reuse the result
      return iter->second;
   }else{
      std::map<TString,Double_t> currentParameters;
      Int_t icount =0; // map "pars" to the map of Tuneparameter, make sure
                       // you never screw up this order!!
      std::map<TString,TMVA::Interval>::iterator it;
      for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); it++){
         currentParameters[it->first] = pars[icount++];
      }
      GetMethod()->Reset();
      GetMethod()->SetTuneParameters(currentParameters);
      GetMethod()->BaseDir()->cd();
      GetMethod()->GetTransformationHandler().CalcTransformations(GetMethod()->Data()->GetEventCollection());
      GetMethod()->Train();

      Double_t currentFOM = GetFOM();

      fAlreadyTrainedParCombination.insert(std::make_pair(pars,-currentFOM));
      return -currentFOM;
   }
}

//_______________________________________________________________________
Double_t TMVA::OptimizeConfigParameters::GetFOM()
{
   // Return the Figure of Merit (FOM) used in the parameter
   // optimization process

   Double_t fom=0;
   if (fMethod->DoRegression()){
      std::cout << " ERROR: Sorry, Regression is not yet implemented for automatic parameter optimisation"
                << " --> exit" << std::endl;
      std::exit(1);
   }else{
      if      (fFOMType == "Separation")        fom = GetSeparation();
      else if (fFOMType == "ROCIntegral")       fom = GetROCIntegral();
      else if (fFOMType == "SigEffAtBkgEff01")  fom = GetSigEffAtBkgEff(0.1);
      else if (fFOMType == "SigEffAtBkgEff001") fom = GetSigEffAtBkgEff(0.01);
      else if (fFOMType == "BkgRejAtSigEff05")  fom = GetBkgRejAtSigEff(0.5);
      else if (fFOMType == "BkgEffAtSigEff05")  fom = GetBkgEffAtSigEff(0.5);
      else {
         Log() << kFATAL << " ERROR, you've specified as Figure of Merit in the "
               << " parameter optimisation " << fFOMType
               << " which has not been implemented yet ---> exit " << Endl;
      }
   }
   fFOMvsIter.push_back(fom);
   return fom;
}
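// Note on the sign convention (summarising the code above): the optimisation always maximises
// the value returned by GetFOM() -- optimizeScan() keeps the combination with the largest FOM,
// and EstimatorFunction() hands -FOM to the fitters because they minimise their estimator.
// Adding a new figure of merit therefore only needs an entry in the if-chain of GetFOM() above.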
//_______________________________________________________________________
void TMVA::OptimizeConfigParameters::GetMVADists()
{
   // fill the private histograms with the mva distributions for sig/bkg

   if (fMvaSig) fMvaSig->Delete();
   if (fMvaBkg) fMvaBkg->Delete();
   if (fMvaSigFineBin) fMvaSigFineBin->Delete();
   if (fMvaBkgFineBin) fMvaBkgFineBin->Delete();

   // maybe later on this should be done a bit more cleverly (time consuming) by
   // first determining proper ranges, removing outliers, as we do in the
   // MVA output calculation in MethodBase::TestClassifier...
   // --> then it might be possible also to use the splined PDFs which currently
   // don't seem to work

   fMvaSig        = new TH1D("fMvaSig","",100,-1.5,1.5);        // used for spline fit
   fMvaBkg        = new TH1D("fMvaBkg","",100,-1.5,1.5);        // used for spline fit
   fMvaSigFineBin = new TH1D("fMvaSigFineBin","",100000,-1.5,1.5);
   fMvaBkgFineBin = new TH1D("fMvaBkgFineBin","",100000,-1.5,1.5);

   const std::vector<Event*> events=fMethod->Data()->GetEventCollection(Types::kTesting);

   UInt_t signalClassNr = fMethod->DataInfo().GetClassInfo("Signal")->GetNumber();

   // fMethod->GetTransformationHandler().CalcTransformations(fMethod->Data()->GetEventCollection(Types::kTesting));

   for (UInt_t iev=0; iev < events.size() ; iev++){
      // std::cout << " GetMVADists event " << iev << std::endl;
      // std::cout << " Class     = " << events[iev]->GetClass() << std::endl;
      // std::cout << " MVA Value = " << fMethod->GetMvaValue(events[iev]) << std::endl;
      if (events[iev]->GetClass() == signalClassNr) {
         fMvaSig       ->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
         fMvaSigFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
      } else {
         fMvaBkg       ->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
         fMvaBkgFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
      }
   }
}

//_______________________________________________________________________
Double_t TMVA::OptimizeConfigParameters::GetSeparation()
{
   // return the separation between the signal and background
   // MVA output distributions
   GetMVADists();

   if (1){
      PDF *splS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
      PDF *splB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
      return gTools().GetSeparation(*splS,*splB);
   }else{
      std::cout << "Separation calculation via histograms (not PDFs) still seems to give strange results!! Don't do that, check!!" << std::endl;
      return gTools().GetSeparation(fMvaSigFineBin,fMvaBkgFineBin); // somehow still gives strange results!! Check!!
   }
}
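// The histogram branch of the ROC calculation below approximates (illustrative summary of the
// code, assuming the usual convention that signal peaks at larger MVA values):
//    ROCIntegral ~= integral over x of  p_S(x) * F_B(x) dx
// where p_S is the normalised signal MVA distribution and F_B the cumulative background
// distribution, i.e. the probability that a background event gets a smaller MVA value than a
// signal event. The disabled PDF branch computes the same quantity from splined PDFs.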
//_______________________________________________________________________
Double_t TMVA::OptimizeConfigParameters::GetROCIntegral()
{
   // calculate the ROC integral of the classifier with the current tuning parameters

   GetMVADists();

   Double_t integral = 0;

   if (0){ // calculation via splined PDFs (currently disabled, see comment in GetMVADists)
      PDF *pdfS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
      PDF *pdfB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );

      Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
      Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());

      UInt_t   nsteps = 1000;
      Double_t step = (xmax-xmin)/Double_t(nsteps);
      Double_t cut = xmin;
      for (UInt_t i=0; i<nsteps; i++){
         integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
         cut+=step;
      }
      integral*=step;
   }else{ // calculation via the fine-binned histograms
      // sanity checks
      if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
           (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
         std::cout << " Error in OptimizeConfigParameters GetROCIntegral, unequal histograms for sig and bkg.." << std::endl;
         std::exit(1);
      }else{
         Double_t *cumulator = fMvaBkgFineBin->GetIntegral();
         Int_t     nbins     = fMvaSigFineBin->GetNbinsX();
         // get the true signal integral (ComputeIntegral just returns 1 as it automatically
         // normalizes. IN ADDITION, it does not account for variable bin sizes, which you might
         // perhaps use later on for the fMvaSig/Bkg histograms)
         Double_t sigIntegral = 0;
         for (Int_t ibin=1; ibin<=nbins; ibin++){
            sigIntegral += fMvaSigFineBin->GetBinContent(ibin) * fMvaSigFineBin->GetBinWidth(ibin);
         }
         // gTools().NormHist( fMvaSigFineBin ); // also doesn't use variable bin widths, and calls
         // TH1::Scale, which oddly enough does not change the SumOfWeights !!!

         for (Int_t ibin=1; ibin<=nbins; ibin++){ // don't include under- and overflow bins
            integral += (cumulator[ibin]) * fMvaSigFineBin->GetBinContent(ibin)/sigIntegral * fMvaSigFineBin->GetBinWidth(ibin);
         }
      }
   }
   return integral;
}

//_______________________________________________________________________
Double_t TMVA::OptimizeConfigParameters::GetSigEffAtBkgEff(Double_t bkgEff)
{
   // calculate the signal efficiency for a given background efficiency

   GetMVADists();
   Double_t sigEff=0;

   // sanity checks
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetSigEffAt, unequal histograms for sig and bkg.." << std::endl;
      std::exit(1);
   }else{
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();

      Int_t nbins=fMvaBkgFineBin->GetNbinsX();
      Int_t ibin=0;

      // loosen the MVA cut from the highest value until the background fraction above the cut
      // reaches the requested background efficiency; report the signal fraction above that cut
      while (bkgCumulator[nbins-ibin] > (1.-bkgEff)) {
         sigEff = sigCumulator[nbins]-sigCumulator[nbins-ibin];
         ibin++;
      }
   }
   return sigEff;
}

//_______________________________________________________________________
Double_t TMVA::OptimizeConfigParameters::GetBkgEffAtSigEff(Double_t sigEff)
{
   // calculate the background efficiency for a given signal efficiency

   GetMVADists();
   Double_t bkgEff=0;

   // sanity checks
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetBkgEffAt, unequal histograms for sig and bkg.." << std::endl;
      std::exit(1);
   }else{
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();

      Int_t nbins=fMvaBkgFineBin->GetNbinsX();
      Int_t ibin=0;

      // loosen the MVA cut from the highest value until the requested signal efficiency is
      // reached; report the background fraction above that cut
      while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
         bkgEff = bkgCumulator[nbins]-bkgCumulator[nbins-ibin];
         ibin++;
      }
   }
   return bkgEff;
}

//_______________________________________________________________________
Double_t TMVA::OptimizeConfigParameters::GetBkgRejAtSigEff(Double_t sigEff)
{
   // calculate the background rejection for a given signal efficiency

   GetMVADists();
   Double_t bkgRej=0;

   // sanity checks
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetBkgRejAt, unequal histograms for sig and bkg.." << std::endl;
      std::exit(1);
   }else{
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();

      Int_t nbins=fMvaBkgFineBin->GetNbinsX();
      Int_t ibin=0;

      // loosen the MVA cut from the highest value until the requested signal efficiency is
      // reached; report the background rejection (background fraction below that cut)
      while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
         bkgRej = bkgCumulator[nbins-ibin];
         ibin++;
      }
   }
   return bkgRej;
}