// @(#)root/roostats:$Id$ // Author: Kyle Cranmer January 2009 /************************************************************************* * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. * * All rights reserved. * * * * For the licensing terms see $ROOTSYS/LICENSE. * * For the list of contributors see $ROOTSYS/README/CREDITS. * *************************************************************************/ /** \class RooStats::NeymanConstruction \ingroup Roostats NeymanConstruction is a concrete implementation of the NeymanConstruction interface that, as the name suggests, performs a NeymanConstruction. It produces a RooStats::PointSetInterval, which is a concrete implementation of the ConfInterval interface. The Neyman Construction is not a uniquely defined statistical technique, it requires that one specify an ordering rule or ordering principle, which is usually incoded by choosing a specific test statistic and limits of integration (corresponding to upper/lower/central limits). As a result, this class must be configured with the corresponding information before it can produce an interval. Common configurations, such as the Feldman-Cousins approach, can be enforced by other light weight classes. The Neyman Construction considers every point in the parameter space independently, no assumptions are made that the interval is connected or of a particular shape. As a result, the PointSetInterval class is used to represent the result. The user indicate which points in the parameter space to perform the construction by providing a PointSetInterval instance with the desired points. This class is fairly light weight, because the choice of parameter points to be considered is factorized and so is the creation of the sampling distribution of the test statistic (which is done by a concrete class implementing the DistributionCreator interface). As a result, this class basically just drives the construction by: - using a DistributionCreator to create the SamplingDistribution of a user- defined test statistic for each parameter point of interest, - defining the acceptance region in the data by finding the thresholds on the test statistic such that the integral of the sampling distribution is of the appropriate size and consistent with the limits of integration (eg. upper/lower/central limits), - and finally updating the PointSetInterval based on whether the value of the test statistic evaluated on the data are in the acceptance region. */ #include "RooStats/NeymanConstruction.h" #include "RooStats/RooStatsUtils.h" #include "RooStats/PointSetInterval.h" #include "RooStats/SamplingDistribution.h" #include "RooStats/ToyMCSampler.h" #include "RooStats/ModelConfig.h" #include "RooMsgService.h" #include "RooGlobalFunc.h" #include "RooDataSet.h" #include "TFile.h" #include "TTree.h" #include "TMath.h" #include "TH1F.h" ClassImp(RooStats::NeymanConstruction); ; using namespace RooFit; using namespace RooStats; using namespace std; //////////////////////////////////////////////////////////////////////////////// /// default constructor NeymanConstruction::NeymanConstruction(RooAbsData& data, ModelConfig& model): fSize(0.05), fData(data), fModel(model), fTestStatSampler(0), fPointsToTest(0), fLeftSideFraction(0), fConfBelt(0), // constructed with tree data fAdaptiveSampling(false), fAdditionalNToysFactor(1.), fSaveBeltToFile(false), fCreateBelt(false) { // fWS = new RooWorkspace(); // fOwnsWorkspace = true; // fDataName = ""; // fPdfName = ""; } //////////////////////////////////////////////////////////////////////////////// /// default constructor /// if(fOwnsWorkspace && fWS) delete fWS; /// if(fConfBelt) delete fConfBelt; NeymanConstruction::~NeymanConstruction() { } //////////////////////////////////////////////////////////////////////////////// /// Main interface to get a RooStats::ConfInterval. /// It constructs a RooStats::SetInterval. PointSetInterval* NeymanConstruction::GetInterval() const { TFile* f=0; if(fSaveBeltToFile){ //coverity[FORWARD_NULL] oocoutI(f,Contents) << "NeymanConstruction saving ConfidenceBelt to file SamplingDistributions.root" << endl; f = new TFile("SamplingDistributions.root","recreate"); } Int_t npass = 0; RooArgSet* point; // strange problems when using snapshots. // RooArgSet* fPOI = (RooArgSet*) fModel.GetParametersOfInterest()->snapshot(); RooArgSet* fPOI = new RooArgSet(*fModel.GetParametersOfInterest()); RooDataSet* pointsInInterval = new RooDataSet("pointsInInterval", "points in interval", *(fPointsToTest->get(0)) ); // loop over points to test for(Int_t i=0; inumEntries(); ++i){ // get a parameter point from the list of points to test. point = (RooArgSet*) fPointsToTest->get(i);//->clone("temp"); // set parameters of interest to current point *fPOI = *point; // set test stat sampler to use this point fTestStatSampler->SetParametersForTestStat(*fPOI); // get the value of the test statistic for this data set Double_t thisTestStatistic = fTestStatSampler->EvaluateTestStatistic(fData, *fPOI ); /* cout << "NC CHECK: " << i << endl; point->Print(); fPOI->Print("v"); fData.Print(); cout <<"thisTestStatistic = " << thisTestStatistic << endl; */ // find the lower & upper thresholds on the test statistic that // define the acceptance region in the data SamplingDistribution* samplingDist=0; Double_t sigma; Double_t upperEdgeOfAcceptance, upperEdgeMinusSigma, upperEdgePlusSigma; Double_t lowerEdgeOfAcceptance, lowerEdgeMinusSigma, lowerEdgePlusSigma; Int_t additionalMC=0; // the adaptive sampling algorithm wants at least one toy event to be outside // of the requested pvalue including the sampling variation. That leads to an equation // N-1 = (1-alpha)N + Z sqrt(N - (1-alpha)N) // for upper limit and // 1 = alpha N - Z sqrt(alpha N) // for lower limit // // solving for N gives: // N = 1/alpha * [3/2 + sqrt(5)] for Z = 1 (which is used currently) // thus, a good guess for the first iteration of events is N=3.73/alpha~4/alpha // should replace alpha here by smaller tail probability: eg. alpha*Min(leftsideFrac, 1.-leftsideFrac) // totalMC will be incremented by 2 before first call, so initiated it at half the value Int_t totalMC = (Int_t) (2./fSize/TMath::Min(fLeftSideFraction,1.-fLeftSideFraction)); if(fLeftSideFraction==0. || fLeftSideFraction ==1.){ totalMC = (Int_t) (2./fSize); } // use control Double_t tmc = Double_t(totalMC)*fAdditionalNToysFactor; totalMC = (Int_t) tmc; ToyMCSampler* toyMCSampler = dynamic_cast(fTestStatSampler); if(fAdaptiveSampling && toyMCSampler) { do{ // this will be executed first, then while conditioned checked // as an exit condition for the loop. // the next line is where most of the time will be spent // generating the sampling dist of the test statistic. additionalMC = 2*totalMC; // grow by a factor of two samplingDist = toyMCSampler->AppendSamplingDistribution(*point, samplingDist, additionalMC); if (!samplingDist) { oocoutE((TObject*)0,Eval) << "Neyman Construction: error generating sampling distribution" << endl; return 0; } totalMC=samplingDist->GetSize(); //cout << "without sigma upper = " << //samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ) << endl; sigma = 1; upperEdgeOfAcceptance = samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) , sigma, upperEdgePlusSigma); sigma = -1; samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) , sigma, upperEdgeMinusSigma); sigma = 1; lowerEdgeOfAcceptance = samplingDist->InverseCDF( fLeftSideFraction * fSize , sigma, lowerEdgePlusSigma); sigma = -1; samplingDist->InverseCDF( fLeftSideFraction * fSize , sigma, lowerEdgeMinusSigma); ooccoutD(samplingDist,Eval) << "NeymanConstruction: " << "total MC = " << totalMC << " this test stat = " << thisTestStatistic << endl << " upper edge -1sigma = " << upperEdgeMinusSigma << ", upperEdge = "<InverseCDF( fLeftSideFraction * fSize ); upperEdgeOfAcceptance = samplingDist->InverseCDF( 1. - ((1.-fLeftSideFraction) * fSize) ); } // add acceptance region to ConfidenceBelt if(fConfBelt && fCreateBelt){ // cout << "conf belt set " << fConfBelt << endl; fConfBelt->AddAcceptanceRegion(*point, i, lowerEdgeOfAcceptance, upperEdgeOfAcceptance); } // printout some debug info TIter itr = point->createIterator(); RooRealVar* myarg; ooccoutP(samplingDist,Eval) << "NeymanConstruction: Prog: "<< i+1<<"/"<numEntries() << " total MC = " << samplingDist->GetSize() << " this test stat = " << thisTestStatistic << endl; ooccoutP(samplingDist,Eval) << " "; while ((myarg = (RooRealVar *)itr.Next())) { ooccoutP(samplingDist,Eval) << myarg->GetName() << "=" << myarg->getVal() << " "; } ooccoutP(samplingDist,Eval) << "[" << lowerEdgeOfAcceptance << ", " << upperEdgeOfAcceptance << "] " << " in interval = " << (thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) << endl << endl; // Check if this data is in the acceptance region if(thisTestStatistic >= lowerEdgeOfAcceptance && thisTestStatistic <= upperEdgeOfAcceptance) { // if so, set this point to true // fPointsToTest->add(*point, 1.); // this behaves differently for Hist and DataSet pointsInInterval->add(*point); ++npass; } if(fSaveBeltToFile){ //write to file samplingDist->Write(); string tmpName = "hist_"; tmpName+=samplingDist->GetName(); TH1F* h = new TH1F(tmpName.c_str(),"",500,0.,5.); for(int ii=0; iiGetSize(); ++ii){ h->Fill(samplingDist->GetSamplingDistribution().at(ii) ); } h->Write(); delete h; } delete samplingDist; // delete point; // from dataset } oocoutI(pointsInInterval,Eval) << npass << " points in interval" << endl; // create an interval based pointsInInterval PointSetInterval* interval = new PointSetInterval("ClassicalConfidenceInterval", *pointsInInterval); if(fSaveBeltToFile){ // write belt to file fConfBelt->Write(); f->Close(); } delete f; //delete data; return interval; }