// @(#)root/mathcore:$Id$ // Authors: Bartolomeu Rabacal 05/2010 /********************************************************************** * * * Copyright (c) 2006 , LCG ROOT MathLib Team * * * * * **********************************************************************/ // Header file for GoFTest #include #ifndef ROOT_Math_GoFTest #define ROOT_Math_GoFTest #ifndef ROOT_Math_WrappedFunction #include "Math/WrappedFunction.h" #endif #ifndef ROOT_TMath #include "TMath.h" #endif /* Goodness of Fit Statistical Tests Toolkit -- Anderson-Darling and Kolmogorov-Smirnov 1- and 2-Samples Tests */ namespace ROOT { namespace Math { class GoFTest { public: enum EDistribution { // H0 distributions for using only with 1-sample tests kUndefined, // Default value for non templated 1-sample test. Set with SetDistribution kUserDefined, // For internal use only within the class's template constructor kGaussian, kLogNormal, kExponential }; enum EUserDistribution { // User input distribution option kCDF, kPDF // Default value }; enum ETestType { // Goodness of Fit test types for using with the class's unary funtions as a shorthand for the in-built methods kAD, // Anderson-Darling Test. Default value kAD2s, // Anderson-Darling 2-Samples Test kKS, // Kolmogorov-Smirnov Test kKS2s // Kolmogorov-Smirnov 2-Samples Test }; /* Constructor for using only with 2-samples tests */ GoFTest(UInt_t sample1Size, const Double_t* sample1, UInt_t sample2Size, const Double_t* sample2); /* Constructor for using only with 1-sample tests with a specified distribution */ GoFTest(UInt_t sampleSize, const Double_t* sample, EDistribution dist = kUndefined); /* Templated constructor for using only with 1-sample tests with a user specified distribution */ template GoFTest(UInt_t sampleSize, const Double_t* sample, Dist& dist, EUserDistribution userDist = kPDF, Double_t xmin = 1, Double_t xmax = 0) { Instantiate(sample, sampleSize); SetUserDistribution(dist, userDist, xmin, xmax); } /* Specialization using IGenFunction interface */ GoFTest(UInt_t sampleSize, const Double_t* sample, const IGenFunction& dist, EUserDistribution userDist = kPDF, Double_t xmin = 1, Double_t xmax = 0) { Instantiate(sample, sampleSize); SetUserDistribution(dist, userDist, xmin, xmax); } /* Sets the user input distribution function for 1-sample tests. */ template void SetUserDistribution(Dist& dist, EUserDistribution userDist = kPDF, Double_t xmin = 1, Double_t xmax = 0) { WrappedFunction wdist(dist); SetDistributionFunction(wdist, userDist, xmin, xmax); } /* Template specialization to set the user input distribution for 1-sample tests */ void SetUserDistribution(const IGenFunction& dist, GoFTest::EUserDistribution userDist = kPDF, Double_t xmin = 1, Double_t xmax = 0) { SetDistributionFunction(dist, userDist, xmin, xmax); } /* Sets the user input distribution as a probability density function for 1-sample tests */ template void SetUserPDF(Dist& pdf, Double_t xmin = 1, Double_t xmax = 0) { SetUserDistribution(pdf, kPDF, xmin, xmax); } /* Template specialization to set the user input distribution as a probability density function for 1-sample tests */ void SetUserPDF(const IGenFunction& pdf, Double_t xmin = 1, Double_t xmax = 0) { SetUserDistribution(pdf, kPDF, xmin, xmax); } /* Sets the user input distribution as a cumulative distribution function for 1-sample tests The CDF must return zero */ template void SetUserCDF(Dist& cdf, Double_t xmin = 1, Double_t xmax = 0) { SetUserDistribution(cdf, kCDF, xmin, xmax); } /* Template specialization to set the user input distribution as a cumulative distribution function for 1-sample tests */ void SetUserCDF(const IGenFunction& cdf, Double_t xmin = 1, Double_t xmax = 0) { SetUserDistribution(cdf, kCDF, xmin, xmax); } /* Sets the distribution for the predefined distribution types */ void SetDistribution(EDistribution dist); virtual ~GoFTest(); /* The Anderson-Darling K-Sample Test algorithm is described and taken from http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andeksam.htm and described and taken from (1) Scholz F.W., Stephens M.A. (1987), K-sample Anderson-Darling Tests, Journal of the American Statistical Association, 82, 918–924. (2-samples variant implemented) */ void AndersonDarling2SamplesTest(Double_t& pvalue, Double_t& testStat) const; Double_t AndersonDarling2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2" /* The Anderson-Darling 1-Sample Test algorithm for a specific distribution is described at http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/andedarl.htm and described and taken from (2) Marsaglia J.C.W., Marsaglia G. (2004), Evaluating the Anderson-Darling Distribution, Journal of Statistical Software, Volume 09, Issue i02. and described and taken from (3) Lewis P.A.W. (1961), The Annals of Mathematical Statistics, Distribution of the Anderson-Darling Statistic, Volume 32, Number 4, 1118-1124. */ void AndersonDarlingTest(Double_t& pvalue, Double_t& testStat) const; Double_t AndersonDarlingTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "A2" /* The Kolmogorov-Smirnov 2-Samples Test algorithm is described at http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/ks2samp.htm and described and taken from http://root.cern.ch/root/html/TMath.html#TMath:KolmogorovTest */ void KolmogorovSmirnov2SamplesTest(Double_t& pvalue, Double_t& testStat) const; Double_t KolmogorovSmirnov2SamplesTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn" /* The Kolmogorov-Smirnov 1-Sample Test algorithm for a specific distribution is described at http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/kstest.htm and described and taken from (4) Press W. H., Teukolsky S.A., Vetterling W.T., Flannery B.P. (2007), Numerical Recipes - The Art of Scientific Computing (Third Edition), Cambridge Univerdity Press */ void KolmogorovSmirnovTest(Double_t& pvalue, Double_t& testStat) const; Double_t KolmogorovSmirnovTest(const Char_t* option = "p") const; // Returns default p-value; option "t" returns the test statistic value "Dn" // The class's unary functions void operator()(ETestType test, Double_t& pvalue, Double_t& testStat) const; Double_t operator()(ETestType test = kAD, const Char_t* option = "p") const; // Returns default Anderson Darling 1-Sample Test and default p-value; option "t" returns the test statistic value specific to the test type private: GoFTest(); // Disallowed default constructor GoFTest(GoFTest& gof); // Disallowed copy constructor GoFTest operator=(GoFTest& gof); // Disallowed assign operator std::auto_ptr fCDF; EDistribution fDist; Double_t fMean; Double_t fSigma; std::vector fCombinedSamples; std::vector > fSamples; Bool_t fTestSampleFromH0; void SetCDF(); void SetDistributionFunction(const IGenFunction& cdf, Bool_t isPDF, Double_t xmin, Double_t xmax); void Instantiate(const Double_t* sample, UInt_t sampleSize); Double_t LogNormalCDF(Double_t x) const; Double_t GaussianCDF(Double_t x) const; Double_t ExponentialCDF(Double_t x) const; Double_t GetSigmaN(UInt_t N) const; // Computation of sigma_N as described in (1) Double_t InterpolatePValues(Double_t dA2, Int_t bin) const; // Linear interpolation used in GoFTest::PValueAD2Samples Double_t PValueAD2Samples(Double_t& A2, UInt_t N) const; // Computation of the 2-Sample Anderson-Darling Test's p-value as described in (1) Double_t PValueAD1Sample(Double_t A2) const; // Computation of the 1-Sample Anderson-Darling Test's p-value void LogSample(); // Applies the logarithm to the sample when the specified distribution to test is LogNormal void SetSamples(std::vector samples, const std::vector samplesSizes); void SetParameters(); // Sets the estimated mean and standard-deviation from the samples }; // end GoFTest class } // ROOT namespace } // Math namespace #endif