// @(#)root/tmva $Id$ // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * * Package: TMVA * * Class : MethodDT (Boosted Decision Trees) * * Web : http://tmva.sourceforge.net * * * * Description: * * Analysis of Boosted Decision Trees * * * * Authors (alphabetical): * * Andreas Hoecker - CERN, Switzerland * * Helge Voss - MPI-K Heidelberg, Germany * * Or Cohen - Weizmann Inst., Israel * * * * Copyright (c) 2005: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * * Redistribution and use in source and binary forms, with or without * * modification, are permitted according to the terms listed in LICENSE * * (http://tmva.sourceforge.net/LICENSE) * **********************************************************************************/ #ifndef ROOT_TMVA_MethodDT #define ROOT_TMVA_MethodDT ////////////////////////////////////////////////////////////////////////// // // // MethodDT // // // // Analysis of Single Decision Tree // // // ////////////////////////////////////////////////////////////////////////// #include #include "TH1.h" #include "TH2.h" #include "TTree.h" #include "TMVA/MethodBase.h" #include "TMVA/DecisionTree.h" #include "TMVA/Event.h" namespace TMVA { class MethodBoost; class MethodDT : public MethodBase { public: MethodDT( const TString& jobName, const TString& methodTitle, DataSetInfo& theData, const TString& theOption = ""); MethodDT( DataSetInfo& dsi, const TString& theWeightFile); virtual ~MethodDT( void ); virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets ); void Train( void ); using MethodBase::ReadWeightsFromStream; // write weights to file void AddWeightsXMLTo( void* parent ) const; // read weights from file void ReadWeightsFromStream( std::istream& istr ); void ReadWeightsFromXML ( void* wghtnode ); // calculate the MVA value Double_t GetMvaValue( Double_t* err = nullptr, Double_t* errUpper = nullptr ); // the option handling methods void DeclareOptions(); void ProcessOptions(); void DeclareCompatibilityOptions(); void GetHelpMessage() const; // ranking of input variables const Ranking* CreateRanking(); Double_t PruneTree( ); Double_t TestTreeQuality( DecisionTree *dt ); Double_t GetPruneStrength () { return fPruneStrength; } void SetMinNodeSize(Double_t sizeInPercent); void SetMinNodeSize(TString sizeInPercent); Int_t GetNNodesBeforePruning(){return fTree->GetNNodesBeforePruning();} Int_t GetNNodes(){return fTree->GetNNodes();} private: // Init used in the various constructors void Init( void ); private: std::vector fEventSample; ///< the training events DecisionTree* fTree; ///< the decision tree //options for the decision Tree SeparationBase *fSepType; ///< the separation used in node splitting TString fSepTypeS; ///< the separation (option string) used in node splitting Int_t fMinNodeEvents; ///< min number of events in node Float_t fMinNodeSize; ///< min percentage of training events in node TString fMinNodeSizeS; ///< string containing min percentage of training events in node Int_t fNCuts; ///< grid used in cut applied in node splitting Bool_t fUseYesNoLeaf; ///< use sig or bkg classification in leave nodes or sig/bkg Double_t fNodePurityLimit; ///< purity limit for sig/bkg nodes UInt_t fMaxDepth; ///< max depth Double_t fErrorFraction; ///< ntuple var: misclassification error fraction Double_t fPruneStrength; ///< a parameter to set the "amount" of pruning..needs to be adjusted DecisionTree::EPruneMethod fPruneMethod; ///< method used for pruning TString fPruneMethodS; ///< prune method option String Bool_t fAutomatic; ///< use user given prune strength or automatically determined one using a validation sample Bool_t fRandomisedTrees; ///< choose a random subset of possible cut variables at each node during training Int_t fUseNvars; ///< the number of variables used in the randomised tree splitting Bool_t fUsePoissonNvars; ///< fUseNvars is used as a poisson mean, and the actual value of useNvars is at each step drawn form that distribution std::vector fVariableImportance; ///< the relative importance of the different variables Double_t fDeltaPruneStrength; ///< step size in pruning, is adjusted according to experience of previous trees // debugging flags static const Int_t fgDebugLevel = 0; ///< debug level determining some printout/control plots etc. Bool_t fPruneBeforeBoost; ///< ancient variable, only needed for "CompatibilityOptions" ClassDef(MethodDT,0); // Analysis of Decision Trees }; } #endif