/********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * * Package: TMVA * * Class : TMVA::DecisionTree * * Web : http://tmva.sourceforge.net * * * * Description: * * IPruneTool - a helper interface class to prune a decision tree * * * * Authors (alphabetical): * * Doug Schouten - Simon Fraser U., Canada * * * * Copyright (c) 2005: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * * * Redistribution and use in source and binary forms, with or without * * modification, are permitted according to the terms listed in LICENSE * * (http://mva.sourceforge.net/license.txt) * **********************************************************************************/ #ifndef ROOT_TMVA_IPruneTool #define ROOT_TMVA_IPruneTool #include #include #ifndef ROOT_TMVA_SeparationBase #include "TMVA/SeparationBase.h" #endif #ifndef ROOT_TMVA_DecisionTree #include "TMVA/DecisionTree.h" #endif namespace TMVA { // class MsgLogger; //////////////////////////////////////////////////////////// // Basic struct for saving relevant pruning information // //////////////////////////////////////////////////////////// class PruningInfo { public: PruningInfo( ) : QualityIndex(0), PruneStrength(0), PruneSequence(0) {} PruningInfo( Double_t q, Double_t alpha, std::vector sequence ); Double_t QualityIndex; //! quality measure for a pruned subtree T of T_max Double_t PruneStrength; //! the regularization parameter for pruning std::vector PruneSequence; //! the sequence of pruning locations in T_max that yields T }; inline PruningInfo::PruningInfo( Double_t q, Double_t alpha, std::vector sequence ) : QualityIndex(q), PruneStrength(alpha), PruneSequence(sequence) {} //////////////////////////////////////////////////////////////////////////////////////////////////////////// // IPruneTool - a helper interface class to prune a decision tree // // // // Any tool which implements the interface should provide two modes for tree pruning: // // 1. automatically find the "best" prune strength by minimizing the error rate on a test sample // // if SetAutomatic() is called, or if automatic = kTRUE argument is set in CalculatePruningInfo() // // In this case, the PruningInfo object returned contains the error rate of the optimally pruned // // tree, the optimal prune strength, and the sequence of nodes to prune to obtain the optimal // // pruned tree from the original DecisionTree // // // // 2. a user-provided pruning strength parameter is used to prune the tree, in which case the returned // // PruningInfo object has QualityIndex = -1, PruneStrength = user prune strength, and PruneSequence // // is the list of nodes to prune // //////////////////////////////////////////////////////////////////////////////////////////////////////////// class IPruneTool { public: typedef std::vector EventSample; IPruneTool( ); virtual ~IPruneTool(); public: // returns the PruningInfo object for a given tree and test sample virtual PruningInfo* CalculatePruningInfo( DecisionTree* dt, const EventSample* testEvents = NULL, Bool_t isAutomatic = kFALSE ) = 0; public: // set the prune strength parameter to use in pruning inline void SetPruneStrength( Double_t alpha ) { fPruneStrength = alpha; } // return the prune strength the tool is using inline Double_t GetPruneStrength( ) const { return fPruneStrength; } // if the prune strength parameter is < 0, the tool will automatically find an optimal strength // set the tool to automatic mode inline void SetAutomatic( ) { fPruneStrength = -1.0; }; inline Bool_t IsAutomatic( ) const { return fPruneStrength <= 0.0; } protected: // mutable MsgLogger* fLogger; //! output stream to save logging information // MsgLogger& Log() const { return *fLogger; } Double_t fPruneStrength; //! regularization parameter in pruning Double_t S, B; }; inline IPruneTool::IPruneTool( ) : fPruneStrength(0.0), S(0), B(0) {} inline IPruneTool::~IPruneTool( ) {} } #endif