// @(#)root/tmva $Id$ // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Peter Speckmayer, Eckhard von Toerne /********************************************************************************** * Project: TMVA - a Root-integrated toolkit for multivariate data analysis * * Package: TMVA * * Class : VariableNormalizeTransform * * Web : http://tmva.sourceforge.net * * * * Description: * * Implementation (see header for description) * * * * Authors (alphabetical): * * Andreas Hoecker - CERN, Switzerland * * Joerg Stelzer - CERN, Switzerland * * Peter Speckmayer - CERN, Switzerland * * Helge Voss - MPI-K Heidelberg, Germany * * Eckhard v. Toerne - U of Bonn, Germany * * * * Copyright (c) 2005-2011: * * CERN, Switzerland * * MPI-K Heidelberg, Germany * * U. of Bonn, Germany * * * * Redistribution and use in source and binary forms, with or without * * modification, are permitted according to the terms listed in LICENSE * * (http://tmva.sourceforge.net/LICENSE) * **********************************************************************************/ #include #include #include "TVectorF.h" #include "TVectorD.h" #include "TMatrixD.h" #include "TMatrixDBase.h" #ifndef ROOT_TMVA_MsgLogger #include "TMVA/MsgLogger.h" #endif #ifndef ROOT_TMVA_VariableNormalizeTransform #include "TMVA/VariableNormalizeTransform.h" #endif #ifndef ROOT_TMVA_Tools #include "TMVA/Tools.h" #endif #ifndef ROOT_TMVA_DataSet #include "TMVA/DataSet.h" #endif ClassImp(TMVA::VariableNormalizeTransform) //_______________________________________________________________________ TMVA::VariableNormalizeTransform::VariableNormalizeTransform( DataSetInfo& dsi ) : VariableTransformBase( dsi, Types::kNormalized, "Norm" ) { // constructor } //_______________________________________________________________________ TMVA::VariableNormalizeTransform::~VariableNormalizeTransform() { } //_______________________________________________________________________ void TMVA::VariableNormalizeTransform::Initialize() { // initialization of the normalization transformation UInt_t inputSize = fGet.size(); Int_t numC = GetNClasses()+1; if (GetNClasses() <= 1 ) numC = 1; fMin.resize( numC ); fMax.resize( numC ); for (Int_t i=0; i& events ) { // prepare transformation if (!IsEnabled() || IsCreated()) return kTRUE; Log() << kINFO << "Preparing the transformation." << Endl; Initialize(); CalcNormalizationParams( events ); SetCreated( kTRUE ); return kTRUE; } //_______________________________________________________________________ const TMVA::Event* TMVA::VariableNormalizeTransform::Transform( const TMVA::Event* const ev, Int_t cls ) const { // apply the normalization transformation if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl; // if cls (the class chosen by the user) not existing, // assume that he wants to have the matrix for all classes together. // if (cls < 0 || cls > GetNClasses()) { // if (GetNClasses() > 1 ) cls = GetNClasses(); // else cls = (fMin.size()==1?0:2); // } // EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector ,...) if (cls < 0 || cls >= (int) fMin.size()) cls = fMin.size()-1; // EVT workaround end FloatVector input; // will be filled with the selected variables, targets, (spectators) FloatVector output; // will be filled with the selected variables, targets, (spectators) std::vector mask; // entries with kTRUE must not be transformed GetInput( ev, input, mask ); if (fTransformedEvent==0) fTransformedEvent = new Event(); Float_t min,max; const FloatVector& minVector = fMin.at(cls); const FloatVector& maxVector = fMax.at(cls); UInt_t iidx = 0; std::vector::iterator itMask = mask.begin(); for ( std::vector::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp) { // loop over input variables if( (*itMask) ){ ++iidx; ++itMask; // don't put any value into output if the value is masked continue; } Float_t val = (*itInp); min = minVector.at(iidx); max = maxVector.at(iidx); Float_t offset = min; Float_t scale = 1.0/(max-min); Float_t valnorm = (val-offset)*scale * 2 - 1; output.push_back( valnorm ); ++iidx; ++itMask; } SetOutput( fTransformedEvent, output, mask, ev ); return fTransformedEvent; } //_______________________________________________________________________ const TMVA::Event* TMVA::VariableNormalizeTransform::InverseTransform( const TMVA::Event* const ev, Int_t cls ) const { // apply the inverse transformation if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl; // if cls (the class chosen by the user) not existing, // assume that user wants to have the transformation for all classes together. if (cls < 0 || cls > GetNClasses()) { if (GetNClasses() > 1 ) cls = GetNClasses(); else cls = 0; } FloatVector input; // will be filled with the selected variables, targets, (spectators) FloatVector output; // will be filled with the output std::vector mask; GetInput( ev, input, mask, kTRUE ); if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev ); Float_t min,max; const FloatVector& minVector = fMin.at(cls); const FloatVector& maxVector = fMax.at(cls); UInt_t iidx = 0; for ( std::vector::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp) { // loop over input variables Float_t val = (*itInp); min = minVector.at(iidx); max = maxVector.at(iidx); Float_t offset = min; Float_t scale = 1.0/(max-min); Float_t valnorm = offset+((val+1)/(scale * 2)); output.push_back( valnorm ); ++iidx; } SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE ); return fBackTransformedEvent; } //_______________________________________________________________________ void TMVA::VariableNormalizeTransform::CalcNormalizationParams( const std::vector& events ) { // compute offset and scale from min and max if (events.size() <= 1) Log() << kFATAL << "Not enough events (found " << events.size() << ") to calculate the normalization" << Endl; FloatVector input; // will be filled with the selected variables, targets, (spectators) std::vector mask; UInt_t inputSize = fGet.size(); // number of input variables const UInt_t nCls = GetNClasses(); Int_t numC = nCls+1; // prepare the min and max values for each of the classes and additionally for all classes (if more than one) Int_t all = nCls; // at idx the min and max values for "all" classes are stored if (nCls <= 1 ) { numC = 1; all = 0; } for (UInt_t iinp=0; iinp::const_iterator evIt = events.begin(); for (;evIt!=events.end();evIt++) { // loop over all events TMVA::Event* event = (*evIt); // get the event UInt_t cls = (*evIt)->GetClass(); // get the class of this event FloatVector& minVector = fMin.at(cls); FloatVector& maxVector = fMax.at(cls); FloatVector& minVectorAll = fMin.at(all); FloatVector& maxVectorAll = fMax.at(all); GetInput(event,input,mask); // select the input variables for the transformation and get them from the event UInt_t iidx = 0; for ( std::vector::iterator itInp = input.begin(), itInpEnd = input.end(); itInp != itInpEnd; ++itInp) { // loop over input variables Float_t val = (*itInp); if( minVector.at(iidx) > val ) minVector.at(iidx) = val; if( maxVector.at(iidx) < val ) maxVector.at(iidx) = val; if (nCls != 1) { // in case more than one class exists, compute min and max as well for all classes together if (minVectorAll.at(iidx) > val) minVectorAll.at(iidx) = val; if (maxVectorAll.at(iidx) < val) maxVectorAll.at(iidx) = val; } ++iidx; } } return; } //_______________________________________________________________________ std::vector* TMVA::VariableNormalizeTransform::GetTransformationStrings( Int_t cls ) const { // creates string with variable transformations applied // if cls (the class chosen by the user) not existing, assume that user wants to // have the matrix for all classes together. if (cls < 0 || cls > GetNClasses()) cls = GetNClasses(); Float_t min, max; const UInt_t size = fGet.size(); std::vector* strVec = new std::vector(size); UInt_t iinp = 0; for( ItVarTypeIdxConst itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ) { min = fMin.at(cls).at(iinp); max = fMax.at(cls).at(iinp); Char_t type = (*itGet).first; UInt_t idx = (*itGet).second; Float_t offset = min; Float_t scale = 1.0/(max-min); TString str(""); VariableInfo& varInfo = (type=='v'?fDsi.GetVariableInfo(idx):(type=='t'?fDsi.GetTargetInfo(idx):fDsi.GetSpectatorInfo(idx))); if (offset < 0) str = Form( "2*%g*([%s] + %g) - 1", scale, varInfo.GetLabel().Data(), -offset ); else str = Form( "2*%g*([%s] - %g) - 1", scale, varInfo.GetLabel().Data(), offset ); (*strVec)[iinp] = str; ++iinp; } return strVec; } //_______________________________________________________________________ void TMVA::VariableNormalizeTransform::WriteTransformationToStream( std::ostream& o ) const { // write the decorrelation matrix to the stream o << "# min max for all variables for all classes one after the other and as a last entry for all classes together" << std::endl; Int_t numC = GetNClasses()+1; if (GetNClasses() <= 1 ) numC = 1; UInt_t nvars = GetNVariables(); UInt_t ntgts = GetNTargets(); for (Int_t icls = 0; icls < numC; icls++ ) { o << icls << std::endl; for (UInt_t ivar=0; ivar('v',ivar)); } for( UInt_t itgt = 0; itgt < ntgts; ++itgt ){ fGet.push_back(std::pair('t',itgt)); } void* ch = gTools().GetChild( trfnode ); while(ch) { gTools().ReadAttr(ch, "ClassIndex", classindex); fMin.resize(classindex+1); fMax.resize(classindex+1); fMin[classindex].resize(nvars+ntgts,Float_t(0)); fMax[classindex].resize(nvars+ntgts,Float_t(0)); void* clch = gTools().GetChild( ch ); while(clch) { TString nodeName(gTools().GetName(clch)); if(nodeName=="Variables") { void* varch = gTools().GetChild( clch ); while(varch) { gTools().ReadAttr(varch, "VarIndex", varindex); gTools().ReadAttr(varch, "Min", fMin[classindex][varindex]); gTools().ReadAttr(varch, "Max", fMax[classindex][varindex]); varch = gTools().GetNextChild( varch ); } } else if (nodeName=="Targets") { void* tgtch = gTools().GetChild( clch ); while(tgtch) { gTools().ReadAttr(tgtch, "TargetIndex", tgtindex); gTools().ReadAttr(tgtch, "Min", fMin[classindex][nvars+tgtindex]); gTools().ReadAttr(tgtch, "Max", fMax[classindex][nvars+tgtindex]); tgtch = gTools().GetNextChild( tgtch ); } } clch = gTools().GetNextChild( clch ); } ch = gTools().GetNextChild( ch ); } SetCreated(); } //_______________________________________________________________________ void TMVA::VariableNormalizeTransform::BuildTransformationFromVarInfo( const std::vector& var ) { // this method is only used when building a normalization transformation // from old text files // in this case regression didn't exist and there were no targets UInt_t nvars = GetNVariables(); if(var.size() != nvars) Log() << kFATAL << " can't build transformation," << " since the number of variables disagree" << Endl; UInt_t numC = (GetNClasses()<=1)?1:GetNClasses()+1; fMin.clear();fMin.resize( numC ); fMax.clear();fMax.resize( numC ); for(UInt_t cls=0; cls::const_iterator v = var.begin(); v!=var.end(); ++v, ++vidx) { fMin[cls][vidx] = v->GetMin(); fMax[cls][vidx] = v->GetMax(); fGet.push_back(std::pair('v',vidx)); } } SetCreated(); } //_______________________________________________________________________ void TMVA::VariableNormalizeTransform::ReadTransformationFromStream( std::istream& istr, const TString& ) { // Read the variable ranges from an input stream UInt_t nvars = GetNVariables(); UInt_t ntgts = GetNTargets(); for( UInt_t ivar = 0; ivar < nvars; ++ivar ){ fGet.push_back(std::pair('v',ivar)); } for( UInt_t itgt = 0; itgt < ntgts; ++itgt ){ fGet.push_back(std::pair('t',itgt)); } char buf[512]; char buf2[512]; istr.getline(buf,512); TString strvar, dummy; Int_t icls; TString test; while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return char* p = buf; while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace if (*p=='#' || *p=='\0') { istr.getline(buf,512); continue; // if comment or empty line, read the next line } std::stringstream sstr(buf); sstr >> icls; for (UInt_t ivar=0;ivar> fMin[icls][ivar] >> fMax[icls][ivar]; } for (UInt_t itgt=0;itgt> fMin[icls][nvars+itgt] >> fMax[icls][nvars+itgt]; } istr.getline(buf,512); // reading the next line } SetCreated(); } //_______________________________________________________________________ void TMVA::VariableNormalizeTransform::PrintTransformation( ostream& /* o */ ) { // prints the transformation ranges Int_t nCls = GetNClasses(); Int_t numC = nCls+1; if (nCls <= 1 ) numC = 1; for (Int_t icls = 0; icls < numC; icls++ ) { if( icls == nCls ) Log() << kINFO << "Transformation for all classes based on these ranges:" << Endl; else Log() << kINFO << "Transformation for class " << icls << " based on these ranges:" << Endl; UInt_t iinp = 0; for( ItVarTypeIdxConst itGet = fGet.begin(), itGetEnd = fGet.end(); itGet != itGetEnd; ++itGet ){ Char_t type = (*itGet).first; UInt_t idx = (*itGet).second; TString typeString = (type=='v'?"Variable: ": (type=='t'?"Target : ":"Spectator : ") ); Log() << typeString.Data() << std::setw(20) << fMin[icls][idx] << std::setw(20) << fMax[icls][idx] << Endl; ++iinp; } } } //_______________________________________________________________________ void TMVA::VariableNormalizeTransform::MakeFunction( std::ostream& fout, const TString& fcncName, Int_t part, UInt_t trCounter, Int_t ) { // creates a normalizing function // TODO include target-transformation into makefunction UInt_t nVar = fGet.size(); UInt_t numC = fMin.size(); if (part==1) { fout << std::endl; fout << " double fMin_"<& iv, int cls) const" << std::endl; fout << "{" << std::endl; fout << " // Normalization transformation" << std::endl; fout << " if (cls < 0 || cls > "< 1 ) cls = "< dv(nVar);" << std::endl; fout << " for (int ivar=0; ivar