#ifndef TMVA_RSTANDARDSCALER #define TMVA_RSTANDARDSCALER #include #include #include #include #include namespace TMVA { namespace Experimental { template class RStandardScaler { private: std::vector fMeans; std::vector fStds; public: RStandardScaler() = default; RStandardScaler(std::string_view title, std::string_view filename); void Fit(const RTensor& x); std::vector Compute(const std::vector& x); RTensor Compute(const RTensor& x); std::vector GetMeans() const { return fMeans; } std::vector GetStds() const { return fStds; } void Save(std::string_view title, std::string_view filename); }; template inline RStandardScaler::RStandardScaler(std::string_view title, std::string_view filename) { auto file = TFile::Open(filename.data(), "READ"); RStandardScaler* obj; file->GetObject(title.data(), obj); fMeans = obj->GetMeans(); fStds = obj->GetStds(); delete obj; file->Close(); } template inline void RStandardScaler::Save(std::string_view title, std::string_view filename) { auto file = TFile::Open(filename.data(), "UPDATE"); file->WriteObject>(this, title.data()); file->Write(); file->Close(); } template inline void RStandardScaler::Fit(const RTensor& x) { const auto shape = x.GetShape(); if (shape.size() != 2) throw std::runtime_error("Can only fit to input tensor of rank 2."); fMeans.clear(); fMeans.resize(shape[1]); fStds.clear(); fStds.resize(shape[1]); // Compute means for (std::size_t i = 0; i < shape[0]; i++) { for (std::size_t j = 0; j < shape[1]; j++) { fMeans[j] += x(i, j); } } for (std::size_t i = 0; i < shape[1]; i++) { fMeans[i] /= shape[0]; } // Compute standard deviations using unbiased estimator for (std::size_t i = 0; i < shape[0]; i++) { for (std::size_t j = 0; j < shape[1]; j++) { fStds[j] += (x(i, j) - fMeans[j]) * (x(i, j) - fMeans[j]); } } for (std::size_t i = 0; i < shape[1]; i++) { fStds[i] = std::sqrt(fStds[i] / (shape[0] - 1)); } } template inline std::vector RStandardScaler::Compute(const std::vector& x) { const auto size = x.size(); if (size != fMeans.size()) throw std::runtime_error("Size of input vector is not equal to number of fitted variables."); std::vector y(size); for (std::size_t i = 0; i < size; i++) { y[i] = (x[i] - fMeans[i]) / fStds[i]; } return y; } template inline RTensor RStandardScaler::Compute(const RTensor& x) { const auto shape = x.GetShape(); if (shape.size() != 2) throw std::runtime_error("Can only compute output for input tensor of rank 2."); if (shape[1] != fMeans.size()) throw std::runtime_error("Second dimension of input tensor is not equal to number of fitted variables."); RTensor y(shape); for (std::size_t i = 0; i < shape[0]; i++) { for (std::size_t j = 0; j < shape[1]; j++) { y(i, j) = (x(i, j) - fMeans[j]) / fStds[j]; } } return y; } } // namespace Experimental } // namespace TMVA #endif // TMVA_RSTANDARDSCALER