// @(#)root/tmva: $Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef TMVA_DNN_NET
#define TMVA_DNN_NET

#include <vector>
#include <iostream>

#include "Layer.h"

namespace TMVA {
namespace DNN {

/** \class TNet
 *
 *  Generic neural network class.
 *
 *  This generic neural network class represents a concrete neural network
 *  through a vector of layers and coordinates the forward and backward
 *  propagation through the net.
 *
 *  The net takes as input a batch from the training data given in matrix
 *  form, with each row corresponding to a certain training event.
 *
 *  On construction, the neural network allocates all the memory required
 *  for the training of the neural net and keeps it until its destruction.
 *
 *  The Architecture_t type argument simply holds the architecture-specific
 *  data types, which are just the matrix type Matrix_t and the used scalar
 *  type Scalar_t.
 *
 *  \tparam Architecture_t The architecture type that holds the data types
 *                         for a given architecture.
 *  \tparam Layer_t        The type used for the layers. Can be either
 *                         TLayer<Architecture_t> or TSharedLayer<Architecture_t>.
 */
template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
class TNet
{
public:
   using Matrix_t        = typename Architecture_t::Matrix_t;
   using Scalar_t        = typename Architecture_t::Scalar_t;
   using LayerIterator_t = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;  ///< Batch size for training and evaluation of the network.
   size_t fInputWidth; ///< Number of features in a single input event.

   std::vector<Layer_t> fLayers; ///< Layers in the network.

   Matrix_t        fDummy;       ///< Empty matrix for last step in back propagation.
   ELossFunction   fJ;           ///< The loss function of the network.
   ERegularization fR;           ///< The regularization used for the network.
   Scalar_t        fWeightDecay; ///< The weight decay factor.

public:
   TNet();
   TNet(const TNet & other);
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);

   /*! Construct a neural net for a given batch size and input width with
    *  the given loss function and regularization. */
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction fJ,
        ERegularization fR = ERegularization::kNone,
        Scalar_t fWeightDecay = 0.0);

   /*! Create a clone that uses the same weight and bias matrices but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);

   /*! Add a layer of the given size to the neural net. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network. */
   void Clear();

   /*! Add a layer which shares its weights with another TNet instance. */
   template<typename SharedLayer>
   void AddLayer(SharedLayer & layer);

   /*! Iterator to the first layer of the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Iterator past the last layer of the net. */
   LayerIterator_t LayersEnd() {return fLayers.end();}

   /*! Initialize the weights in the net with the given
    *  initialization method. */
   inline void Initialize(EInitialization m);

   /*! Initialize the gradients in the net to zero. Required if the net is
    *  used to store velocities of momentum-based minimization techniques. */
   inline void InitializeGradients();

   /*! Forward a given input through the neural net. Computes
    *  all layer activations up to the output layer. */
   inline void Forward(Matrix_t &X, bool applyDropout = false);

   /*! Compute the weight gradients in the net from the given training
    *  samples X and training labels Y. */
   inline void Backward(const Matrix_t &X, const Matrix_t &Y,
                        const Matrix_t &weights);

   /*! Evaluate the loss function of the net using the activations
    *  that are currently stored in the output layer. */
   inline Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights,
                        bool includeRegularization = true) const;

   /*! Propagate the input batch X through the net and evaluate the
    *  error function for the resulting activations of the output
    *  layer. */
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights,
                        bool applyDropout = false,
                        bool includeRegularization = true);

   /*! Compute the neural network prediction obtained from forwarding the
    *  batch X through the neural network and applying the output function
    *  f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);

   /*! Compute the neural network prediction obtained from applying the output
    *  function f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;

   Scalar_t GetNFlops();

   size_t GetDepth() const                   {return fLayers.size();}
   size_t GetBatchSize() const               {return fBatchSize;}
   Layer_t & GetLayer(size_t i)              {return fLayers[i];}
   const Layer_t & GetLayer(size_t i) const  {return fLayers[i];}
   ELossFunction GetLossFunction() const     {return fJ;}
   Matrix_t & GetOutput()                    {return fLayers.back().GetOutput();}
   size_t GetInputWidth() const              {return fInputWidth;}
   size_t GetOutputWidth() const             {return fLayers.back().GetWidth();}
   ERegularization GetRegularization() const {return fR;}
   Scalar_t GetWeightDecay() const           {return fWeightDecay;}

   void SetBatchSize(size_t batchSize)       {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth)     {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J)     {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}
   void SetDropoutProbabilities(const std::vector<Scalar_t> & probabilities);

   void Print();
};
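
// Illustrative usage sketch (not part of the original interface): constructing
// and initializing a small regression network. The reference backend
// TReference<double> is an assumption; any architecture type providing
// Matrix_t and Scalar_t can be substituted.
//
//    TNet<TReference<double>> net(/*batchSize*/ 32, /*inputWidth*/ 20,
//                                 ELossFunction::kMeanSquaredError,
//                                 ERegularization::kL2, /*weightDecay*/ 1e-4);
//    net.AddLayer(64, EActivationFunction::kTanh);
//    net.AddLayer( 1, EActivationFunction::kIdentity);
//    net.Initialize(EInitialization::kGauss);
//    net.InitializeGradients();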

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet()
   : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
     fJ(ELossFunction::kMeanSquaredError), fR(ERegularization::kNone),
     fWeightDecay(0.0)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
   : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
     fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
     fWeightDecay(other.fWeightDecay)
{
   // Nothing to do here.
}
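
// The cross-architecture constructor below copies the topology, weights and
// biases of another net, possibly using a different architecture backend and a
// different batch size. A hedged sketch of its intended use (the backend names
// TReference and TCuda are assumptions about available architecture types):
//
//    TNet<TReference<double>> masterNet(1, nFeatures, ELossFunction::kCrossEntropy);
//    // ... add layers, initialize and train ...
//    TNet<TCuda<double>> deviceNet(batchSize, masterNet); // copies weights via TMatrixT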

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
   : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
     fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
     fWeightDecay(other.GetWeightDecay())
{
   fLayers.reserve(other.GetDepth());
   for (size_t i = 0; i < other.GetDepth(); i++) {
      AddLayer(other.GetLayer(i).GetWidth(),
               other.GetLayer(i).GetActivationFunction(),
               other.GetLayer(i).GetDropoutProbability());
      fLayers[i].GetWeights() = (TMatrixT<Scalar_t>) other.GetLayer(i).GetWeights();
      fLayers[i].GetBiases()  = (TMatrixT<Scalar_t>) other.GetLayer(i).GetBiases();
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    size_t inputWidth,
                                    ELossFunction J,
                                    ERegularization R,
                                    Scalar_t weightDecay)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
     fJ(J), fR(R), fWeightDecay(weightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::CreateClone(size_t batchSize)
   -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   TNet<Architecture_t, TSharedLayer<Architecture_t>> other(batchSize, fInputWidth,
                                                            fJ, fR);
   for (auto &l : fLayers) {
      other.AddLayer(l);
   }
   return other;
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                             EActivationFunction f,
                                             Scalar_t dropoutProbability)
{
   if (fLayers.size() == 0) {
      fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
   } else {
      size_t prevWidth = fLayers.back().GetWidth();
      fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Clear()
{
   fLayers.clear();
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename SharedLayer_t>
inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
   fLayers.emplace_back(fBatchSize, layer);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   for (auto &l : fLayers) {
      l.Initialize(m);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   for (auto &l : fLayers) {
      initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(l.GetBiasGradients(),   EInitialization::kZero);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                   bool applyDropout)
{
   fLayers.front().Forward(input, applyDropout);
   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X,
                                                    const Matrix_t &Y,
                                                    const Matrix_t &weights)
{
   // Gradient of the loss with respect to the output-layer activations.
   evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(),
                                     fJ, Y, fLayers.back().GetOutput(), weights);

   // Propagate gradients backwards through the hidden layers.
   for (size_t i = fLayers.size()-1; i > 0; i--) {
      auto & activation_gradient_backward = fLayers[i-1].GetActivationGradients();
      auto & activations_backward         = fLayers[i-1].GetOutput();
      fLayers[i].Backward(activation_gradient_backward,
                          activations_backward, fR, fWeightDecay);
   }
   // The first layer takes the input batch as its backward activations; no
   // gradient needs to be propagated further, hence the dummy matrix.
   fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
}
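
// Illustrative training-step sketch (assumption: the caller owns the update
// rule; TNet only computes gradients and stores them in its layers):
//
//    net.Forward(X, /*applyDropout*/ true);
//    auto loss = net.Loss(Y, weights, /*includeRegularization*/ true);
//    net.Backward(X, Y, weights);
//    for (size_t i = 0; i < net.GetDepth(); i++) {
//       auto &layer = net.GetLayer(i);
//       // update layer.GetWeights()/GetBiases() from
//       // layer.GetWeightGradients()/GetBiasGradients(), e.g. plain SGD.
//    }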

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y,
                                                const Matrix_t &weights,
                                                bool includeRegularization) const
   -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput(), weights);
   includeRegularization &= (fR != ERegularization::kNone);
   if (includeRegularization) {
      for (auto &l : fLayers) {
         loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
      }
   }
   return loss;
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X,
                                                const Matrix_t &Y,
                                                const Matrix_t &weights,
                                                bool applyDropout,
                                                bool includeRegularization)
   -> Scalar_t
{
   Forward(X, applyDropout);
   return Loss(Y, weights, includeRegularization);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      Matrix_t &X,
                                                      EOutputFunction f)
{
   Forward(X, false);
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      EOutputFunction f) const
{
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::GetNFlops()
   -> Scalar_t
{
   Scalar_t flops = 0;

   Scalar_t nb  = (Scalar_t) fBatchSize;
   Scalar_t nlp = (Scalar_t) fInputWidth;

   for (size_t i = 0; i < fLayers.size(); i++) {
      Layer_t & layer = fLayers[i];
      Scalar_t nl = (Scalar_t) layer.GetWidth();

      // Forward propagation.
      flops += nb * nl * (2.0 * nlp - 1); // Matrix multiplication.
      flops += nb * nl;                   // Add bias values.
      flops += 2 * nb * nl;               // Apply activation function and compute
                                          // its derivative.
      // Backward propagation.
      flops += nb * nl;                      // Hadamard product.
      flops += nlp * nl * (2.0 * nb - 1.0);  // Weight gradients.
      flops += nl * (nb - 1);                // Bias gradients.
      if (i > 0) {
         flops += nlp * nb * (2.0 * nl - 1.0); // Previous layer gradients.
      }
      nlp = nl;
   }
   return flops;
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
    const std::vector<Scalar_t> & probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i].SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i].SetDropoutProbability(1.0);
      }
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Print()
{
   std::cout << "DEEP NEURAL NETWORK:";
   std::cout << "   Loss function = " << static_cast<char>(fJ);
   std::cout << ", Depth = " << fLayers.size() << std::endl;

   size_t i = 1;
   for (auto & l : fLayers) {
      std::cout << "DNN Layer " << i << ":" << std::endl;
      l.Print();
      i++;
   }
}

} // namespace DNN
} // namespace TMVA

#endif