// Author: Vladimir Ilievski

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis       *
 * Package: TMVA                                                                   *
 * Class  : TDenseLayer                                                            *
 * Web    : http://tmva.sourceforge.net                                            *
 *                                                                                 *
 * Description:                                                                    *
 *      Dense Layer Class                                                          *
 *                                                                                 *
 * Authors (alphabetical):                                                         *
 *      Vladimir Ilievski      - CERN, Switzerland                                 *
 *                                                                                 *
 * Copyright (c) 2005-2015:                                                        *
 *      CERN, Switzerland                                                          *
 *      U. of Victoria, Canada                                                     *
 *      MPI-K Heidelberg, Germany                                                  *
 *      U. of Bonn, Germany                                                        *
 *                                                                                 *
 * Redistribution and use in source and binary forms, with or without              *
 * modification, are permitted according to the terms listed in LICENSE            *
 * (http://tmva.sourceforge.net/LICENSE)                                           *
 **********************************************************************************/

#ifndef TMVA_DNN_DENSELAYER
#define TMVA_DNN_DENSELAYER

#include "TMatrix.h"

#include "TMVA/DNN/GeneralLayer.h"
#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/CNN/ContextHandles.h"

#include <iostream>
#include <iomanip>
#include <vector>
#include <string>

namespace TMVA {
namespace DNN {

/** \class TDenseLayer

Generic layer class.

This generic layer class represents a dense layer of a neural network with
a given width n and activation function f. For an input vector
\f$\mathbf{x}\f$, the layer computes the pre-activations
\f$\mathbf{u} = \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$ and applies
the activation function f element-wise to obtain its output.

In addition to the weight and bias matrices, each layer allocates memory
for its activations and for the corresponding input tensor before evaluation
of the activation function, as well as for the gradients of the weights and
biases.

The layer provides member functions for the forward and backward propagation
of activations through the given layer.
*/
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:
   Tensor_t fInputActivation; ///< output of GEMM and input to activation function
   Tensor_t fDerivatives;     ///< activation function gradient

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

   typename Architecture_t::ActivationDescriptor_t fActivationDesc; ///< Descriptor for the activation function.

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute the activation of the layer for the given input. The input
    *  must be in 3D tensor form, with the different matrices corresponding to
    *  different events in the batch. Computes the activations as well as
    *  the first partial derivative of the activation function at those
    *  activations. */
   void Forward(Tensor_t &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward);

   /*! Print the layer info. */
   void Print() const;

   /*! Writes the information and the weights of the layer into an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights of the layer from an XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set the dropout probability */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   /*! Return the output of the GEMM (the pre-activations) before the activation function is applied. */
   const Tensor_t &GetInputActivation() const { return fInputActivation; }
   Tensor_t &GetInputActivation() { return fInputActivation; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};
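
/*
   Illustrative usage sketch (not part of the original interface): building a
   single dense layer with the reference backend and running one forward pass.
   The TReference<double> backend comes from "TMVA/DNN/Architectures/Reference.h";
   the tensor construction is elided below, since the exact Tensor_t API depends
   on the backend and the ROOT version.

      using namespace TMVA::DNN;
      using Arch = TReference<double>;

      TDenseLayer<Arch> layer(32,                          // batch size
                              16,                          // input width
                              8,                           // layer width
                              EInitialization::kGauss,     // weight initialization
                              1.0,                         // dropout probability (1.0 = keep all inputs)
                              EActivationFunction::kRelu,  // activation function f
                              ERegularization::kNone,      // regularization method
                              0.0);                        // weight decay

      Arch::Tensor_t input = ...;                  // batch of 32 events, each of width 16
      layer.Forward(input, false);                 // applyDropout = false
      const Arch::Tensor_t &output = layer.GetOutput();   // f(W x + theta) for each event
*/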

//
//
//  The Dense Layer Class - Implementation
//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   // should be {1, batchSize, width} but take from output
   fInputActivation = Tensor_t(this->GetOutput().GetShape());
   fDerivatives = Tensor_t(this->GetOutput().GetShape());

   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer)
   : VGeneralLayer<Architecture_t>(layer), fInputActivation(layer->GetInputActivation().GetShape()),
     fDropoutProbability(layer->GetDropoutProbability()), fF(layer->GetActivationFunction()),
     fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer)
   : VGeneralLayer<Architecture_t>(layer), fInputActivation(layer.GetInputActivation()),
     fDropoutProbability(layer.fDropoutProbability), fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
{
   fDerivatives = Tensor_t(this->GetOutput().GetShape());
   Architecture_t::InitializeActivationDescriptor(fActivationDesc, fF);
}

//______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // release activation descriptor
   Architecture_t::ReleaseDescriptor(fActivationDesc);
}

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
      // apply the dropout mask to the input; the backend manages its own
      // descriptors and workspace here
      Architecture_t::DropoutForward(input, static_cast<TDescriptors *>(nullptr),
                                     static_cast<TWorkspace *>(nullptr), this->GetDropoutProbability());
   }
   Architecture_t::MultiplyTranspose(this->GetOutput(), input, this->GetWeightsAt(0));
   Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));

   // keep a copy of the pre-activations: they are needed in the backward pass
   Architecture_t::Copy(this->GetInputActivation(), this->GetOutput());

   Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
}
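
/*
   For orientation, the quantities assembled by Backward() below correspond to
   standard dense-layer backpropagation; the actual kernels are delegated to
   the architecture backend. With X the layer input, U = X W^T + theta the
   pre-activations kept in fInputActivation, Y = f(U) the layer output, and
   dL/dY the incoming activation gradients:

      dL/dU     = f'(U) .* dL/dY     (element-wise; stored in fDerivatives)
      dL/dW     = (dL/dU)^T X        (weight gradients)
      dL/dtheta = row-wise sum of dL/dU over the batch   (bias gradients)
      dL/dX     = (dL/dU) W          (gradients_backward for the previous layer)

   addRegularizationGradients() then adds the weight-decay contribution to dL/dW.
*/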

//______________________________________________________________________________
template <typename Architecture_t>
auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
{
   if (this->GetDropoutProbability() != 1.0) {
      Architecture_t::DropoutBackward(this->GetActivationGradients(), static_cast<TDescriptors *>(nullptr),
                                      static_cast<TWorkspace *>(nullptr));
   }

   Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(), this->GetActivationGradients(),
                                              this->GetInputActivation(), this->GetActivationFunction(),
                                              fActivationDesc);

   Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
                            fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
                            activations_backward);

   addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
                                              this->GetWeightDecay(), this->GetRegularization());
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::Print() const
{
   std::cout << " DENSE Layer: \t";
   std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();                // input size
   std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) ";       // layer width

   std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() << " ," << std::setw(6)
             << this->GetOutput().GetShape()[0] << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";

   std::vector<std::string> activationNames = {"Identity", "Relu", "Sigmoid", "Tanh", "SymmRelu", "SoftSign", "Gauss"};
   std::cout << "\t Activation Function = ";
   std::cout << activationNames[static_cast<int>(fF)];
   if (fDropoutProbability != 1.)
      std::cout << "\t Dropout prob. = " << fDropoutProbability;
   std::cout << std::endl;
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // write layer width and activation function, plus the weight and bias matrices
   auto layerxml = gTools().xmlengine().NewChild(parent, nullptr, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, nullptr, "Width", gTools().StringFromInt(this->GetWidth()));

   int activationFunction = static_cast<int>(this->GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, nullptr, "ActivationFunction", TString::Itoa(activationFunction, 10));

   // write weights and bias matrix
   this->WriteMatrixToXML(layerxml, "Weights", this->GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this->GetBiasesAt(0));
}

//______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read layer weights and biases from XML
   this->ReadMatrixXML(parent, "Weights", this->GetWeightsAt(0));
   this->ReadMatrixXML(parent, "Biases", this->GetBiasesAt(0));
}

} // namespace DNN
} // namespace TMVA

#endif