/*!
\file pls.h
\brief Partial Least Squares
\author Martin Peters
$Date: 2010/03/29 20:35:21 $
$Revision: 1.7 $
----------------------------------------------------------------------------
MTK++ - C++ package of modeling libraries.
Copyright (C) 2005-2006 (see AUTHORS file for a list of contributors)
This file is part of MTK++.
MTK++ is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
MTK++ is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
----------------------------------------------------------------------------
*/
#ifndef PLS_H
#define PLS_H
#include
#include
#include
#include
#include
#include
#include
#include
#include "Utils/constants.h"
#include "Utils/object.h"
#include "BaseStats.h"
// - BOOST - //
#include
#include
#include
#include
#include // for diagonal matrix
#include
#include "boost/numeric/bindings/traits/ublas_matrix.hpp"
#include "boost/numeric/bindings/traits/ublas_vector.hpp"
#include "table.h"
// Short aliases for the Boost uBLAS and Boost numeric-bindings BLAS namespaces.
// NOTE(review): these aliases are declared at global scope in a header, so
// they become visible in every translation unit that includes this file.
namespace ublas = boost::numeric::ublas;
namespace blas = boost::numeric::bindings::blas;
namespace MTKpp
{
// Forward declaration: this header only refers to sheet through pointers.
class sheet;
// (class table; is not forward-declared here: "table.h" is included above.)
// ============================================================
// Class : pls()
// ------------------------------------------------------------
/*!
\class pls
\brief Partial Least Squares (PLS) regression

Holds the X and Y tables, the choice of PLS algorithm, the
cross-validation settings and the output sheet for a PLS model.
Derives from BaseStats.

\note The table and sheet pointers held by this class are raw pointers
and no destructor is declared here; ownership is not expressed in
this header. NOTE(review): presumably the caller owns them -- confirm
against pls.cpp.
\author Martin Peters
*/
// ============================================================
class pls : public BaseStats
{
public:
/*!
\brief Default pls Constructor

Takes no arguments; X, Y and the other settings can be supplied
afterwards through the set* member functions.
*/
pls();
/*!
\brief pls Constructor from Y and X tables
\param Y Y matrix (table pointer)
\param X X matrix (table pointer)
\param method PLS algorithm name (see itsMethod: KERNELPLS, SIMPLS, NIPLS)
\param nlvs number of latent variables
\param cv cross validation method (see CV: NONE, LOO, LNO, RANDOM)
\param bError error boolean, written by the constructor
(NOTE(review): presumably set to true on failure -- confirm in pls.cpp)
*/
pls(table* Y, table* X, std::string method, int nlvs, std::string cv,
bool& bError);
/*!
\brief pls Constructor from Y and X tables with an output sheet
\param Y Y matrix (table pointer)
\param X X matrix (table pointer)
\param method PLS algorithm name (see itsMethod: KERNELPLS, SIMPLS, NIPLS)
\param nlvs number of latent variables
\param cv cross validation method (see CV: NONE, LOO, LNO, RANDOM)
\param output sheet pointer where the model is stored (see outModel)
\param bError error boolean, written by the constructor
(NOTE(review): presumably set to true on failure -- confirm in pls.cpp)
*/
pls(table* Y, table* X, std::string method, int nlvs, std::string cv,
sheet* output, bool& bError);
/*!
\brief pls Constructor from table names
\param Y Y name
\param X X name
\param S sheet pointer
(NOTE(review): presumably the sheet in which the tables named Y and X
are looked up -- confirm in pls.cpp)
\param method PLS algorithm name (see itsMethod: KERNELPLS, SIMPLS, NIPLS)
\param nlvs number of latent variables
\param cv cross validation method (see CV: NONE, LOO, LNO, RANDOM)
\param output sheet pointer where the model is stored (see outModel)
\param bError error boolean, written by the constructor
(NOTE(review): presumably set to true on failure -- confirm in pls.cpp)
*/
pls(std::string Y, std::string X, sheet* S, std::string method, int nlvs,
std::string cv, sheet* output, bool& bError);
// pls Destructor -- intentionally left commented out; no destructor is
// declared for this class. (Kept as a plain comment so that Doxygen does
// not attach it to the next member.)
//virtual ~pls();
/*!
\brief Run PLS
\param bError error boolean, written by this function
(NOTE(review): presumably set to true on failure -- confirm in pls.cpp)
*/
void run(bool& bError);
/*!
\brief Run PLS with cross validation (see CV for the available schemes)
\param bError error boolean, written by this function
(NOTE(review): presumably set to true on failure -- confirm in pls.cpp)
*/
void runCV(bool& bError);
/*!
\brief Set X matrix
\param x table pointer
*/
void setX(table* x);
/*!
\brief Set Y matrix
\param y table pointer
*/
void setY(table* y);
/*!
\brief Set PLS Algorithm
\param g PLS Algorithm name (see itsMethod: KERNELPLS, SIMPLS, NIPLS)
*/
void setMethod(std::string g);
/*!
\brief Set Maximum number of iterations for iterative methods
\param i max number of iterations
\note NOTE(review): the parameter is a signed int while the maxIter
member is unsigned; how negative values are handled is not visible
from this header.
*/
void setMaxIter(int i);
/*!
\brief Set Convergence criteria for iterative methods
\param e Convergence criteria
*/
void setEpsilon(double e);
/*!
\brief Set Cross validation method
\param c CV method (see CV: NONE, LOO, LNO, RANDOM)
*/
void setCV(std::string c);
/*!
\brief Set number of samples to consider in RANDOM CV
\param i number of samples
\note NOTE(review): the parameter is a signed int while the nITER
member is unsigned; how negative values are handled is not visible
from this header.
*/
void setNITER(int i);
/*!
\brief Set size of test set in RANDOM and LNO CV
\param i size of test set
\note NOTE(review): the parameter is a signed int while the nTEST
member is unsigned; how negative values are handled is not visible
from this header.
*/
void setNTEST(int i);
/*!
\brief Set random number generator seed in RANDOM CV
\param s random number generator seed
*/
void setSEED(int s);
/*!
\brief Set number of Latent Variables to be considered
\param l number of LVs
\note NOTE(review): the parameter is a signed int while the nLV
member is unsigned; how negative values are handled is not visible
from this header.
*/
void setNLV(int l);
/*!
\brief Set the sheet where the model is stored
\param s sheet pointer
*/
void setOutModel(sheet* s);
protected: // Functions
/*!
\brief Kernel Partial Least Squares

\code
Y[N][M]                    X[N][R]
+-                 -+      +-                 -+
| Y11 Y12 .  .  Y1M |      | X11 X12 .  .  X1R |
| Y21 Y22 .  .  Y2M |      | X21 X22 .  .  X2R |
| .   .   .  .  .   |      | .   .   .  .  .   |
| .   .   .  .  .   |      | .   .   .  .  .   |
| YN1 YN2 .  .  YNM |      | XN1 XN2 .  .  XNR |
+-                 -+      +-                 -+
\endcode

PLS regression searches for a set of components (latent vectors)
that performs a simultaneous decomposition of X and Y with the
constraint that these components explain as much as possible of
the covariance between them. Then a regression step, where the
decomposition of X is used to predict Y, is performed.

\code
T -- Score Matrix for X        T[N][MaxComponents]
U -- Score Matrix for Y        U[N][MaxComponents]
P -- Loading Matrix for X      P[R][MaxComponents]
C -- Weight Matrix for Y       C[M][MaxComponents]
W -- Weighting Matrix          W[R][MaxComponents]
E -- Residual Matrix for X     E[N][R]
F -- Residual Matrix for Y     F[N][M]
B -- Regression Coefficients   B[R][M]  (dimensions implied by the
                                         formula for B below)

X = TP' + E
Y = TC' + F
Y_pred = X*B
where:
B = W * inv(P' * W) * C'
\endcode

-- Reference: Herve Abdi, Partial Least Squares (PLS) Regression,
University of Texas at Dallas

\return int (NOTE(review): presumably a status code; its meaning is
not visible from this header -- confirm in pls.cpp)
*/
int kernelPLS();
protected: // Data
/*!
\brief X matrix
\code
X[N][R]
+-                 -+
| X11 X12 .  .  X1R |
| X21 X22 .  .  X2R |
| .   .   .  .  .   |
| .   .   .  .  .   |
| XN1 XN2 .  .  XNR |
+-                 -+
\endcode
*/
table* itsX;
/*!
\brief Y matrix
\code
Y[N][M]
+-                 -+
| Y11 Y12 .  .  Y1M |
| Y21 Y22 .  .  Y2M |
| .   .   .  .  .   |
| .   .   .  .  .   |
| YN1 YN2 .  .  YNM |
+-                 -+
\endcode
*/
table* itsY;
/*!
\brief Number of rows in Y and X (N in the diagrams above)
*/
unsigned int YRows;
/*!
\brief Number of Columns in X (R in the diagrams above)
*/
unsigned int XColumns;
/*!
\brief PLS Algorithm
- KERNELPLS
- SIMPLS
- NIPLS
*/
std::string itsMethod;
/*!
\brief Maximum number of iterations for iterative methods
*/
unsigned int maxIter;
/*!
\brief Convergence criteria for iterative methods
*/
double epsilon;
/*!
\brief Cross validation method
- NONE : No CV
- LOO : Leave One Out cross validation
- LNO : Leave N Out cross validation, set nTEST parameter
- RANDOM : Leave nTEST out nITER times using SEED
*/
std::string CV;
/*!
\brief Number of prediction sets in CV

nEXT = N/nTEST
*/
unsigned int nEXT;
/*!
\brief Number of samples to consider in RANDOM CV
*/
unsigned int nITER;
/*!
\brief Size of test set in RANDOM and LNO CV

Example, if N = 100:
- LOO: nTEST = 1
- LNO: nTEST = 10 ==> nEXT = 10
- RANDOM: nTEST = 10 ==> nEXT = 10
*/
unsigned int nTEST;
/*!
\brief Random number generator seed in RANDOM CV
*/
int SEED;
/*!
\brief Number of Latent Variables to be considered
*/
unsigned int nLV;
/*!
\brief The sheet where the model is stored
*/
sheet* outModel;
};
} // MTKpp namespace
#endif // PLS_H