""" One-dimensional Gamma-Gaussian mixture density classes : Given a set of points the algo provides approcumate maximum likelihood estimates of the mixture distribution using an EM algorithm. Author: Bertrand Thirion and Merlin Keller 2005-2008 """ import numpy as np import scipy.stats as st import scipy.special as sp import numpy.random as nr import scipy.optimize.optimize as so # ------------------------------------------------------------ # ------ auxiliary functions --------------------------------- # ------------------------------------------------------------ def _dichopsi_log(u,v,y,eps = 0.00001): """ this function implements the dichotomic part of the solution of the psi(c)-log(c)=y """ if u>v: s = u u = v v = s t = (u+v)/2 if np.absolute(u-v)y: return _dichopsi_log(u,t,y,eps) else: return _dichopsi_log(t,v,y,eps) def _psi_solve(y,eps = 0.00001): """ This function solves solve psi(c)-log(c)=y by dichotomy """ if y>0: print "y", y raise ValueError, "y>0, the problem cannot be solved" u = 1. if y>sp.psi(u)-sp.log(u): while sp.psi(u)-sp.log(u)y: u = u/2 return _dichopsi_log(u,2*u,y,eps) def _compute_c(x,z,eps = 0.00001): """ this function returns the mle of the shape parameter if a 1D gamma density """ eps = 1.e-7 y = np.dot(z,np.log(x))/np.sum(z) - np.log(np.dot(z,x)/np.sum(z)) if y>-eps: c = 10 else: c = _psi_solve(y,eps = 0.00001) return c class Gamma: """ This is the basic one dimensional Gaussian-Gamma Mixture estimation class Note that it can work with positive or negative values, as long as there is at least one positive value. NB : The gamma distribution is defined only on positive values. 5 parameters are used: - mean: gaussian mean - var: gaussian variance - shape: gamma shape - scale: gamma scale - mixt: mixture parameter (weight of the gamma) """ def __init__(self, shape=1, scale=1): self.shape = shape self.scale = scale def parameters(self): print "shape: ", self.shape, "scale: ", self.scale def check(self,x): if (x.min()<0): raise ValueError, "negative values in input" def estimate(self,x, eps = 1.e-7): """ ML estimation of the Gamma parameters """ self.check(x) n = np.size(x) y = np.sum(np.log(x))/n - np.log(np.sum(x)/n) if y>-eps: self.shape = 1 else: self.shape = _psi_solve(y) self.scale = np.sum(x)/(n*self.shape) # ------------------------------------------------------------ # ------ Gamma-Gaussian Mixture class------------------------- # ------------------------------------------------------------ class GGM: """ This is the basic one dimensional Gaussian-Gamma Mixture estimation class Note that it can work with positive or negative values, as long as there is at least one positive value. NB : The gamma distribution is defined only on positive values. 5 scalar members - mean: gaussian mean - var: gaussian variance (non-negative) - shape: gamma shape (non-negative) - scale: gamma scale (non-negative) - mixt: mixture parameter (non-negative, weight of the gamma) """ def __init__(self, shape=1, scale=1,mean=0, var=1, mixt=0.5): self.shape = shape self.scale = scale self.mean = mean self.var = var self.mixt = mixt def parameters(self): """ print the paramteres of self """ print "Gaussian: mean: ", self.mean, "variance: ", self.var print "Gamma: shape: ", self.shape, "scale: ", self.scale print "Mixture gamma: ",self.mixt, "Gaussian: ",1-self.mixt def check(self,x): """ Check function: """ if (x.max()>0): return True else: raise ValueError, "all values are negative or null. 
Cannot proceed" def Mstep(self,x,z): """ Mstep of the model: maximum likelihood estimation of the parameters of the model Parameters: ----------- - x array of shape(nbitems), input data - z array of shape(nbitrems,2): the membership matrix """ # z[0,:] is the likelihood to be generated by the gamma # z[1,:] is the likelihood to be generated by the gaussian sz = np.sum(z,0) sz = sz+(sz==0) i = np.nonzero(x>0) i = np.reshape(i,(np.size(i))) self.shape = _compute_c(x[i],z[i,0],eps = 0.00001) self.scale = np.dot(x[i],z[i,0])/(sz[0]*self.shape) self.mean = np.dot(x,z[:,1])/sz[1] self.var = np.dot((x-self.mean)**2,z[:,1])/sz[1] def Estep(self,x): """ E step of the estimation: Estimation of ata membsership Parameters ---------- x array of shape(nbitems), input data Results: ----------- - z array of shape(nbitrems,2): the membership matrix """ z = np.zeros((np.size(x),2),'d') a = self.shape b = self.scale eps = 1.e-7 # t = x/self.scale # y = a*np.log(a)-np.log(sp.gamma(a))+(a-1)*np.log(t)-a*t-np.log(b); i = np.nonzero(x>0) i = np.reshape(i,(np.size(i))) y = -a*np.log(b)-np.log(sp.gamma(a))+(a-1)*np.log(x[i])-x[i]/b; z[i,0] = np.exp(y); print z m = self.mean v = self.var z[:,1]= 1./np.sqrt(2*np.pi*v)*np.exp(-(x-m)**2/(2*v)) R = np.array([self.mixt,1.-self.mixt]) z = z*R z = np.maximum(z,eps) L = np.sum(np.log(np.sum(z,1)))/np.size(x) sz = np.sum(z,1) sz = sz+(sz==0) z = np.transpose(np.transpose(z)/sz) R = np.sum(z,0)/np.size(x) self.mixt = R[0] return z,L def estimate(self, x, niter=100, delta=0.0001, verbose=False): """ Complete EM estimation procedure Parameters: ------------- - x: array of shape(nbitems): the data to be processed - niter = 100: max nb of iterations - delta = 0.001: criterion for convergence """ if self.check(x): z,L = self.Estep(x) L0 = L-2*delta for i in range(niter): self.Mstep(x,z) z,L = self.Estep(x) if verbose: print i,L if (L0) i = np.reshape(i,(np.size(i))) z = np.zeros(np.size(c),'d') lz = -a*np.log(b)-np.log(sp.gamma(a))+(a-1)*np.log(c[i])-c[i]/b; z[i] = np.exp(lz)*p*dc import matplotlib.pylab as mp mp.figure() mp.plot(0.5 *(c[1:] + c[:-1]),h) mp.plot(c,y,'r') mp.plot(c,z,'g') mp.plot(c,z+y,'k') mp.title('Fit of the density with a Gamma-Gaussians mixture') mp.legend(('data','gaussian acomponent','gamma component', 'mixture distribution')) def posterior(self,x): """ Compute the posterior probability of observing the data x under the two components Parameters: ------------- - x: array of shape(nbitems): the data to be processed Results: --------- - y,pg : arrays of shape (nbitem) corresponding to the posterior """ p = self.mixt #gaussian part m = self.mean v = self.var y = (1 - p)*1./np.sqrt(2*np.pi*v)*np.exp(-(x-m)**2/(2*v)) # positive gamma b = self.scale a = self.shape pg = np.zeros(np.size(x),'d') i = np.nonzero(x>0); if np.size(i)>0: i = np.reshape(i,(np.size(i))) lz = -a*np.log(b)-sp.gammaln(a)+(a-1)*np.log(x[i])-x[i]/b; pg[i] = np.exp(lz)*p return y,pg # ------------------------------------------------------------ # ------ double-Gamma-Gaussian Mixture class------------------ # ------------------------------------------------------------ class GGGM: """ This is the basic one dimensional Gamma-Gaussian-Gamma Mixture estimation class, where the frist gamma has a negative sign, while the second one has a positive sign 7 parameters are used: - shape_n: negative gamma shape - scale_n: negative gamma scale - mean: gaussian mean - var: gaussian variance - shape_p: positive gamma shape - scale_p: positive gamma scale - mixt: array of mixture parameter (weights of the 

# ------------------------------------------------------------
# ------ double-Gamma-Gaussian Mixture class ------------------
# ------------------------------------------------------------

class GGGM:
    """
    This is the basic one dimensional Gamma-Gaussian-Gamma Mixture
    estimation class, where the first gamma has a negative sign,
    while the second one has a positive sign.

    7 parameters are used:
    - shape_n: negative gamma shape
    - scale_n: negative gamma scale
    - mean: gaussian mean
    - var: gaussian variance
    - shape_p: positive gamma shape
    - scale_p: positive gamma scale
    - mixt: array of mixture parameters
      (weights of the n-gamma, gaussian and p-gamma)
    """

    def __init__(self, shape_n=1, scale_n=1, mean=0, var=1,
                 shape_p=1, scale_p=1, mixt=np.array([1.0, 1.0, 1.0]) / 3):
        """
        Initialization of the class

        Parameters
        ----------
        shape_n=1, scale_n=1 : parameters of the negative gamma;
            note that they should be positive
        mean=0, var=1 : parameters of the gaussian; var>0
        shape_p=1, scale_p=1 : parameters of the positive gamma;
            note that they should be positive
        mixt=np.array([1.0,1.0,1.0])/3 : the mixing proportions;
            they should be positive and sum to 1
        """
        self.shape_n = shape_n
        self.scale_n = scale_n
        self.mean = mean
        self.var = var
        self.shape_p = shape_p
        self.scale_p = scale_p
        # copy the proportions, so that the default array is not
        # modified across instances
        self.mixt = np.array(mixt)

    def parameters(self):
        """
        Simply print the parameters
        """
        print "Negative Gamma: shape: ", self.shape_n, \
            "scale: ", self.scale_n
        print "Gaussian: mean: ", self.mean, "variance: ", self.var
        print "Positive Gamma: shape: ", self.shape_p, "scale: ", \
            self.scale_p
        mixt = self.mixt
        print "Mixture neg. gamma: ", mixt[0], "Gaussian: ", mixt[1], \
            "pos. gamma: ", mixt[2]

    def check(self, x):
        """Not implemented yet
        """
        pass

    def init(self, x, mixt=None):
        """
        Moment-based initialization of the different parameters

        Parameters
        ----------
        x : array of shape (nbitems), the data to be processed
        mixt=None : array of shape (3), prior mixing proportions;
            if mixt is None, the classes have equal weights
        """
        if mixt is not None:
            if np.size(mixt) == 3:
                self.mixt = np.ravel(mixt)
            else:
                raise ValueError('bad size for mixt')

        # gaussian
        self.mean = np.mean(x)
        self.var = np.var(x)

        # negative gamma
        i = np.nonzero(x < 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            mn = -np.mean(x[i])
            vn = np.var(x[i])
            self.scale_n = vn / mn
            self.shape_n = mn / self.scale_n
        else:
            self.mixt[0] = 0

        # positive gamma
        i = np.nonzero(x > 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            mp = np.mean(x[i])
            vp = np.var(x[i])
            self.scale_p = vp / mp
            self.shape_p = mp / self.scale_p
        else:
            self.mixt[2] = 0

        self.mixt = self.mixt / np.sum(self.mixt)

    def init_fdr(self, x, dof=-1):
        """
        Initialization of the class based on an fdr heuristic: the
        probability to be in the positive component is proportional to
        the 'positive fdr' of the data.  The same holds for the
        negative part.  The point is that the gamma parts should model
        nothing more than the tails of the distribution.

        Parameters
        ----------
        x : array of shape (nbitems), the data under consideration
        dof=-1 : number of degrees of freedom if x is thought to be a
            Student variate; by default, it is handled as a normal
        """
        from nipy.neurospin.utils import emp_null as en
        lfdr = en.FDR(x)

        # positive gamma
        i = np.nonzero(x > 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            if dof < 0:
                pvals = st.norm.sf(x)
            else:
                pvals = st.t.sf(x, dof)
            q = lfdr.all_fdr_from_pvals(pvals)
            z = 1 - q[i]
            sz = np.sum(z)
            if sz == 0:
                self.mixt[2] = 0.5 / np.size(x)
            else:
                if sz > 0.5 * np.size(i):
                    z = 0.5 * z
                    sz = np.sum(z)
                self.shape_p = _compute_c(x[i], z, eps=0.00001)
                self.scale_p = np.dot(x[i], z) / (sz * self.shape_p)
                self.mixt[2] = sz / np.size(i)
        else:
            self.mixt[2] = 0

        # negative gamma
        i = np.nonzero(x < 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            if dof < 0:
                pvals = st.norm.cdf(x)
            else:
                pvals = st.t.cdf(x, dof)
            q = lfdr.all_fdr_from_pvals(pvals)
            z = 1 - q[i]
            sz = np.sum(z)
            if sz == 0:
                self.mixt[0] = 0.5 / np.size(x)
            else:
                if sz > 0.5 * np.size(i):
                    z = 0.5 * z
                    sz = np.sum(z)
                self.shape_n = _compute_c(-x[i], z, eps=0.00001)
                self.scale_n = -np.dot(x[i], z) / (sz * self.shape_n)
                self.mixt[0] = sz / np.size(i)
        else:
            self.mixt[0] = 0

        self.mixt[1] = 1 - self.mixt[0] - self.mixt[2]

    def Mstep(self, x, z):
        """
        Mstep of the estimation: maximum likelihood update of the
        parameters of the three components

        Parameters
        ----------
        x : array of shape (nbitems), input data
        z : array of shape (nbitems, 3), probabilistic membership
        """
        tiny = 1.e-15
        sz = np.maximum(np.sum(z, 0), tiny)

        # negative gamma
        i = np.nonzero(x < 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            self.shape_n = _compute_c(-x[i], z[i, 0], eps=0.00001)
            self.scale_n = np.dot(-x[i], z[i, 0]) / (sz[0] * self.shape_n)
        else:
            self.shape_n = 1
            self.scale_n = 1

        # gaussian
        self.mean = np.dot(x, z[:, 1]) / sz[1]
        self.var = np.dot((x - self.mean) ** 2, z[:, 1]) / sz[1]

        # positive gamma
        i = np.nonzero(x > 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            self.shape_p = _compute_c(x[i], z[i, 2], eps=0.00001)
            self.scale_p = np.dot(x[i], z[i, 2]) / (sz[2] * self.shape_p)
        else:
            self.shape_p = 1
            self.scale_p = 1

    def Estep(self, x):
        """
        E step: update the probabilistic memberships of the three
        components

        Parameters
        ----------
        x : array of shape (nbitems), the input data

        Results
        -------
        z : array of shape (nbitems, 3), the probabilistic membership
        L : average log-likelihood of the data

        Note
        ----
        - z[:, 0] is the membership of the negative gamma
        - z[:, 1] is the membership of the gaussian
        - z[:, 2] is the membership of the positive gamma
        """
        z = np.zeros((np.size(x), 3), 'd')

        # negative gamma
        a = self.shape_n
        b = self.scale_n
        i = np.nonzero(x < 0)
        i = np.reshape(i, (np.size(i)))
        swx = -np.array(x[i])
        y = -a * np.log(b) - sp.gammaln(a) + (a - 1) * np.log(swx) - swx / b
        z[i, 0] = np.exp(y)

        # gaussian
        m = self.mean
        v = self.var
        z[:, 1] = 1. / np.sqrt(2 * np.pi * v) * np.exp(-(x - m) ** 2 / (2 * v))

        # positive gamma
        a = self.shape_p
        b = self.scale_p
        i = np.nonzero(x > 0)
        i = np.reshape(i, (np.size(i)))
        swx = np.array(x[i])
        y = -a * np.log(b) - sp.gammaln(a) + (a - 1) * np.log(swx) - swx / b
        z[i, 2] = np.exp(y)

        # weighting by the mixture proportions and normalization
        R = self.mixt
        z = z * R
        L = np.sum(np.log(np.sum(z, 1))) / np.size(x)

        sz = np.sum(z, 1)
        sz = sz + (sz == 0)
        z = (z.T / sz).T

        R = np.sum(z, 0) / np.size(x)
        self.mixt = R
        return z, L

    def estimate(self, x, niter=100, delta=1.e-4, bias=0, verbose=0):
        """
        Whole EM estimation procedure

        Parameters
        ----------
        x : array of shape (nbitems), input data
        niter = 100 : max number of iterations
        delta = 1.e-4 : increment in LL at which convergence is declared
        bias = 0 : lower bound on the gaussian variance
            (to avoid shrinkage)
        verbose = 0 : verbosity level

        Results
        -------
        z : array of shape (nbitems, 3), the membership matrix
        """
        z, L = self.Estep(x)
        L0 = L - 2 * delta
        for i in range(niter):
            self.Mstep(x, z)
            if bias > 0:
                self.var = np.maximum(bias, self.var)
            z, L = self.Estep(x)
            if verbose:
                print i, L
            if (L < L0 + delta):
                break
            L0 = L
        if bias > 0:
            self.var = np.maximum(bias, self.var)
        return z

    def posterior(self, x):
        """
        ng, y, pg = self.posterior(x)
        Compute the posterior probability of observing the data x under
        the three components: negative gamma, gaussian, positive gamma

        Parameters
        ----------
        x : array of shape (nbitems), the data to be processed

        Results
        -------
        ng, y, pg : arrays of shape (nbitems), the data likelihoods
            under the three components, weighted by the mixture
            proportions (proportional to the posteriors)
        """
        p = self.mixt
        # negative gamma
        b = self.scale_n
        a = self.shape_n
        ng = np.zeros(np.size(x), 'd')
        i = np.nonzero(x < 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            lz = -a * np.log(b) - sp.gammaln(a) + \
                (a - 1) * np.log(-x[i]) + x[i] / b
            ng[i] = np.exp(lz) * p[0]

        # gaussian part
        m = self.mean
        v = self.var
        y = p[1] * 1. / np.sqrt(2 * np.pi * v) * \
            np.exp(-(x - m) ** 2 / (2 * v))

        # positive gamma
        b = self.scale_p
        a = self.shape_p
        pg = np.zeros(np.size(x), 'd')
        i = np.nonzero(x > 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            lz = -a * np.log(b) - sp.gammaln(a) + \
                (a - 1) * np.log(x[i]) - x[i] / b
            pg[i] = np.exp(lz) * p[2]
        return ng, y, pg

    def component_likelihood(self, x):
        """
        Compute the normalized likelihood of the data x under the three
        components: negative gamma, gaussian, positive gamma

        Parameters
        ----------
        x : array of shape (nbitems), the data under evaluation

        Results
        -------
        ng, y, pg : three arrays of shape (nbitems) that give the
            normalized likelihood of the data under the 3 components

        Note
        ----
        ng + y + pg = np.ones(nbitems)
        """
        # negative gamma
        b = self.scale_n
        a = self.shape_n
        ng = np.zeros(np.size(x), 'd')
        i = np.nonzero(x < 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            lz = -a * np.log(b) - sp.gammaln(a) + \
                (a - 1) * np.log(-x[i]) + x[i] / b
            ng[i] = np.exp(lz)

        # gaussian part
        m = self.mean
        v = self.var
        y = 1. / np.sqrt(2 * np.pi * v) * np.exp(-(x - m) ** 2 / (2 * v))

        # positive gamma
        b = self.scale_p
        a = self.shape_p
        pg = np.zeros(np.size(x), 'd')
        i = np.nonzero(x > 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            lz = -a * np.log(b) - sp.gammaln(a) + \
                (a - 1) * np.log(x[i]) - x[i] / b
            pg[i] = np.exp(lz)

        sl = ng + y + pg
        ng = ng / sl
        y = y / sl
        pg = pg / sl
        return ng, y, pg

    def show(self, x, mpaxes=None):
        """
        Visualization of the mixture, superimposed on the empirical
        histogram of x

        Parameters
        ----------
        x : array of data of shape (nbitems)
        mpaxes=None : axes handle used for the plot;
            if None, new axes are created
        """
        step = 3.5 * np.std(x) / np.exp(np.log(np.size(x)) / 3)
        bins = max(10, int((x.max() - x.min()) / step))
        h, c = np.histogram(x, bins)
        h = h.astype('d') / np.size(x)
        dc = c[1] - c[0]
        p = self.mixt

        # negative gamma
        b = self.scale_n
        a = self.shape_n
        ng = np.zeros(np.size(c), 'd')
        i = np.nonzero(c < 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            lz = -a * np.log(b) - sp.gammaln(a) + \
                (a - 1) * np.log(-c[i]) + c[i] / b
            ng[i] = np.exp(lz) * p[0]

        # gaussian part
        m = self.mean
        v = self.var
        y = p[1] * 1. / np.sqrt(2 * np.pi * v) * \
            np.exp(-(c - m) ** 2 / (2 * v))

        # positive gamma
        b = self.scale_p
        a = self.shape_p
        pg = np.zeros(np.size(c), 'd')
        i = np.nonzero(c > 0)
        if np.size(i) > 0:
            i = np.reshape(i, (np.size(i)))
            lz = -a * np.log(b) - sp.gammaln(a) + \
                (a - 1) * np.log(c[i]) - c[i] / b
            pg[i] = np.exp(lz) * p[2]

        z = y + pg + ng

        import matplotlib.pylab as mp
        if mpaxes is None:
            mp.figure()
            ax = mp.subplot(1, 1, 1)
        else:
            ax = mpaxes
        ax.plot(0.5 * (c[1:] + c[:-1]), h / dc, linewidth=2)
        ax.plot(c, ng, 'c', linewidth=2)
        ax.plot(c, y, 'r', linewidth=2)
        ax.plot(c, pg, 'g', linewidth=2)
        ax.plot(c, z, 'k', linewidth=2)
        ax.set_title('Fit of the density with a Gamma-Gaussian mixture',
                     fontsize=16)
        l = ax.legend(('data', 'negative gamma component',
                       'gaussian component', 'positive gamma component',
                       'mixture distribution'))
        for t in l.get_texts():
            t.set_fontsize(12)
        ax.set_xticklabels(ax.get_xticks(), fontsize=16)
        ax.set_yticklabels(ax.get_yticks(), fontsize=16)

    def SAEM(self, x, burnin=100, niter=200, verbose=False):
        """
        SAEM estimation procedure

        Parameters
        ----------
        x : array of shape (nbitems), the observations
        burnin = 100 : number of burn-in SEM iterations
            (discarded values)
        niter = 200 : maximum number of SAEM iterations for convergence
            to a local maximum
        verbose = False : verbosity level

        Note
        ----
        The Gaussian distribution mean is fixed to zero

        Results
        -------
        LL : array of the successive log-likelihood values
        """
        # initialization
        n = len(x)

        # averaging steps
        step = np.ones(burnin + niter)
        step[burnin:] = 1 / (1.0 + np.arange(niter))

        # posterior probabilities of class membership
        P = np.zeros((3, n))

        # complete model sufficient statistics
        s = np.zeros((8, 1))

        # averaged sufficient statistics
        A = np.zeros((8, 1))

        # successive log-likelihood values
        LL = np.zeros(burnin + niter)

        # mean fixed to zero
        self.mean = 0

        # main loop
        for t in xrange(burnin + niter):
            # E-step (posterior probabilities)
            P[0] = st.gamma.pdf(-x, self.shape_n, scale=self.scale_n)
            P[1] = st.norm.pdf(x, 0, np.sqrt(self.var))
            P[2] = st.gamma.pdf(x, self.shape_p, scale=self.scale_p)
            P *= self.mixt.reshape(3, 1)
            P /= P.sum(axis=0)

            # S-step (class label simulation)
            # labels: -1 = negative gamma, 0 = gaussian, +1 = positive gamma
            Z = np.zeros(n)
            u = nr.uniform(size=n)
            Z[u < P[0]] -= 1
            Z[u > 1 - P[2]] += 1

            # A-step (sufficient statistics)
            s[0] = (Z == 0).sum()                    # gaussian count
            s[1] = (Z == +1).sum()                   # positive gamma count
            s[2] = (Z == -1).sum()                   # negative gamma count
            s[3] = np.abs(x[Z == +1]).sum()          # sum |x|, positive class
            s[4] = np.abs(x[Z == -1]).sum()          # sum |x|, negative class
            s[5] = (x[Z == 0] ** 2).sum()            # sum x**2, gaussian class
            s[6] = np.log(np.abs(x[Z == +1])).sum()  # sum log|x|, positive class
            s[7] = np.log(np.abs(x[Z == -1])).sum()  # sum log|x|, negative class

            # averaged sufficient statistics
            A = A + step[t] * (s - A)

            # M-step (maximization of the expected log-likelihood)
            self.var = A[5] / A[0]

            def Qp(Y):
                """ Negative expected log-likelihood of the positive
                gamma class, Y = [shape, rate] (minimized by fmin) """
                return A[6] * (1 - Y[0]) + A[3] * Y[1] \
                    - A[1] * (Y[0] * np.log(Y[1]) - np.log(sp.gamma(Y[0])))

            def Qn(Y):
                """ Negative expected log-likelihood of the negative
                gamma class, Y = [shape, rate] (minimized by fmin) """
                return A[7] * (1 - Y[0]) + A[4] * Y[1] \
                    - A[2] * (Y[0] * np.log(Y[1]) - np.log(sp.gamma(Y[0])))

            Y = so.fmin(Qp, [self.shape_p, 1 / self.scale_p], disp=0)
            self.shape_p = Y[0]
            self.scale_p = 1 / Y[1]

            Y = so.fmin(Qn, [self.shape_n, 1 / self.scale_n], disp=0)
            self.shape_n = Y[0]
            self.scale_n = 1 / Y[1]

            self.mixt = np.array([A[2], A[0], A[1]]) / n

            LL[t] = np.log(np.array(self.posterior(x)).sum(axis=0)).sum()
            if verbose:
                print "Iteration " + str(t) + " out of " + \
                    str(burnin + niter),
                print "LL = ", str(LL[t])

        self.mixt = self.mixt.squeeze()
        return LL

    def ROC(self, x):
        """
        Plot the ROC curve for separating the positive gamma
        distribution from the two other modes, as predicted by the
        current parameter values

        Parameters
        ----------
        x : array of shape (nbitems), the observations

        Results
        -------
        P : array of shape (2, nbitems)
            P[0] : false positive rates
            P[1] : true positive rates
        """
        import matplotlib.pylab as mp
        p = len(x)
        P = np.zeros((2, p))

        # false positives
        P[0] = (self.mixt[0] *
                st.gamma.cdf(-x, self.shape_n, scale=self.scale_n) +
                self.mixt[1] * st.norm.sf(x, 0, np.sqrt(self.var))) / \
            (self.mixt[0] + self.mixt[1])

        # true positives
        P[1] = st.gamma.sf(x, self.shape_p, scale=self.scale_p)

        mp.figure()
        I = P[0].argsort()
        mp.plot(P[0, I], P[0, I], 'r-')
        mp.plot(P[0, I], P[1, I], 'g-')
        mp.legend(('False positive rate', 'True positive rate'))
        return P
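
# ------------------------------------------------------------
# ------ illustrative usage -----------------------------------
# ------------------------------------------------------------
# Minimal usage sketch: simulated data made of a Gaussian null component
# plus a positive Gamma tail are fitted with the two mixture models
# defined above. Sample sizes and distribution parameters are arbitrary
# illustration choices, not recommended settings.

if __name__ == '__main__':
    nr.seed(1)
    # simulated data: Gaussian bulk + positive Gamma tail
    x = np.hstack((nr.randn(1000), nr.gamma(3., 2., 200)))

    # two-class model: positive Gamma + Gaussian
    ggm = GGM()
    ggm.estimate(x)
    ggm.parameters()

    # three-class model: negative Gamma + Gaussian + positive Gamma
    gggm = GGGM()
    gggm.init(x)
    z = gggm.estimate(x)
    gggm.parameters()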