library(lattice) makeToyData<-function(n, parameters,as.pvalues=TRUE, correlated=TRUE, correlatedInformative=TRUE, pNoize=0,pNoizeBiMod=0 ) { n1=n2=floor(n/2) class=c(rep('X',n1),rep('O',n2)) data=data.frame(class) ## x1.true, x2.true > x.true are the real distribution from the parameters for(param in parameters) { if(!as.pvalues) { moy=param x1.true=rnorm(n1,mean=-moy, sd=1) x2.true=rnorm(n2,mean= moy, sd=1) x.true=c(x1.true,x2.true) preffix=paste('moy',param,sep='') } else { epsilon=param/10 moy=getMean(p.val=param,n=(n1+n2)) while(TRUE) { x1.true=rnorm(n1,mean=-moy) x2.true=rnorm(n2,mean=+moy) pval=t.test(x1.true, x2.true)$p.value if (abs(param-pval) < epsilon) break } x.true=c(x1.true,x2.true) preffix=paste('pval',param,sep='') } x = x.true+rnorm(length(x.true), sd=0.1*sd(x.true)) x=data.frame(x) names(x)=preffix data=cbind(data,x) ## Correlated feature if(correlated) { xc = x.true+rnorm(length(x.true), sd=0.1*sd(x.true)) xc=data.frame(xc) names(xc)=paste(preffix,'c',sep='') data=cbind(data,xc) } if(correlatedInformative) { ## Correlated but informative feature x1ci = x1.true+moy+rnorm(length(x1.true), sd=moy/2)##0.1*sd(x1.true)) x2ci = x2.true-moy+rnorm(length(x2.true), sd=moy/2)##0.1*sd(x2.true)) xci=c(x1ci,x2ci) xci=data.frame(xci) names(xci)=paste(preffix,'ci',sep='') data=cbind(data,xci) } } ## Make some noize if(pNoize!=0) data=makeNoize(data,n,pNoize) if(pNoizeBiMod!=0) data=makeNoize(data,n,pNoizeBiMod,bimod=TRUE) return( data ) } ## Utils makeNoize<-function(data,n,p,bimod=FALSE) { for(i in 1:p) { if(!bimod) { x=rnorm(n,sd=sqrt(20)) preffix='normNoize' } else { x1=rnorm(n/2,mean=-3*sqrt(20),sd=sqrt(20)) x2=rnorm(n/2,mean= 3*sqrt(20),sd=sqrt(20)) x=sample(c(x1,x2)) preffix='biNormNoize' } x=data.frame(x) names(x)=paste(preffix,i,sep='') data=cbind(data,x) } return(data) } ## Return the mean (mu) for distribution N(-mu,sd=1) N(mu,sd=1) getMean<-function(p.val,n) { epsilon=p.val/10 n1=n2=floor(n/2) moy=1 while(TRUE) { sum.p.val=0 for(i in 1:100) { sum.p.val=sum.p.val+ t.test(rnorm(n1,mean=-moy), rnorm(n2,mean=moy))$p.value } p.val.esti=sum.p.val/100 cat('moy=',moy,'\t p.val.estimated=',p.val.esti,'\n') if (abs(p.val.esti-p.val) < epsilon) break if(p.val.esti