################################################################################## # High-dimensional t test for one-sample mean, two-sample or multiple sample means # x: the sample with the smallest sample size among all the samples # y.list: a list consisting of all other samples # mu: a claimed population mean for one-sample problem with default zero value ################################################################################## HDt.test=function(x,y.list=NULL,mu=0){ nx=dim(x)[1];px=dim(x)[2] #nx is the smallest sample size; p is the data dimension df=nx*(nx-1)/2-1 # degrees of freedom if(is.null(y.list)){ if(nx<3) stop("not enough 'x' vectors") x.mat=(x-mu)%*%t(x-mu) x.new=x.mat[upper.tri(x.mat)] ht.stat=mean(x.new)/sqrt(2/(nx*(nx-1))*1/df*sum((x.new-mean(x.new))^2)) # test stat pval=1-pt(ht.stat,df) # p-value }else{ n.sample=length(y.list) x.new=0 for(i in 1:n.sample){ y=y.list[[i]] ny=dim(y)[1] if(nx>ny) stop("sample size y should be greater than x") if(nx<3) stop("not enough 'x' vectors") x=x-sqrt(nx/ny)*y[1:nx,]+1/sqrt(nx*ny)*matrix(colSums(y[1:nx,]),ncol=px,nrow=nx,byrow=T)-1/ny*matrix(colSums(y),ncol=px,nrow=nx,byrow=T) x.mat=x%*%t(x) x.new=x.new+x.mat[upper.tri(x.mat)] } ht.stat=mean(x.new)/sqrt(2/(nx*(nx-1))*1/df*sum((x.new-mean(x.new))^2)) pval=1-pt(ht.stat,df) } fval=list(statistic=ht.stat,p.value=pval) return(fval) }