# # QEIF.R (May 2013, by Xiaoling Dou) # ############################################################ ############################################################ ### find the Quadratic EIF (QEIF) ########################## ############################################################ # This function requires a dataset, chromosome number and # # loci. # # The loci should be a vector consisting of integers no # # larger than the total number of loci. # # The dataset should contain ID, phenotype, sex and # # genotypes. # # The first row of the dataset is the discription of each # # column, the second row contains the names of chromosomes,# # and the third row contains the locations of marker loci. # ############################################################ ############################################################ #rm(list=ls(all=TRUE)) QEIFs<- function(X=dataset, chromosome, loci){ # if (length(loci) != length(cl)) stop("length(loci) should be equal to length(cl) !") ### QTL data arrangement ### NM<- dim(X) N<- NM[1]-3; M<- NM[2]-3 #N: sample size; M: number of loci# geno<- matrix(0, nrow=N, ncol=M) X2<- X[-c(1:3),-c(1:3)] for(i in 1:M){ x0<- as.vector(X2[,i]) x1<- replace(x0, which(x0=="A"), -1) x2<- replace(x1, which(x1=="B"), 1) x3<- replace(x2, which(x2=="H"), 0) geno[,i]<- as.numeric(x3) #genotypes, N*M matrix# } X23<- X[2:3,-c(1:3)] #chromosome number and locations# chr.location<- paste(X23[1,], X23[2,], sep="_") colnames(geno)<- chr.location #chromosome and the locations# pheno<- as.numeric(X[-c(1:3), 2]) #phenotype: pheno# sex<- as.numeric(X[-c(1:3), 3]) #sex of the mice# chro<- which(X23[1,]==chromosome) loca.chr<- as.numeric(X23[2, chro]) #locations on the given chromosome# x<- cbind(pheno, sex, geno) #phenotype, sex and genotypes: N*(M+2) matrix# ### Find Lod scores, parameters and EIFs ### Lod<- numeric(M); theta1<- matrix(0, 5, M); logf1<- matrix(0, N, M) y<- pheno #phenotype# ui<- rep(1, N) vi<- sex #sex# xi0<- cbind(ui,vi) bh0<- solve(t(xi0) %*% xi0) %*% t(xi0) %*% y #coefficients under H0# e0<- y- xi0 %*% bh0 L0<- t(e0) %*% e0 sig0<- as.vector(L0/N) #sig0 is the squared sigma0.# theta0<- c(bh0, sig0) #parameters under H0# logc0<- (-log(2 * pi * sig0))/2 ; loge0<- (y-as.vector(xi0 %*% bh0))^2 /(-2* sig0) logf0<- logc0+loge0 ei<- matrix(0, nrow=N, ncol=M) for (i in 1:M){ zi<- geno[,i] #genotypes:z# wi<- 2* (zi^2) -1 #w# xi1<- cbind(zi,wi,ui,vi) ## bh1<- solve(t(xi1) %*% xi1)%*% t(xi1) %*% y #coefficients under H1# ei[,i]<- y - xi1 %*% bh1 L1<- t(ei[,i])%*% ei[,i] sig1<- as.vector(L1/N) ; theta1[,i]<- c(bh1, sig1) #sig1 is the squared sigma1; theta1 is the parameters under H1.# Lod[i]<- log((L0 / L1)^(N / 2))/log(10) #This is the LOD score.# logc1<- (-log(2 * pi * sig1))/2 ; loge1<- (y-(xi1 %*% bh1))^2 /(-2* sig1) logf1[,i]<- logc1+loge1 } logD<- (logf1 - logf0)*N / log(10) #Using it to find EIF of the lod scores.# EIF<- t(t(logD)- Lod) #EIF(N*M) is the empirical influence matrix of all the individuals for all LOD scores.# ### Specify the chromosome and the loci ### m <- length(chro) if (sum(loci > m ) >= 1) stop(paste("loci should be a vector containing some integers no larger than ", m, "!")) Eif <- EIF[,chro[loci]] Rifun<- function(i){ #i is the index of the loci; equation (18) in the paper.# u<- rep(1, N); v<- sex U<- cbind(u,v) zi<- geno[,i]; wi<- 2*((zi)^2) -1 ZiU <- cbind(zi, wi, U) Hi1 <- ZiU %*% solve(t(ZiU) %*% ZiU ) %*% t(ZiU) H0 <- U %*% solve(t(U) %*% U) %*% t(U) Ri<- Hi1-H0 return(Ri) } k<- length(loci) #Find Cov,the equation (17), in the paper.# Cov<- matrix(0, nrow=k, ncol=k ) R<- list(); r<- 0 for(i in chro[loci]){ r<- r+1 R[[r]]<- Rifun(i) } for(i in 1:k){ for(j in 1:k){ Cov[i,j]<- sum(diag( R[[i]] %*% R[[j]] )) * 2/ ( 2* log(10) )^2 } } ICov<- solve(Cov) #ICov is the inverse matrix of Cov.# vec1<- rep(1,k) #Use matrix J to remove the parallel shift.# J<- diag(k)- ( vec1 %*% t(vec1) %*% ICov ) /as.vector(t(vec1) %*% ICov %*% vec1) JCJ<- J %*% Cov %*% t(J) require(MASS) Gjcj<- ginv(JCJ) #Moore-Penrose generalized invers of the matrix JCJ # SgiS<- Eif %*% t(J) %*% Gjcj %*% J %*% t(Eif) Qeif<- diag(SgiS) return(list(Lod=Lod, EIF=EIF, J=J, Qeif=Qeif)) #Lod scores, EIF, J and Qeif are returned.# } ############################################################ #A function for plotting quadratic EIFs# #The figure shows the first 3 individuals having large QEIF values.# plot.qeif<- function(qeif=QEIF, mx=3){ m.qeif<- max(QEIF)+10 m.x<- which(rank(QEIF)> length(QEIF)-mx) oldpar<- par(no.readonly=TRUE) on.exit(par(oldpar)) par(cex=1.2) plot( QEIF, ylim=c(0, m.qeif), xlab="Index", ylab="QEIF") text(m.x , QEIF[m.x]-900, paste(m.x, " (", round(QEIF[m.x],0), ")"),col=1,cex=0.6) par(oldpar) } ############################################################ ############################################################ ### dataset ### X1<-matrix(scan("Data_LogADI.txt", what="character"), 173, byrow=TRUE) ### Input the dataset, specify chromosome and marker loci. ### qeifs<- QEIFs(X=X1, chromosome=3, loci=c(3,4,5)) QEIF <- qeifs$Qeif # Plot quadratic EIFs # plot.qeif(QEIF, mx=3) ############################################################