# # Orthogonal.R (May 2013, by Xiaoling Dou) # ############################################################ ############################################################ ### Find EIFC by the orthogonal polynomial method (EIFC) ### ############################################################ # This function requires a dataset, chromosome number, loci# # and the power of the orthogonal polynomial. # # The loci should be a vector consisting of integers no # # larger than the total number of loci. # # The dataset should contain ID, phenotype, sex and # # genotypes. # # The first row of the dataset is the description of each # # column, the second row contains the name of chromosomes, # # and the third row contains the locations of marker loci. # ############################################################ ############################################################ #rm(list=ls(all=TRUE)) orthogonal.f<- function(X=dataset, chromosome, loci, power){ ### QTL data arrangement ### NM<- dim(X) N<- NM[1]-3; M<- NM[2]-3 #N: sample size; M: number of loci# geno<- matrix(0, nrow=N, ncol=M) X2<- X[-c(1:3),-c(1:3)] for(i in 1:M){ x0<- as.vector(X2[,i]) x1<- replace(x0, which(x0=="A"), -1) x2<- replace(x1, which(x1=="B"), 1) x3<- replace(x2, which(x2=="H"), 0) geno[,i]<- as.numeric(x3) #genotypes, N*M matrix# } X23<- X[2:3,-c(1:3)] #chromosome number and locations# chr.location<- paste(X23[1,], X23[2,], sep="_") colnames(geno)<- chr.location #chromosome and the locations# pheno<- as.numeric(X[-c(1:3), 2]) #phenotype: pheno# sex<- as.numeric(X[-c(1:3), 3]) #sex of the mice# chro<- which(X23[1,]==chromosome) loca.chr<- as.numeric(X23[2, chro]) #locations on the given chromosome# x<- cbind(pheno, sex, geno) #phenotype, sex and genotypes: N*(M+2) matrix# ### Find Lod scores, parameters and EIFs ### Lod<- numeric(M); theta1<- matrix(0, 5, M); logf1<- matrix(0, N, M) y<- pheno #phenotype# ui<- rep(1, N) vi<- sex #sex# xi0<- cbind(ui,vi) bh0<- solve(t(xi0) %*% xi0) %*% t(xi0) %*% y #coefficients under H0# e0<- y- xi0 %*% bh0 L0<- t(e0) %*% e0 sig0<- as.vector(L0/N) #sig0 is the squared sigma0.# theta0<- c(bh0, sig0) #parameters under H0# logc0<- (-log(2 * pi * sig0))/2 ; loge0<- (y-as.vector(xi0 %*% bh0))^2 /(-2* sig0) logf0<- logc0+loge0 ei<- matrix(0, nrow=N, ncol=M) for (i in 1:M){ zi<- geno[,i] #genotypes:z# wi<- 2* (zi^2) -1 #w# xi1<- cbind(zi,wi,ui,vi) ## bh1<- solve(t(xi1) %*% xi1)%*% t(xi1) %*% y #coefficients under H1# ei[,i]<- y - xi1 %*% bh1 L1<- t(ei[,i])%*% ei[,i] sig1<- as.vector(L1/N) ; theta1[,i]<- c(bh1, sig1) #sig1 is the squared sigma1; theta1 is the parameters under H1.# Lod[i]<- log((L0 / L1)^(N / 2))/log(10) #This is the LOD score.# logc1<- (-log(2 * pi * sig1))/2 ; loge1<- (y-(xi1 %*% bh1))^2 /(-2* sig1) logf1[,i]<- logc1+loge1 } logD<- (logf1 - logf0)*N / log(10) #Using it to find EIF of the lod scores.# EIF<- t(t(logD)- Lod) #EIF(N*M) is the empirical influence matrix of all the individuals for all LOD scores.# ### Specify the chromosome and the loci ### m <- length(chro) if (sum(loci > m ) >= 1) stop(paste("loci should be a vector containing some integers no larger than ", m, "!")) Eif <- EIF[,chro[loci]] Rifun<- function(i){ #i is the index of the loci; equation (18) in the paper.# u<- rep(1, N); v<- sex U<- cbind(u,v) zi<- geno[,i]; wi<- 2*((zi)^2) -1 ZiU <- cbind(zi, wi, U) Hi1 <- ZiU %*% solve(t(ZiU) %*% ZiU ) %*% t(ZiU) H0 <- U %*% solve(t(U) %*% U) %*% t(U) Ri<- Hi1-H0 return(Ri) } k<- length(loci) #Find Cov, the equation (17), in the paper.# Cov<- matrix(0, nrow=k, ncol=k ) R<- list(); r<- 0 for(i in chro[loci]){ r<- r+1 R[[r]]<- Rifun(i) } for(i in 1:k){ for(j in 1:k){ Cov[i,j]<- sum(diag( R[[i]] %*% R[[j]] )) * 2/ ( 2* log(10) )^2 } } ICov<- solve(Cov) #ICov is the inverse matrix of Cov.# ### Gram-Schmidt orthonormalization ### Gram_Schmidt<- function(p=power, g=loca.chr[loci] ){ gamma<- g Gamma0<- matrix(0, nrow= length(gamma), ncol=p+1) for(i in 0:p){ Gamma0[,i+1]<- gamma^i } C.l <- ICov %*% Gamma0 #initial values c^{*}_{l}# C_l <- C.l #¥tilde{c}_{0} =c^{*}_{0}# st.f<- function(x){x/ as.vector(sqrt( t(x) %*% Cov %*% x))} #standardization function # hl<- as.list(NULL) hl[[1]]<- diag(1, length(gamma)) cl <- apply(C_l, 2, st.f) #standardize c_{0}# for(i in 2: (p+1) ){ hl[[i]]<- hl[[i-1]] - Cov %*% cl[,i-1] %*% t(cl[,i-1]) cl[,i] <- st.f( t( hl[[i]] ) %*% C.l[,i] ) } return(list(hl=hl, cl=cl)) } GS<- Gram_Schmidt(p=power, g=loca.chr[loci]) Cs<- GS$cl Eifc<- Eif %*% Cs return(list(Lod=Lod, EIF=EIF, Eifc=Eifc, hl=GS$hl, cl=GS$cl)) #Lod scores, EIF, J and Qeif are returned.# } ################################################################ #A function for plotting EIFCs# #The figure shows the individual having the maximum absolute EIFC.# plot.eifc<- function(eifc=EIFC, p=power, mx=1){ eifp<- eifc[,(p+1)] m.x <- which(rank(abs(eifp)) > length(eifp) - mx ) m.eifc<- max(abs(eifc))+10 oldpar<- par(no.readonly=TRUE) on.exit(par(oldpar)) par(cex=1.2) plot(eifp, ylim=c(-m.eifc, m.eifc), xlab="Index", ylab=paste("EIFC_", p)) text(m.x +8, eifp[m.x]-8, paste(m.x, " (", round(eifp[m.x],3),")"),col=1,cex=0.7) par(oldpar) } ############################################################ ############################################################ ### dataset ### X1<-matrix(scan("Data_LogADI.txt", what="character"), 173, byrow=TRUE) ############################################################ ### Input the dataset, specify the chromosom, marker loci and the power of the orthogonal polynomial. ### ortho.eifc<- orthogonal.f(X=X1, chromosome=3, loci=c(3,4,5), power=2) EIFC <- ortho.eifc$Eifc ############################################################ # Plot EIFC, specify p (any integer from 0 to the power), and show the mx individuals having the largest absolute EIFCs. # plot.eifc(EIFC, p=2, mx=3) ############################################################