# # EIFs.R (May 2013, by Xiaoling Dou) # ############################################################ ############################################################ ### Find the EIFs for given coefficient vector cl ########## ############################################################ # This function requires a dataset, loci and a coefficient # # vector cl. # # The loci should be a vector consisting of integers no # # larger than the total number of the marker loci. # # The length of cl should be equal to length(loci). # # The dataset should contain ID, phenotype, sex and # # genotypes. # # The first row of the dataset is the discription of each # # column, the second row contains the name of chromosomes, # # and the third row contains the locations of marker loci. # ############################################################ ############################################################ #rm(list=ls(all=TRUE)) EIFs<- function(X=dataset, loci=c(0,0,0,0), cl=c(1,1,1,1)){ #EIFs<- function(X=dataset, chromosome, loci, cl=coefficient){ if (length(loci) != length(cl)) stop("length(loci) should be equal to length(cl) !") ### QTL data arrangement ### NM<- dim(X) N<- NM[1]-3; M<- NM[2]-3 #N: sample size; M: number of loci# if (sum(loci > M) >= 1) stop(paste("loci should be a vector consisting of integers no larger than ", M, "!") ) geno<- matrix(0, nrow=N, ncol=M) X2<- X[-c(1:3),-c(1:3)] for(i in 1:M){ x0<- as.vector(X2[,i]) x1<- replace(x0, which(x0=="A"), -1) x2<- replace(x1, which(x1=="B"), 1) x3<- replace(x2, which(x2=="H"), 0) geno[,i]<- as.numeric(x3) #genotypes, N*M matrix# } X23<- X[2:3,-c(1:3)] # chromosome number and locations # chr.location<- paste(X23[1,], X23[2,], sep="_") colnames(geno)<- chr.location #chromosome and the locations# pheno<- as.numeric(X[-c(1:3), 2]) #phenotype: pheno# sex<- as.numeric(X[-c(1:3), 3]) #sex of the mice# # chro<- which(X23[1,]==chromosome) # loca.chr<- as.numeric(X23[2, chro]) #locations on the given chromosome# x<- cbind(pheno, sex, geno) #phenotype, sex and genotypes: N*(M+2) matrix# ### Find Lod scores, parameters and EIFs ### Lod<- numeric(M); theta1<- matrix(0, 5, M); logf1<- matrix(0, N, M) y<- pheno #phenotype# ui<- rep(1, N) vi<- sex #sex# xi0<- cbind(ui,vi) bh0<- solve(t(xi0) %*% xi0) %*% t(xi0) %*% y #coefficients under H0# e0<- y- xi0 %*% bh0 L0<- t(e0) %*% e0 sig0<- as.vector(L0/N) #sig0 is the squared sigma0.# theta0<- c(bh0, sig0) #parameters under H0# logc0<- (-log(2 * pi * sig0))/2 ; loge0<- (y-as.vector(xi0 %*% bh0))^2 /(-2* sig0) logf0<- logc0+loge0 ei<- matrix(0, nrow=N, ncol=M) for (i in 1:M){ zi<- geno[,i] #genotypes:z# wi<- 2* (zi^2) -1 #w# xi1<- cbind(zi,wi,ui,vi) # bh1<- solve(t(xi1) %*% xi1)%*% t(xi1) %*% y #coefficients under H1# ei[,i]<- y - xi1 %*% bh1 L1<- t(ei[,i])%*% ei[,i] sig1<- as.vector(L1/N) ; theta1[,i]<- c(bh1, sig1) #sig1 is the squared sigma1; theta1 is the parameters under H1.# Lod[i]<- log((L0 / L1)^(N / 2))/log(10) #This is the LOD score.# logc1<- (-log(2 * pi * sig1))/2 ; loge1<- (y-(xi1 %*% bh1))^2 /(-2* sig1) logf1[,i]<- logc1+loge1 } logD<- (logf1 - logf0)*N / log(10) #Using it to find EIF of the lod scores.# EIF<- t(t(logD)- Lod) #EIF(N*M) is the empirical influence matrix of all the individuals for all LOD scores.# # EIFs for a set of specified loci and a given coefficient vector cl.# Eif<- EIF[,loci] #EIF for the specified loci. Eifc<- as.vector(Eif %*% cl) #Eifc for the loci and the given coefficient vector cl.# return(list(Lod=Lod, EIF=EIF, Eifc=Eifc)) #Lod scores, EIF and Eifc are returned.# } ############################################################ #A function for plotting EIFCs# #The figure shows the individual having the maximum absolute EIFC.# plot.eifs<- function(eifc=Eifc, mx=1){ m.x <- which(rank(abs(eifc)) > length(eifc) - mx ) m.eifc<- max(abs(eifc))+10 oldpar<- par(no.readonly=TRUE) on.exit(par(oldpar)) par(cex=1.2) plot(eifc, ylim=c(-m.eifc, m.eifc), xlab="Index", ylab=paste("EIFC")) text(m.x +8, eifc[m.x]-3, paste(m.x, " (", round(eifc[m.x],3),")"),col=1,cex=0.7) par(oldpar) } ############################################################ ############################################################ ### dataset ### X1<-matrix(scan("Data_LogADI.txt", what="character"), 173, byrow=TRUE) ### Input the dataset, specify marker loci and provide a coefficient vector cl to the function EIFs. ### eifc<- EIFs(X=X1, loci=c(2,30,110), cl=c(0.2,2,0.5)) ### Function EIFs() returns LOD scores, EIF for all loci, and Eifc for the specified loci. ### Eifc<- eifc$Eifc ### Plot Eifc and show the mx individuals having the largest absolute Eifc values. ### plot.eifs(Eifc, mx=3) ############################################################