#
# Orthogonal.R (May 2013, by Xiaoling Dou)
#
############################################################
############################################################
### Find EIFC by the orthogonal polynomial method (EIFC) ###
############################################################
# This function requires a dataset, chromosome number, loci#
# and the power of the orthogonal polynomial.              #
# The loci should be a vector consisting of integers no    #
# larger than the total number of loci.                    #
# The dataset should contain ID, phenotype, sex and        #
# genotypes.                                               #
# The first row of the dataset is the description of each  #
# column, the second row contains the name of chromosomes, #
# and the third row contains the locations of marker loci. #
############################################################
############################################################

#rm(list=ls(all=TRUE))

orthogonal.f<- function(X=dataset, chromosome, loci, power){

### QTL data arrangement ###

        NM<- dim(X)
        N<- NM[1]-3; M<- NM[2]-3 #N: sample size; M: number of loci#
        geno<- matrix(0, nrow=N, ncol=M) 
        X2<- X[-c(1:3),-c(1:3)] 
        for(i in 1:M){
        x0<- as.vector(X2[,i])
        x1<- replace(x0, which(x0=="A"), -1)
        x2<- replace(x1, which(x1=="B"), 1)
        x3<- replace(x2, which(x2=="H"), 0)
        geno[,i]<- as.numeric(x3)               #genotypes, N*M matrix#
        }

        X23<- X[2:3,-c(1:3)] #chromosome number and locations#
        chr.location<- paste(X23[1,], X23[2,], sep="_")
        colnames(geno)<- chr.location           #chromosome and the locations#

        pheno<- as.numeric(X[-c(1:3), 2])       #phenotype: pheno#
        sex<- as.numeric(X[-c(1:3), 3])         #sex of the mice#

        chro<- which(X23[1,]==chromosome)
        loca.chr<- as.numeric(X23[2, chro])     #locations on the given chromosome#

        x<- cbind(pheno, sex, geno)             #phenotype, sex and genotypes: N*(M+2) matrix#

### Find Lod scores, parameters and EIFs ###
        Lod<- numeric(M); theta1<- matrix(0, 5, M); logf1<- matrix(0, N, M)
        y<- pheno                               #phenotype#
        ui<- rep(1, N)  
        vi<- sex  #sex#
        xi0<- cbind(ui,vi)
        bh0<- solve(t(xi0) %*% xi0) %*% t(xi0) %*% y  #coefficients under H0#
        e0<- y- xi0 %*% bh0
        L0<- t(e0) %*% e0 
        sig0<- as.vector(L0/N)                  #sig0 is the squared sigma0.#
        theta0<- c(bh0, sig0)                   #parameters under H0#
        logc0<- (-log(2 * pi * sig0))/2 ; loge0<- (y-as.vector(xi0 %*% bh0))^2 /(-2* sig0)
        logf0<- logc0+loge0

        ei<- matrix(0, nrow=N, ncol=M)
        for (i in 1:M){
        zi<- geno[,i]                           #genotypes:z#
        wi<- 2* (zi^2) -1                       #w#
        xi1<- cbind(zi,wi,ui,vi) ##
        bh1<- solve(t(xi1) %*% xi1)%*% t(xi1) %*% y   #coefficients under H1#
        ei[,i]<- y - xi1 %*% bh1
        L1<- t(ei[,i])%*% ei[,i]
        sig1<- as.vector(L1/N) ; theta1[,i]<- c(bh1, sig1)   #sig1 is the squared sigma1; theta1 is the parameters under H1.#
        Lod[i]<- log((L0 / L1)^(N / 2))/log(10) #This is the LOD score.#

        logc1<- (-log(2 * pi * sig1))/2 ; loge1<- (y-(xi1 %*% bh1))^2 /(-2* sig1)
        logf1[,i]<- logc1+loge1
        }

        logD<- (logf1 - logf0)*N / log(10)      #Using it to find EIF of the lod scores.#


        EIF<- t(t(logD)- Lod)                   #EIF(N*M) is the empirical influence matrix of all the individuals for all LOD scores.#

### Specify the chromosome and the loci ###
        m <- length(chro)

        if (sum(loci > m ) >= 1) stop(paste("loci should be a vector containing some integers no larger than ", m, "!"))

        Eif <- EIF[,chro[loci]]

        Rifun<- function(i){  #i is the index of the loci; equation (18) in the paper.#
        u<- rep(1, N); v<- sex
        U<- cbind(u,v)
        zi<- geno[,i]; wi<- 2*((zi)^2) -1 
        ZiU <- cbind(zi, wi, U)
        Hi1 <- ZiU %*% solve(t(ZiU) %*% ZiU ) %*% t(ZiU)
        H0 <- U %*% solve(t(U) %*% U) %*% t(U)
        Ri<- Hi1-H0
        return(Ri)
        }

        k<- length(loci)                        #Find Cov, the equation (17), in the paper.#
        Cov<- matrix(0, nrow=k, ncol=k )
        R<- list(); r<- 0
        for(i in chro[loci]){
                r<- r+1
                R[[r]]<- Rifun(i)
        }

for(i in 1:k){
        for(j in 1:k){
                Cov[i,j]<- sum(diag( R[[i]] %*% R[[j]] )) * 2/ ( 2* log(10) )^2 
        }
}

ICov<- solve(Cov)                               #ICov is the inverse matrix of Cov.#

### Gram-Schmidt orthonormalization ###

Gram_Schmidt<- function(p=power, g=loca.chr[loci] ){
gamma<- g
Gamma0<- matrix(0, nrow= length(gamma), ncol=p+1)
for(i in 0:p){
        Gamma0[,i+1]<- gamma^i
} 

C.l <- ICov %*% Gamma0   #initial values c^{*}_{l}#

C_l <- C.l                       #¥tilde{c}_{0} =c^{*}_{0}#
st.f<- function(x){x/ as.vector(sqrt( t(x) %*% Cov %*% x))}      #standardization function #

hl<- as.list(NULL)
hl[[1]]<- diag(1, length(gamma))

cl <- apply(C_l, 2, st.f)                       #standardize c_{0}#  
for(i in 2: (p+1) ){
        hl[[i]]<- hl[[i-1]] - Cov %*% cl[,i-1] %*% t(cl[,i-1]) 
   cl[,i] <- st.f( t( hl[[i]] ) %*% C.l[,i] )
}

return(list(hl=hl, cl=cl))
}


GS<- Gram_Schmidt(p=power, g=loca.chr[loci])

Cs<- GS$cl
Eifc<- Eif %*% Cs

return(list(Lod=Lod, EIF=EIF, Eifc=Eifc, hl=GS$hl, cl=GS$cl))                   #Lod scores, EIF, J and Qeif are returned.#
        }

################################################################
#A function for plotting EIFCs#
#The figure shows the individual having the maximum absolute EIFC.#

plot.eifc<- function(eifc=EIFC, p=power, mx=1){
        eifp<- eifc[,(p+1)]
        m.x <- which(rank(abs(eifp)) > length(eifp) - mx )
        m.eifc<- max(abs(eifc))+10
        oldpar<- par(no.readonly=TRUE)
        on.exit(par(oldpar))
        par(cex=1.2)
        plot(eifp, ylim=c(-m.eifc, m.eifc), xlab="Index", ylab=paste("EIFC_", p))
        text(m.x +8, eifp[m.x]-8, paste(m.x, " (", round(eifp[m.x],3),")"),col=1,cex=0.7)
        par(oldpar)
}
############################################################
############################################################


### dataset ###
X1<-matrix(scan("Data_LogADI.txt", what="character"), 173, byrow=TRUE)
############################################################
### Input the dataset, specify the chromosom, marker loci and the power of the orthogonal polynomial. ###

ortho.eifc<- orthogonal.f(X=X1, chromosome=3, loci=c(3,4,5), power=2)
EIFC <- ortho.eifc$Eifc

############################################################
# Plot EIFC, specify p (any integer from 0 to the power), and show the mx individuals having the largest absolute EIFCs. #

plot.eifc(EIFC, p=2, mx=3)
############################################################