############################# ## BayesR ## Author: Jian Zeng ## Created: 16 June 2022 ## Updated: 13 June 2024 ############################# library(MCMCpack) bayesr = function(X, y, niter = 1000, gamma = c(0, 0.01, 0.1, 1), startPi = c(0.5, 0.3, 0.15, 0.05), startH2 = 0.5){ # X: genotype matrix # y: phenotype vector # niter: number of iterations (MCMC chain length) # gamma: a vector of scalers for the variance in the multi-normal mixture prior # startPi: starting value for the distribution membership propabilities # startH2: starting value for the SNP-based heritability # Step 1. Declare and initialise variables n = nrow(X) # number of observations m = ncol(X) # number of SNPs ndist = length(startPi) # number of mixture distributions pi = startPi # starting value for pi h2 = startH2 # starting value for heritability vary = var(y) # phenotypic variance varg = vary*h2 # starting value for genetic variance vare = vary*(1-h2) # starting value for residual variance sigmaSq = varg/(m*sum(gamma*pi)) # common factor of SNP effect variance nub = 4 # prior degrees of freedom for SNP effect variance nue = 4 # prior degrees of freedom for residual variance scaleb = (nub-2)/nub*sigmaSq # prior scale parameter for SNP effect variance scalee = (nue-2)/nue*vare # prior scale parameter for residual variance beta = array(0,m) # vector of SNP effects beta_mcmc = matrix(0,niter,m) # MCMC samples of SNP effects mu = mean(y) # overall mean xpx = apply(X, 2, crossprod) ## diagonal elements of X'X probDelta = vector("numeric", ndist) logDelta = array(0,2) keptIter = NULL # keep MCMC samples # adjust/correct y for population mean and all SNP effects (which all have zero as initial value) ycorr = y - mu # MCMC begins for (iter in 1:niter){ # Step 2. Sample the mean from a normal posterior ycorr = ycorr + mu # unadjust y with the old sample rhs = sum(ycorr) # right hand side of the mixed model equation invLhs = 1/n # inverse of left hand side of the mixed model equation muHat = invLhs*rhs # BLUE solution mu = rnorm(1, muHat, sqrt(invLhs*vare)) # posterior is a normal distribution ycorr = ycorr - mu # adjust y with the new sample # Step 3. Sample each SNP effect from a multi-normal mixture distribution logPi = log(pi) logPiComp = log(1-pi) invSigmaSq = 1/(gamma*c(sigmaSq)) logSigmaSq = log(gamma*c(sigmaSq)) nsnpDist = rep(0, ndist) # number of SNPs asigned to each mixture component ssq = 0 # weighted sum of squares of SNP effects nnz = 0 # number of nonzero effects ghat = array(0,n) # individual genetic value # loop over SNPs for (j in 1:m){ oldSample = beta[j] rhs = crossprod(X[,j], ycorr) + xpx[j]*oldSample # right hand side of the mixed model equation rhs = rhs/vare invLhs = 1.0/(xpx[j]/c(vare) + invSigmaSq) # inverse of left hand side of the mixed model equation betaHat = invLhs*c(rhs) # BLUP solution # sample the mixture distribution membership logDelta = 0.5*(log(invLhs) - logSigmaSq + betaHat*c(rhs)) + logPi # log likelihood + prior for nonzero effects logDelta[1] = logPi[1] # log likelihood + prior for zero effect for (k in 1:ndist) { probDelta[k] = 1.0/sum(exp(logDelta - logDelta[k])) # posterior probability for each distribution membership } delta = sample(1:ndist, 1, prob = probDelta) # indicator variable for the distribution membership nsnpDist[delta] = nsnpDist[delta] + 1 if (delta > 1) { beta[j] = rnorm(1, betaHat[delta], sqrt(invLhs[delta])) # given the distribution membership, the posterior is a normal distribution ycorr = ycorr + X[,j]*(oldSample - beta[j]) # update ycorr with the new sample ghat = ghat + X[,j]*beta[j] ssq = ssq + beta[j]^2 / gamma[delta] nnz = nnz + 1 } else { if (oldSample) ycorr = ycorr + X[,j]*oldSample # update ycorr with the new sample which is zero beta[j] = 0 } } beta_mcmc[iter,] = beta # Step 4. Sample the distribution membership probabilities from a Dirichlet distribution pi = rdirichlet(1, nsnpDist + 1) # Step 5. Sample the SNP effect variance from a scaled inverse chi-square distribution sigmaSq = (ssq + nub*scaleb)/rchisq(1, nnz+nub) # Step 6. Sample the residual variance from a scaled inverse chi-square distribution vare = (crossprod(ycorr) + nue*scalee)/rchisq(1, n+nue) # Step 7. Compute the SNP-based heritability varg = var(ghat) h2 = varg/(varg + vare) # The final step in each iteration is to store the MCMC samples for the parameter values keptIter <- rbind(keptIter,c(pi, nnz, sigmaSq, h2, vare, varg)) if (!(iter%%100)) { cat (sprintf("\n iter %4s, nnz = %4s, sigmaSq = %6.3f, h2 = %6.3f, vare = %6.3f, varg = %6.3f \n", iter, nnz, sigmaSq, h2, vare, varg)) } } colnames(keptIter) <- c(paste0("Pi", 1:ndist),"Nnz","SigmaSq","h2","Vare","Varg") postMean = apply(keptIter, 2, mean) # posterior mean of MCMC samples is used as the point estimate for each parameter cat("\nPosterior mean:\n") print(postMean) return(list(par=keptIter, beta=beta_mcmc)) }