############################################################### ## Summary-data-based BayesR (SBayesR) ## Author: Jian Zeng ## Created: 16 June 2022 ## Updated: 13 June 2024 ############################################################### library(MCMCpack) sbayesr = function(b, se, n, R, niter = 1000, gamma = c(0, 0.01, 0.1, 1), startPi = c(0.9, 0.06, 0.03, 0.01), startH2 = 0.5){ # b: marginal SNP effects (from GWAS using 0/1/2 genotypes) # se: standard errors # n: GWAS sample size # R: LD correlation matrix # niter: number of iterations (MCMC chain length) # gamma: a vector of scalers for the variance in the multi-normal mixture prior # startPi: starting value for the distribution membership propabilities # startH2: starting value for the SNP-based heritability # Step 1. scale the GWAS SNP effects scale = sqrt(1/(n*se^2 + b^2)) # scale factors for marginal SNP effects b_scaled = b*scale # scaled marginal SNP effects (in units of genotype sd / phenotype sd) vary = 1 # phenotypic variance = 1 after the scaling # Step 2. Declare and initialise variables m = nrow(R) # number of SNPs ndist = length(startPi) # number of mixture distributions pi = startPi # starting value for pi h2 = startH2 # starting value for heritability varg = vary*h2 # genetic variance vare = vary*(1-h2) # residual variance sigmaSq = varg/(m*sum(gamma*pi)) # common factor of SNP effect variance nub = 4 # prior degrees of freedom for SNP effect variance nue = 4 # prior degrees of freedom for residual variance scaleb = (nub-2)/nub*sigmaSq # prior scale parameter for SNP effect variance scalee = (nue-2)/nue*vare # prior scale parameter for residual variance beta = array(0,m) # vector of SNP effects beta_mcmc = matrix(0,niter,m) # MCMC samples of SNP effects probDelta = vector("numeric", ndist) logDelta = array(0,2) keptIter = NULL # keep MCMC samples # adjust/correct b for all SNP effects (which all have zero as initial value) bcorr = b_scaled # MCMC begins for (iter in 1:niter){ # Step 3. Sample each SNP effect from a multi-normal mixture distribution logPi = log(pi) logPiComp = log(1-pi) invSigmaSq = 1/(gamma*c(sigmaSq)) logSigmaSq = log(gamma*c(sigmaSq)) nsnpDist = rep(0, ndist) # number of SNPs in each effect distribution ssq = 0 # weighted sum of squares of beta nnz = 0 # number of nonzero beta # loop over SNPs for (i in 1:m){ oldSample = beta[i] rhs = n*bcorr[i] + n*oldSample # right hand side of the mixed model equation rhs = rhs/vare invLhs = 1.0/(n/c(vare) + invSigmaSq) # inverse of left hand side of the mixed model equation betaHat = invLhs*c(rhs) # BLUP solution # sample the mixture distribution membership logDelta = 0.5*(log(invLhs) - logSigmaSq + betaHat*c(rhs)) + logPi # log likelihood + prior for nonzero effects logDelta[1] = logPi[1] # log likelihood + prior for zero effect for (k in 1:ndist) { probDelta[k] = 1.0/sum(exp(logDelta - logDelta[k])) # posterior probability for each distribution membership } delta = sample(1:ndist, 1, prob = probDelta) # indicator variable for the distribution membership nsnpDist[delta] = nsnpDist[delta] + 1 if (delta > 1) { beta[i] = rnorm(1, betaHat[delta], sqrt(invLhs[delta])) # given the distribution membership, the posterior is a normal distribution bcorr = bcorr + R[,i]*(oldSample - beta[i]) # update bcorr with the new sample ssq = ssq + beta[i]^2 / gamma[delta] nnz = nnz + 1 } else { if (oldSample) bcorr = bcorr + R[,i]*oldSample # update bcorr with the new sample which is zero beta[i] = 0 } } beta_mcmc[iter,] = beta / scale # store beta in the original scale # Step 4. Sample the distribution membership probabilities from a Dirichlet distribution pi = rdirichlet(1, nsnpDist + 1) # Step 5. Sample the SNP effect variance from a scaled inverse chi-square distribution sigmaSq = (ssq + nub*scaleb)/rchisq(1, nnz+nub) # Step 6. Compute the SNP-based heritability bRb = crossprod(beta, (b_scaled-bcorr)) # compute genetic variance = beta' R beta varg = bRb h2 = varg # Step 7. Sample the residual variance from a scaled inverse chi-square distribution sse = (vary - varg)*n vare = (sse + nue*scalee)/rchisq(1, n+nue) if (vare <= 0) vare = vary # sometimes sse can be negative and would cause a problem # The final step in each iteration is to store the MCMC samples for the parameter values keptIter <- rbind(keptIter,c(pi, nnz, sigmaSq, h2, vare)) if (!(iter%%100)) { cat (sprintf("\n iter %4s, nnz = %4s, sigmaSq = %6.3f, h2 = %6.3f, vare = %6.3f\n", iter, nnz, sigmaSq, h2, vare)) } } colnames(keptIter) <- c(paste0("Pi", 1:ndist),"Nnz","SigmaSq","h2","Vare") postMean = apply(keptIter, 2, mean) # posterior mean of MCMC samples is used as the point estimate for each parameter cat("\nPosterior mean:\n") print(postMean) return(list(par=keptIter, beta=beta_mcmc)) }