############################################################### ## Summary-data-based BayesR (SBayesR) ## Author: Jian Zeng ## Date: 16 June 2022 ## Input data: ## b: marginal SNP effects (from GWAS using 0/1/2 genotypes) ## se: standard errors ## n: GWAS sample size ## R: LD correlation matrix ############################################################### library(MCMCpack) sbayesr = function(b, se, n, R, niter = 1000, gamma = c(0, 0.01, 0.1, 1), startPi = c(0.9, 0.06, 0.03, 0.01), startH2 = 0.5){ m = nrow(R) # number of SNPs ndist = length(startPi) # number of mixture distributions pi = startPi # starting value for pi h2 = startH2 # starting value for heritability scale = sqrt(1/(n*se^2)) # scale factors for marginal SNP effects, i.e., sqrt(2pq/vary) bhat = b*scale # scaled marginal SNP effects (in units of genotype sd / phenotype sd) vary = 1 # phenotypic variance = 1 due to the scaling varg = h2 # genetic variance vare = vary # assuming each SNP effect is vanishingly small sigmaSq = varg/(m*sum(gamma*pi)) # common factor of SNP effect variance nub = 4 # prior degrees of freedom for SNP effect variance nue = 4 # prior degrees of freedom for residual variance scaleb = (nub-2)/nub*sigmaSq # prior scale parameter for SNP effect variance scalee = (nue-2)/nue*vare # prior scale parameter for residual variance beta = array(0,m) # vector of SNP effects beta_mcmc = matrix(0,niter,m) # MCMC samples of SNP effects bhatcorr = bhat probDelta = vector("numeric", ndist) logDelta = array(0,2) keptIter = NULL for (iter in 1:niter){ # sampling SNP effects logPi = log(pi) logPiComp = log(1-pi) invSigmaSq = 1/(gamma*c(sigmaSq)) logSigmaSq = log(gamma*c(sigmaSq)) nsnpDist = rep(0, ndist) # number of SNPs in each effect distribution ssq = 0 # sum of squares of beta nnz = 0 # number of nonzero beta for (i in 1:m){ oldSample = beta[i] rhs = (bhatcorr[i] + oldSample)/(vare/n) invLhs = 1.0/(1/c(vare/n) + invSigmaSq) uhat = invLhs*c(rhs) # sampling mixture distribution membership logDelta = 0.5*(log(invLhs) - logSigmaSq + uhat*c(rhs)) + logPi logDelta[1] = logPi[1]; for (k in 1:ndist) { probDelta[k] = 1.0/sum(exp(logDelta - logDelta[k])) } delta = sample(1:ndist, 1, prob = probDelta) nsnpDist[delta] = nsnpDist[delta] + 1 if (delta > 1) { beta[i] = rnorm(1, uhat[delta], sqrt(invLhs[delta])) bhatcorr = bhatcorr + R[,i]*(oldSample - beta[i]) ssq = ssq + beta[i]^2 / gamma[delta] nnz = nnz + 1 } else { if (oldSample) bhatcorr = bhatcorr + R[,i]*oldSample beta[i] = 0 } } beta_mcmc[iter,] = beta / scale # store beta in the original scale # sampling pi pi = rdirichlet(1, nsnpDist + 1) # sampling SNP effect variance sigmaSq = (ssq + nub*scaleb)/rchisq(1, nnz+nub) # sampling residual variance #sse = (vary - crossprod(beta, (bhat - bhatcorr)))*n #vare = (sse + nue*scalee)/rchisq(1, n+nue) # compute genetic variance and heritability bRb = crossprod(beta, (bhat-bhatcorr)) varg = bRb h2 = varg/vary keptIter <- rbind(keptIter,c(pi, nnz, sigmaSq, h2, vare)) if (!(iter%%100)) { cat (sprintf("\n iter %4s, nnz = %4s, sigmaSq = %6.3f, h2 = %6.3f, vare = %6.3f\n", iter, nnz, sigmaSq, h2, vare)) } } colnames(keptIter) <- c(paste0("Pi", 1:length(pi)),"Nnz","SigmaSq","h2","Vare") postMean = apply(keptIter, 2, mean) cat("\nPosterior mean:\n") print(postMean) return(list(par=keptIter, beta=beta_mcmc)) }