############################################################### ## Summary-data-based BayesRC (SBayesRC) ## Author: Jian Zeng ## Created: 13 June 2024 ############################################################### library(truncnorm) sbayesrc = function(b, se, n, R, anno, niter = 1000, gamma = c(0, 0.01, 0.1, 1), startPi = c(0.9, 0.06, 0.03, 0.01), startH2 = 0.5){ # b: marginal SNP effects (from GWAS using 0/1/2 genotypes) # se: standard errors # n: GWAS sample size # R: LD correlation matrix # anno: annotation matrix (number of SNPs by number of annotations) # niter: number of iterations (MCMC chain length) # gamma: a vector of scalers for the variance in the multi-normal mixture prior # startPi: starting value for the distribution membership propabilities # startH2: starting value for the SNP-based heritability # Step 1. scale the GWAS SNP effects scale = sqrt(1/(n*se^2 + b^2)) # scale factors for marginal SNP effects b_scaled = b*scale # scaled marginal SNP effects (in units of genotype sd / phenotype sd) vary = 1 # phenotypic variance = 1 after the scaling # Step 2. Declare and initialise variables m = nrow(R) # number of SNPs ndist = length(startPi) # number of mixture distributions pi = startPi # starting value for pi h2 = startH2 # starting value for heritability varg = vary*h2 # genetic variance vare = vary*(1-h2) # residual variance sigmaSq = varg/(m*sum(gamma*pi)) # common factor of SNP effect variance nub = 4 # prior degrees of freedom for SNP effect variance nue = 4 # prior degrees of freedom for residual variance scaleb = (nub-2)/nub*sigmaSq # prior scale parameter for SNP effect variance scalee = (nue-2)/nue*vare # prior scale parameter for residual variance beta = array(0,m) # vector of SNP effects beta_mcmc = matrix(0,niter,m) # MCMC samples of SNP effects probDelta = vector("numeric", ndist) logDelta = array(0,2) keptIter = NULL # annotation related variables snpPi = matrix(rep(pi, m), byrow=TRUE, nrow=m, ncol=ndist) # per-SNP pi given the SNP annotations alpha_mcmc = matrix(0, niter, ncol(anno)*(ndist-1)) # MCMC samples of annotation effects p = matrix(nrow = m, ncol = ndist-1) # per-SNP conditional probability p z = matrix(nrow = m, ncol = ndist-1) # per-SNP conditional distribution membership indicator alpha = matrix(0, nrow = ncol(anno), ncol = ndist-1) # vector of annotation effects sigmaSqAlpha = rep(1, ndist-1) # variance of annotation effects sigmaSqAlpha_mcmc = matrix(0, niter, ndist-1) # MCMC samples of the variance of annotation effects nAnno = ncol(anno) # number of annotations annoDiag = apply(anno, 2, crossprod) # diagonal values of A'A where A is the annotation matrix # adjust/correct b for all SNP effects (which all have zero as initial value) bcorr = b_scaled # MCMC begins for (iter in 1:niter){ # Step 3. Sample each SNP effect from a multi-normal mixture distribution logPi = log(snpPi) invSigmaSq = 1/(gamma*c(sigmaSq)) logSigmaSq = log(gamma*c(sigmaSq)) nsnpDist = rep(0, ndist) # number of SNPs in each effect distribution ssq = 0 # weighted sum of squares of beta nnz = 0 # number of nonzero beta z = matrix(0, nrow = m, ncol = ndist-1) # indicator variable for "stepping up" one mixture component # loop over SNPs for (i in 1:m){ oldSample = beta[i] rhs = n*bcorr[i] + n*oldSample # right hand side of the mixed model equation rhs = rhs/vare invLhs = 1.0/(n/c(vare) + invSigmaSq) # inverse of left hand side of the mixed model equation betaHat = invLhs*c(rhs) # BLUP solution # sample the mixture distribution membership logDelta = 0.5*(log(invLhs) - logSigmaSq + betaHat*c(rhs)) + logPi[i,] # log likelihood + prior for nonzero effects logDelta[1] = logPi[i,1] # log likelihood + prior for zero effect for (k in 1:ndist) { probDelta[k] = 1.0/sum(exp(logDelta - logDelta[k])) # posterior probability for each distribution membership if (is.na(probDelta[k])) probDelta[k] = 0 # prevent overflow } delta = sample(1:ndist, 1, prob = probDelta) # indicator variable for the distribution membership nsnpDist[delta] = nsnpDist[delta] + 1 if (delta > 1) { beta[i] = rnorm(1, betaHat[delta], sqrt(invLhs[delta])) # given the distribution membership, the posterior is a normal distribution bcorr = bcorr + R[,i]*(oldSample - beta[i]) # update bcorr with the new sample ssq = ssq + beta[i]^2 / gamma[delta] nnz = nnz + 1 for(j in 1:(delta-1)){ # set one to the "step up" indicator variable z[i,j] = 1 } } else { if (oldSample) bcorr = bcorr + R[,i]*oldSample # update bcorr with the new sample which is zero beta[i] = 0 } } beta_mcmc[iter,] = beta / scale # store beta in the original scale # Step 4. Sample the annotation effects on the SNP distribution mixing probabilities for (j in 1:(ndist-1)) { # have ndist-1 number of conditional distributions if (j==1) { idx = 1:m # for the first conditional distribution, data are all SNPs annoDiagj = annoDiag # diagonal values of A'A } else { idx = which(z[,j-1] > 0) # for the subsequent conditional distributions, data are the SNPs in the previous distribution if (length(idx) > 1) { annoDiagj = apply(as.matrix(anno[idx,]), 2, crossprod) # recompute A'A depending on the SNP memberships } } if (length(idx)) { zj = z[idx,j] mu = anno[idx,] %*% matrix(alpha[,j]) # linear combination of annotations, which will be the mean of truncated normal distribution lj = array(0, length(zj)) # latent variable if (sum(zj==0)) lj[zj==0] = rtruncnorm(sum(zj==0), mean = mu[zj==0], sd = 1, a = -Inf, b = 0) # sample latent variable if (sum(zj==1)) lj[zj==1] = rtruncnorm(sum(zj==1), mean = mu[zj==1], sd = 1, a = 0, b = Inf) # sample latent variable # sample annotation effects using Gibbs sampler (similar to the SNP effect sampler in the individual-level model) lj = lj - c(mu) # adjust the latent variable by all annotation effects # sample intercept with a flat prior oldSample = alpha[1,j] rhs = sum(lj) + m*oldSample invLhs = 1.0/m ahat = invLhs*rhs alpha[1,j] = rnorm(1, ahat, sqrt(invLhs)) lj = lj + (oldSample - alpha[1,j]) # sample each annotation effect with a normal prior if (nAnno > 1) { for (k in 2:nAnno) { oldSample = alpha[k,j] rhs = crossprod(anno[idx,k], lj) + annoDiagj[k]*oldSample; invLhs = 1.0/(annoDiagj[k] + 1.0/sigmaSqAlpha[j]) ahat = invLhs*rhs alpha[k,j] = rnorm(1, ahat, sqrt(invLhs)) lj = lj + anno[idx,k]*(oldSample - alpha[k,j]) } # sample annotation effect variance from a scaled inverse chi-square distribution sigmaSqAlpha[j] = (sum(alpha[-1,j]^2) + 2)/rchisq(1, nAnno-1+2) } } } alpha_mcmc[iter,] = c(alpha) # store MCMC samples of annotation effects sigmaSqAlpha_mcmc[iter,] = sigmaSqAlpha # Step 5. Compute per-SNP conditional probabilities from the annotation effects p = apply(alpha, 2, function(x){pnorm(anno %*% x)}) # Step 6. Compute the joint probabilities (pi) from the conditional probabilities (p) for (k in 1:ndist) { if (k < (ndist-1)) { snpPi[,k] = 1.0 - p[,k] } else { snpPi[,k] = 1 } if (k > 1) { for (kk in 1:(k-1)) { snpPi[,k] = snpPi[,k]*p[,kk] } } } #for example, we want #snpPi[,1] = 1 - p[,1] #snpPi[,2] = (1-p[,2])*p[,1] #snpPi[,3] = (1-p[,3])*p[,1]*p[,2] #snpPi[,4] = p[,1]*p[,2]*p[,3] # Step 7. Sample SNP effect variance from a scaled inverse chi-square distribution sigmaSq = (ssq + nub*scaleb)/rchisq(1, nnz+nub) # Step 8. Compute the SNP-based heritability bRb = crossprod(beta, (b_scaled-bcorr)) # compute genetic variance = beta' R beta varg = bRb h2 = varg # Step 9. Sample the residual variance from a scaled inverse chi-square distribution sse = (vary - varg)*n vare = (sse + nue*scalee)/rchisq(1, n+nue) if (vare <= 0) vare = vary # sometimes sse can be negative and would cause a problem # The final step in each iteration is to store the MCMC samples for the parameter values keptIter <- rbind(keptIter,c(colMeans(snpPi), nnz, sigmaSq, h2, vare)) if (!(iter%%100)) { cat (sprintf("\n iter %4s, nnz = %4s, sigmaSq = %6.3f, h2 = %6.3f, vare = %6.3f \n", iter, nnz, sigmaSq, h2, vare)) } } colnames(keptIter) <- c(paste0("Pi", 1:ndist),"Nnz","SigmaSq","h2","Vare") par = apply(keptIter, 2, mean) sdpar = apply(keptIter, 2, sd) betaMean = apply(beta_mcmc, 2, mean) alphaMean = matrix(apply(alpha_mcmc, 2, mean), ncol=ndist-1) alphaSD = matrix(apply(alpha_mcmc, 2, sd), ncol=ndist-1) colnames(alphaMean) = colnames(alphaSD) = c(paste0("p", 2:ndist)) rownames(alphaMean) = rownames(alphaSD) = colnames(anno) # compute posterior mean of p for each annotation pAnno = pnorm(alphaMean) if (nAnno > 1){ # the first is the intercept for (j in 2:nAnno) { pAnno[j,] = pnorm(alphaMean[1,] + alphaMean[j,]) } } # compute posterior mean of pi for each annotation piAnno = matrix(nrow=nAnno, ncol=ndist, dimnames=list(row.names(pAnno), paste0("Pi", 1:ndist))) for (k in 1:ndist) { if (k < (ndist-1)) { piAnno[,k] = 1.0 - pAnno[,k] } else { piAnno[,k] = 1 } if (k > 1) { for (kk in 1:(k-1)) { piAnno[,k] = piAnno[,k]*pAnno[,kk] } } } cat("\nPosterior mean of model parameters:\n") print(par) cat("\nAnnotation effects:\n") print(alphaMean) cat("\nConditional probabilities:\n") print(round(pAnno,4)) cat("\nJoint probabilities:\n") print(round(piAnno,4)) return(list(par=par, beta=beta_mcmc, alphaMean=alphaMean, pAnno=pAnno, piAnno=piAnno)) }