###############################################################
## Summary-data-based BayesR (SBayesR)
## Author: Jian Zeng
## Created: 16 June 2022
## Updated: 13 June 2024
###############################################################

library(MCMCpack)

sbayesr = function(b, se, n, R, niter = 1000, gamma = c(0, 0.01, 0.1, 1), startPi = c(0.9, 0.06, 0.03, 0.01), startH2 = 0.5){
  # b: marginal SNP effects (from GWAS using 0/1/2 genotypes)
  # se: standard errors
  # n: GWAS sample size
  # R: LD correlation matrix
  # niter: number of iterations (MCMC chain length)
  # gamma: a vector of scalers for the variance in the multi-normal mixture prior
  # startPi: starting value for the distribution membership propabilities
  # startH2: starting value for the SNP-based heritability
  
  # Step 1. scale the GWAS SNP effects
  scale = sqrt(1/(n*se^2 + b^2))  # scale factors for marginal SNP effects
  b_scaled  = b*scale             # scaled marginal SNP effects (in units of genotype sd / phenotype sd)
  vary  = 1                       # phenotypic variance = 1 after the scaling
  
  # Step 2. Declare and initialise variables
  m     = nrow(R)          # number of SNPs
  ndist = length(startPi)  # number of mixture distributions
  pi    = startPi          # starting value for pi
  h2    = startH2          # starting value for heritability
  varg  = vary*h2          # genetic variance
  vare  = vary*(1-h2)      # residual variance
  sigmaSq = varg/(m*sum(gamma*pi))    # common factor of SNP effect variance
  nub = 4                  # prior degrees of freedom for SNP effect variance
  nue = 4                  # prior degrees of freedom for residual variance
  scaleb = (nub-2)/nub*sigmaSq  # prior scale parameter for SNP effect variance
  scalee = (nue-2)/nue*vare     # prior scale parameter for residual variance
  beta = array(0,m)        # vector of SNP effects
  beta_mcmc = matrix(0,niter,m) # MCMC samples of SNP effects
  probDelta = vector("numeric", ndist)
  logDelta = array(0,2)
  keptIter = NULL          # keep MCMC samples
  
  # adjust/correct b for all SNP effects (which all have zero as initial value)
  bcorr = b_scaled
  
  # MCMC begins
  for (iter in 1:niter){
    # Step 3. Sample each SNP effect from a multi-normal mixture distribution
    logPi = log(pi)
    logPiComp = log(1-pi)
    invSigmaSq = 1/(gamma*c(sigmaSq))
    logSigmaSq = log(gamma*c(sigmaSq))
    nsnpDist = rep(0, ndist)  # number of SNPs in each effect distribution
    ssq = 0  # weighted sum of squares of beta
    nnz = 0  # number of nonzero beta
    # loop over SNPs
    for (i in 1:m){
      oldSample = beta[i]
      rhs = n*bcorr[i] + n*oldSample            # right hand side of the mixed model equation
      rhs = rhs/vare
      invLhs = 1.0/(n/c(vare) + invSigmaSq)     # inverse of left hand side of the mixed model equation
      betaHat = invLhs*c(rhs)                   # BLUP solution
      
      # sample the mixture distribution membership
      logDelta = 0.5*(log(invLhs) - logSigmaSq + betaHat*c(rhs)) + logPi  # log likelihood + prior for nonzero effects
      logDelta[1] = logPi[1]                                              # log likelihood + prior for zero effect
      for (k in 1:ndist) {
        probDelta[k] = 1.0/sum(exp(logDelta - logDelta[k]))     # posterior probability for each distribution membership
      }
      
      delta = sample(1:ndist, 1, prob = probDelta)    # indicator variable for the distribution membership
      nsnpDist[delta] = nsnpDist[delta] + 1
      
      if (delta > 1) {
        beta[i] = rnorm(1, betaHat[delta], sqrt(invLhs[delta]))   # given the distribution membership, the posterior is a normal distribution
        bcorr = bcorr + R[,i]*(oldSample - beta[i])         # update bcorr with the new sample
        ssq = ssq + beta[i]^2 / gamma[delta]
        nnz = nnz + 1
      } else {
        if (oldSample) bcorr = bcorr + R[,i]*oldSample      # update bcorr with the new sample which is zero
        beta[i] = 0
      }
    }	
    beta_mcmc[iter,] = beta / scale  #  store beta in the original scale
    
    # Step 4. Sample the distribution membership probabilities from a Dirichlet distribution
    pi = rdirichlet(1, nsnpDist + 1)
    
    # Step 5. Sample the SNP effect variance from a scaled inverse chi-square distribution
    sigmaSq = (ssq + nub*scaleb)/rchisq(1, nnz+nub)
    
    # Step 6. Compute the SNP-based heritability
    bRb = crossprod(beta, (b_scaled-bcorr))  # compute genetic variance = beta' R beta
    varg = bRb
    h2  = varg
    
    # Step 7. Sample the residual variance from a scaled inverse chi-square distribution
    sse = (vary - varg)*n
    vare = (sse + nue*scalee)/rchisq(1, n+nue)
    if (vare <= 0) vare = vary  # sometimes sse can be negative and would cause a problem
    
    # The final step in each iteration is to store the MCMC samples for the parameter values
    keptIter <- rbind(keptIter,c(pi, nnz, sigmaSq, h2, vare))
    
    if (!(iter%%100)) {
      cat (sprintf("\n iter %4s, nnz = %4s, sigmaSq = %6.3f, h2 = %6.3f, vare = %6.3f\n", iter, nnz, sigmaSq, h2, vare))
    }
  }
      
  colnames(keptIter) <- c(paste0("Pi", 1:ndist),"Nnz","SigmaSq","h2","Vare")
  postMean = apply(keptIter, 2, mean)  # posterior mean of MCMC samples is used as the point estimate for each parameter
  cat("\nPosterior mean:\n")
  print(postMean)
  return(list(par=keptIter, beta=beta_mcmc))
 }