##################################################### # EXAMPLE CODE FOR MENDELIAN RANDOMIZATION ANALYSES # ##################################################### # MARIE-JO BRION # 4 JUL 2016 # ------------------------------------------------------------------------------------------------------------------------------------------ # VARIABLES: # Y as the outcome variable # X as the exposure variable # Z as the SNP instrument # A as the allele score for the predictor # Ordinary least squares regression (phenotypic association which contains CONFOUNDING) summary(lm(Y~X)) # ------------------------------------------------------------------------------------------------------------------------------------------ # TWO-STAGE LEAST SQUARES (TSLS) ON INDIVIDUAL LEVEL DATA # R package for two stage least squares analysis library(AER) # Single-SNP TSLS MR summary( ivreg(Y ~ X | Z)) # Example: summary(ivreg(BMI ~ CRP | rs12037, data=mrtest)) # rs12037 as a CRP SNP ('exposure SNP' proxy) # Multi-SNP TSLS MR summary( ivreg(Y ~ X | Z1 + Z2 + Z3)) # Example: summary(ivreg(BMI ~ CRP | rs12037 + rs4206 + rs4129 + rs2794, data=mrtest)) # rs12037, rs4206, rs4129, rs2794 as CRP SNPs (multiple 'exposure SNP' proxies) #Allelic-score TSLS MR summary(ivreg(Y ~ X | A, data=mrtest) # Note: First generate allelic scores for the exposure (using PLINK or R) # Ensure alleles are all set to "increasing allele" with respect to exposure phenotype, when creating the allele score # Example: summary(ivreg(BMI ~ CRP | CRPscore, data=mrtest)) # CRP score as a weighted or unweighted allele score based on the number of CRP-increasing SNPs an individual has # ------------------------------------------------------------------------------------------------------------------------------------------ # APPLICABLE TO GWAS SUMMARY STATISTIC DATA AND FOR TWO-SAMPLE SUMMARY DATA # b_out as SNP effect estimate for outcome # b_exp as SNP effect estimate for exposure # b_exp_maf -> b_exp/se_out ('maf corrected' SNP effect estimate for exposure) # se_out as standard error for the SNP effect on the outcome # b_iv as the MR causal estimate ("IV estimate") based on the SNP (can be estimated using Wald estimate b_out/b_exp) # IVW MR ivw.r <- lm(b_out ~ - 1 + b_exp, weights = (1 / (se_out)^2) # MR Egger egg.r <- lm(b_out ~ b_exp, weights = (1 / (se_out)^2)) # Egger scatter plot ggplot(data, aes(y = b_out , x = b_exp_maf)) # Egger funnel plot: ggplot(data, aes(y = b_exp_maf, x = b_iv)) # ------------------------------------------------------------------------------------------------------------------------------------------