##################################### # Summer Institute of Statistical Genetics 2016 # Module 18: Statistical & Quantitative Genetics of Disease # Naomi Wray # Practical 5 # Genetic Risk Models # Purpose to compare genetic risk models and show that they are exchangeable based on available data # to demonstrate why the liability threshold model is the model of choice #################################### #Section 1 #Additive risk model. #Section 1a) Set up the simulation and explore the data #################################### N = 1e5 # number of families n = 100 # number of loci R = 1.1 # relative risk of each risk allele p = 0.2 # allele frequency of each risk allele K = 0.01 # probability of disease b=K/(2*n*p*R) # weight of each risk allele to be consistent with the observed population risk of disease of disease indA=rbinom(N,2*n,p) # number of risk loci carried by each individual indR=indA*b*R # probability of disease (number of risk alleles * RR of each allele* linear regression) indu=runif(N,0,1) # random uniform number between 0 and 1 indD=c(rep(0,N)) # disease status set all to not diseased indD[indR>indu]=1 # identify diseased individuals round(length(indD[indD==1])/N,digits =3) # check disease frequency is correct opar<-par(mfrow=c(2,2)) hist(indA,xlab="indA = # risk alleles",main="Histogram of # risk alleles") hist(indR,xlab="indR = probability of disease given # risk alleles ",main="Histogram of probability of disease") plot(indA[1:min(1e5,N)],indR[1:min(1e5,N)],xlab='indA = # risk alleles',ylab="indR = probability of disease",main="Relationship between # alleles & prob of disease") hist(indA[indD==0],col='black',xlab='indA = # risk alleles',main='Histogram of # risk alleles',xlim=c(min(indA),max(indA))) hist(indA[indD==1],col='red',add=T) legend('topleft',legend=c('Not diseased','Diseased'),fill=c('black','red')) par(opar) #################################### #Section 1b) Test under different scenarios #################################### # Run the script in section 1a several times changing n,R,p,K e.g R=1.5, p=0.5, K=0.10 # Look at relationship between number of risk alleles and probability of disease, # could this be consistent with increased risk to relatives # Pay particular attention to the maximum value of the probability of disease #################################### #Section 2 #Multiplicative risk model. Slatkin 2008 #Section 2a) Set up the simulation and explore the data #################################### N = 1e5 # number of families n = 100 # number of loci R = 1.1 # relative risk of each risk allele p = 0.2 # allele frequency of each locus K = 0.1 # probabilty of disease k0 = K/(1+p*(R-1))^(2*n) # probability of disease in those that carry no risk alleles indA=rbinom(N,2*n,p) # number of risk loci carried by each individual indR=exp(log(k0)+indA*log(R)) # probability of disease, same as indR=k0*R^indA, but avoids numerical issues of power # multiplicative on the disease scale but additive on the log scale indu=runif(N,0,1) # uniform number indD=c(rep(0,N)) # disease status set all to not diseased indD[indR>indu]=1 # identify diseased individuals #indR[indR>1]=1 round(length(indD[indD==1])/N,digits =3) # check disease frequency is correct round(k0,digits=6) # print probability of disease in those that carry no risk alleles opar<-par(mfrow=c(2,2)) hist(indA,xlab="indA = # risk alleles",main="Histogram of # risk alleles") hist(indR,xlab="indR = probability of disease given # risk alleles ",main="Histogram of probability of disease") plot(indA[1:min(1e5,N)],indR[1:min(1e5,N)],pch=".",xlab='indA = # risk alleles',ylab="indR = probability of disease",main="Relationship between # alleles & prob of disease") hist(indA[indD==0],col='black',xlab='indA = # risk alleles',main='Histogram of # risk alleles',xlim=c(min(indA),max(indA))) hist(indA[indD==1],col='red',add=T) legend('topleft',legend=c('Not diseased','Diseased'),fill=c('black','red')) par(opar) #################################### #Section 2b) Test under different scenarios #################################### # Run the script in section 2b several times changing n,R,p,K, eg i) K=0.1, ii) K=0.1,R=1.2 # Pay particular attention to the maximum value of the probability of disease # What do you conclude? # After the line indR= add the line indR[indR>1]=1 # What does this do and what impact does this have? #################################### #Section 3 #Logistic risk model. Janssens et al (2006) Pedictive testing for complex diseases using multiple genes: Fact or fiction? Genetics in Medicine #Section 3a) Set up the simulation and explore the data #################################### N = 1e5 # number of families n = 100 # number of loci R = 1.5 # odds ratio of each risk allele p = 0.2 # allele frequency of each locus K = 0.1 # probabilty of disease prOdds=1/(n*R)^10 #Starting value for odds of disease for those carrying no risk loci indA=rbinom(N,2*n,p) # number of risk loci carried by each individual K_est=0 while (K_est< K){ #iterarte to find a prOdds for those carrying no risk loci that is consistent with K poOdds=prOdds*R^indA indR=poOdds/(1+poOdds) indu=runif(N,0,1) # uniform number indD=c(rep(0,N)) # disease status set all to not diseased indD[indR>indu]=1 # identify diseased individuals K_est =length(indD[indD==1])/N # trying to get this to match K prOdds=prOdds*1.05 } round(length(indD[indD==1])/N,digits =3) # check disease frequency is correct opar<-par(mfrow=c(2,2)) hist(indA,xlab="indA = # risk alleles",main="Histogram of # risk alleles") hist(indR,xlab="indR = probability of disease given # risk alleles ",main="Histogram of probability of disease") plot(indA[1:min(1e5,N)],indR[1:min(1e5,N)],pch=".",xlab='indA = # risk alleles',ylab="indR = probability of disease",main="Relationship between # alleles & prob of disease") hist(indA[indD==0],col='black',xlab='indA = # risk alleles',main='Histogram of # risk alleles',xlim=c(min(indA),max(indA))) hist(indA[indD==1],col='red',add=T) legend('topleft',legend=c('Not diseased','Diseased'),fill=c('black','red')) par(opar) #################################### #Section 3b) Test under different scenarios #################################### # Run the script in section 3a several times changing n,R,p,K # Dont go higher than n=100, R =2 ..otherwise goestoo slowly # Pay particular attention to the maximum value of the probability of disease # Is this a useful parameterization to model disease ################################## #Section 4 Liability threshold model ################################## N = 1e5 # number of families h2 = 0.8 # heritability of liability that reflects many combinations of p, R and n K = 0.10 # probabilty of disease indA=rnorm(N,0,sqrt(h2)) # N(0,1) representation of number of risk allele t=qnorm(1-K) indR=pnorm((indA-t)/sqrt(1-h2)) indu=runif(N,0,1) # uniform number indD=c(rep(0,N)) # disease status set all to not diseased indD[indR>indu]=1 # identify diseased individuals round(length(indD[indD==1])/N,digits =3) # check disease frequency is correct opar<-par(mfrow=c(2,2)) hist(indA,xlab="indA = # risk alleles",main="Histogram of # risk alleles") hist(indR,xlab="indR = probability of disease given # risk alleles ",main="Histogram of probability of disease") plot(indA[1:min(1e5,N)],indR[1:min(1e5,N)],pch=".",xlab='indA = # risk alleles',ylab="indR = probability of disease",main="Relationship between # alleles & prob of disease") hist(indA[indD==0],col='black',xlab='indA = # risk alleles',main='Histogram of # risk alleles',xlim=c(min(indA),max(indA))) hist(indA[indD==1],col='red',add=T) legend('topleft',legend=c('Not diseased','Diseased'),fill=c('black','red')) par(opar) ################################# # Run the script several times changing the parameters # Is this a useful parameterization to model disease? ##################################