##################################### # Summer Institute of Statistical Genetics 2016 # Module 18: Statistical & Quantitative Genetics of Disease # Naomi Wray # Practical 1 # Liability threshold model #################################### #Part 1 #Demonstration that multiple loci generate an approx normal distribution of the count of genetic values #Simulate 9 loci #Part 1a N = 10000 # sample size p = 0.5 # risk allele frequency n=9 # number of loci G=matrix(rep(0,N*n),nrow=N,ncol=n) # matrix of N people each with n loci R=matrix(rep(0,N*n),nrow=N,ncol=n) # count of risk loci for (i in 1: n){ G[,i]=rbinom(N,2,p) # simulate genotypes under H-W equilibrium if(i ==1) {R[,i]=G[,i]} # count of 1 locus else {R[,i]=R[,i-1]+G[,i]} # count or risk loci } opar<-par(mfrow=c(3,3)) for(i in 1:n){hist(R[,i],main=paste(i,"loci"))} par(opar) #Part 1b #Repeat for loci of allele frequency 0.1 #You might have to increase the number of loci to generate a normal distribution N = 10000 # sample size p = 0.1 # risk allele frequency n=100 G=matrix(rep(0,N*n),nrow=N,ncol=n) # matrix of N people each with n loci R=matrix(rep(0,N*n),nrow=N,ncol=n) # count of risk loci for (i in 1: n){ G[,i]=rbinom(N,2,p) # simulate genotypes under H-W equilibrium if(i ==1) {R[,i]=G[,i]} # count of 1 locus else {R[,i]=R[,i-1]+G[,i]} # count or risk loci } opar<-par(mfrow=c(3,3)) for(i in 1:n){hist(R[,i],main=paste(i,"loci"))} par(opar) #Part 1c #Repeat for loci of variable allele frequency #eg p=runif(n,0,1) #Part 1d #Skip this and come back if there is time at the end #Check for normal distribution of genetic values #################################### #Part 2 #Here we check the basic parameters of PHENOTYPIC liability #Simply run the code in Part 2a and 2b and understand #################################### #Section 2a #Normal distribution parameters #################################### # This section only works in Rstudio because of manipulate command # This just shoows phenotypic liability of population and of cases #################################### #install.packages('manipulate') library(manipulate) manipulate({ K=K # disease prevalence t=qnorm(1-K,0,1) # liability threshold z<-dnorm(t) # height of the normal distribution at T i<-z/K # mean liability of affected group k=i*(i-t) # variance reduction factor N=N #sample size P=rnorm(N) # M individuals with normally distributed phenotype liabilities D=c(rep(0,N)) # set all individuals to be non-diseased = 0 D[P>t]=1 # set those with a phenotypic liability > t to be diseased = 1 opar<-par(mfrow=c(1,2)) hist(P, xlim=c(-4,4),ylim=c(0,N/5),main="normally distributed liability phenotype") hist(P[D==1], xlim=c(-4,4),ylim=c(0,N/5), main="liability phenotype of those who are diseased") text(-3,(N/6),paste("mean liability of those who are diseased = ",round(mean(P[D==1]),digits=3)),pos=4) text(-3,(N/6)*0.90,paste("expected value, i =",round(i,digits=3)),pos=4) text(-3,(N/6)*0.80, paste("variance in liability of those who are diseased =", round(var(P[D==1]),digits=3)),pos=4) text(-3,(N/6)*0.70,paste("expected value, (1-i(i-t))= ",round((1-k),digits =3)),pos=4) }, N=slider(1,1e5, initial=1e4), K=slider(0,1,initial=.05)) #################################### #Section 2b #Here we check the basic parameters of GENETIC liability #################################### #Normal distribution parameters as in section 2a #Genetic parameters #################################### K=0.01 # disease prevalence t=qnorm(1-K,0,1) # liability threshold z<-dnorm(t) # height of the normal distribution at T i<-z/K # mean liability of affected group k=i*(i-t) # variance reduction factor h2=0.8 #heritability of liability #phenotypic variance is 1 Mend=0.5*h2 #within family segregation variance sdE=sqrt(1-h2) ##################################### #Simulate parents and a child under a polygenic model ##################################### N = 1e5 # number of families # Mums mumA<- rnorm(N,0,sqrt(h2)) # mothers additive genetic values drawn from a normal distribution with mean 0 and std dev sqrt(h2) mumE<- rnorm(N,0,sqrt(1-h2)) # mothers residual values drawn from a normal distribution with mean 0 and std dev sqrt(1-h2) mumP<-mumA + mumE # mother phenotypic liability values mumD<-rep(0,N) #set disease status of all mums to be not diseased 0 mumD[mumP > t]<-1 #set disease status of mums with phenotypic liability > threshold to be diseased =1 # Dads dadA<- rnorm(N,0,sqrt(h2)) # fathers additive genetic values drawn from a normal distribution with mean 0 and std dev sqrt(h2) dadE<- rnorm(N,0,sqrt(1-h2)) # fathers residual values drawn from a normal distribution with mean 0 and std dev sqrt(1-h2) dadP<-dadA + dadE # father phenotypic liability values dadD<-rep(0,N) #set disease status of all dads to be not diseased 0 dadD[dadP > t]<-1 #set disease status of dads with phenotypic liability > threshold to be diseased =1 # Probands childA<-rnorm(N,mean=0,sd=sqrt(Mend)) + 0.5*dadA + 0.5*mumA # a child's genetic liability is the mid-parent value plus a withinfamily deviation childE<-rnorm(N, mean=0, sd=sqrt(1-h2)) # fathers residual values drawn from a normal distribution with mean 0 and std dev sqrt(1-h2) childP<-childA + childE # child phenotypic liability values childD<-rep(0,N) #set disease status of all children to be not diseased 0 childD[childP > t]<-1 ################################## #Section 2c #feel the data - run line by line ################################## opar<-par(mfrow=c(2,2)) hist(childP, xlim=c(-4,4),ylim=c(0,N/3),main="Liability distributions",col='black',cex=0.4) hist(childA, add=T,col='blue') hist(childE, add=T,col='red') legend('topleft',legend=c('Phenotypic','Genetic','Residual'),fill=c('black','blue','red'),cex=0.6) midP=(dadP+mumP)/2 plot(midP[1:min(N,1000)],childP[1:min(N,1000)],main="Estimate h2 for QT ",cex=0.4, xlab="midP",ylab="childP") #only plots first 1000 l=lm(childP~0+midP) # regression of Child P on mid-parent P..slowest step when running abline(l) l=summary(l) mtext(paste("regression=",round(l$coefficients[1],digits=2),"+/-",round(l$coefficients[2],digits=2)),side=3,cex=0.6) hist(childP[dadD==1],ylim=c(0,N*K/4),main="Liability in kids of affected dads",col='black',cex=0.4) hist(childA[dadD==1], add=T,col='blue') hist(childE[dadD==1], add=T,col='red') legend('topleft',legend=c('Phenotypic','Genetic','Residual'),fill=c('black','blue','red'),cex=0.6) K_est=length(childD[childD==1])/N # one estimate of disease prevalence Kdad_est=length(childD[childD==1 & dadD==1])/length(dadD[dadD==1]) # estimate of risk of disease in kids of affected dads lambdaDad_est=Kdad_est/K_est #relative risk for kids of affected dads compared to a child selected at random Kmum_est=length(childD[childD==1 & mumD==1])/length(mumD[mumD==1]) # estimate of risk of disease in kids of affected mums lambdaMum_est=Kmum_est/K_est #relative risk for kids of affected mums compared to a child selected at random Kboth_est=length(childD[childD==1 & mumD==1 & dadD==1])/length(mumD[mumD==1 & dadD==1]) # estimate of risk of disease in kids when both parents are affected lambdaBoth_est=Kboth_est/K_est #relative risk for kids who have both parents affected compared to a child selected at random KEither_est=length(childD[childD==1 & (mumD==1 | dadD==1)])/length(mumD[mumD==1 | dadD==1]) # estimate of risk of disease in kids when both parents are affected lambdaEither_est=KEither_est/K_est #relative risk for kids who have both parents affected compared to a child selected at random barplot(c(lambdaDad_est, lambdaMum_est, lambdaBoth_est,lambdaEither_est),names=c("Dads","Mums","Both","Ether"),ylab="recurrence risk", main="Inc. risk in kids of affected..") par(opar) ################################## #Section 2d #Compare simulation with theory #run line by line ################################## cov(dadA,mumA) # expected value 0 cov(dadP,childP) # covariance in liabilities between dad and child cov(dadA,childA) # same because covariance is only from shared genetic factors 0.5*h2 # expected value mean(dadP[dadD==1]) # mean phenotypic liability of affected fathers i # expected value mean(childP[dadD==1]) # mean phenotypic liability of children of affected fathers 0.5*h2*i # expected value var(dadP[dadD==1]) # variance in phenotypic liability of affected fathers (1-k) # expected value (vc=var(childP[dadD==1])) # variance in phenotypic liability of children of affected fathers 1-k*(0.5*h2)^2 # expected value Kdad_est # estimate of risk of disease in kids of affected dads (calculated above) (Kdad=1-pnorm((t-0.5*h2*i)/sqrt(vc))) # expected value lambdaDad_est # estimated recurrence risk Kdad/K # expected value ################################### #Section 2e #Estimate heritability from recurrence risks to relatives #run line by line ################################### aR=0.5 #genetic relationship between parent and offspring t_est = qnorm(1-K_est) #threshold estimated from data tR_est = qnorm(1 - lambdaDad_est * K_est) #child threshold estimated from data z_est = dnorm(t_est) i_est = z_est/K_est h2_Falc=function(t,tR,i,aR){(t-tR)/(i*aR)} #Falconer (1965) estimate of heritability of liability (h2F_est=h2_Falc(t_est,tR_est,i_est,aR)) NR=length(dadD[dadD==1]) #number of families contributing to estimate of Kdad zR_est=dnorm(tR_est) h2se=function(aR,K,N,z,i,h2,t,KR,zR,NR) #standard error of heritability of liability - two lines {(1/aR)*sqrt((K*(1-K)/(N*z^2))*(1/i + aR*h2*(i-t))^2 +KR*(1-KR)/(i*i*zR*zR*NR))} (h2seF_est=h2se(aR,K_est,N,z_est,i_est,h2F_est,t_est,Kdad_est,zR_est,NR)) h2l=function(t,tR,i,aR){(t-tR*sqrt(1-(1-t/i)*(t^2-tR^2)))/(aR*(i+(i-t)*tR^2))} # heritability of liability with Reich et al correction **use this one (h2l_est=h2l(t_est,tR_est,i_est,aR)) (h2lse_est=h2se(aR,K_est,N,z_est,i_est,h2l_est,t_est,Kdad_est,zR_est,NR)) ################################### #Section 2f #Play and get a feel of parameters and sampling variance ################################### #1. Use parameters for 4 diseases 1) K=0.15 h2=0.4 2) K=0.15, h2=0.8, 3) K=0.01, h2=0.4, 4) K=0.01, h2=0.8 #2. Run the script - Sections 2a-e using N=e7, N=1000, N=100 #3. Record h2l_est for each of the 12 scenarios and add to class spreadsheet. ################################ #Section 2g #Extend the simulation to include different types of relatives ################################ #1. Add to the simulation a Monozygotic twin of the child #2. Add to the simulation a full-sibling of the child #3. Add to the simulation a paternal half-sibling of the child #3. Calculate lambdaMZ, lambdaFS, and lambdaHS #4. Estimate heritability of liability from lambdaMZ, lambdaFS, and lambdaHS