we are going to begin with some useful R functions
we are going to begin with some useful R functions
simulate data from a normal distribution
args(rnorm)
## function (n, mean = 0, sd = 1)
## NULL
height<-rnorm(1000,170,2.8)
mean(trait)
## [1] -0.0008011
sd(trait)
## [1] 1.014
plot(density(height, adjust=3), main='')
we can use rnorm to simulate a phenotype and SNP data
y.means <- c(5, 15, 20)
y.sd <- 5
snp <-rbinom(1000,2,0.4)
y <- rnorm(1000,y.means[factor(snp)],y.sd)
snp_data<-data.frame(cbind(y,snp))
library(ggplot2)
p<-ggplot(snp_data, aes(x=factor(snp), y=y))
p + geom_boxplot()
simulate data from a multivariate normal distribution
library(MASS)
args(mvrnorm)
## function (n = 1, mu, Sigma, tol = 1e-06, empirical = FALSE, EISPACK = FALSE)
## NULL
height_bmi<-data.frame(mvrnorm(1000, c(170,24), matrix(c(1,0.1,0.1,1),2,2)))
names(height_bmi)<-c("height","bmi"); height_bmi[1:5,]
## height bmi
## 1 169.7 23.47
## 2 169.1 23.16
## 3 169.5 23.01
## 4 171.2 25.34
## 5 170.1 23.26
c(mean(height_bmi$height),mean(height_bmi$bmi))
## [1] 179.98 24.02
cor(height_bmi$height,height_bmi$bmi)
## [1] 0.108
p<-ggplot(height_bmi, aes(x=height,y=bmi))
p + geom_point() + geom_smooth(method='lm')
300 pairs of twins of with mean height = 170 and SD = 2.8
twins<-data.frame(mvrnorm(300, c(170,170), 2.8*(matrix(c(1,0.7,0.7,1),2,2))))
names(twins)<-c("twin1", "twin2"); twins[1:4,]
## twin1 twin2
## 1 169.8 172.9
## 2 170.0 169.6
## 3 170.3 169.9
## 4 169.4 169.7
cor(twins$twin1,twins$twin2)
## [1] 0.6627
read the data into R
data <- read.table("twin_HT_BMI.txt", header=T)
head(data)
## dob ht_t1 bmi_t1 ht_t2 bmi_t2 sex twin
## 1 1902 165.1 25.86 173.0 24.05693475 1 MZ
## 2 1903 175.3 22.74 143.0 33.74248129 1 MZ
## 3 1910 164.0 28.63 165.0 27.54820937 1 MZ
## 4 1906 167.6 21.47 167.0 18.28677973 1 MZ
## 5 1911 168.0 23.03 167.6 19.94805977 1 MZ
## 6 1912 172.7 25.15 166.0 28.30599506 1 MZ
what is the DZ and MZ correlation for height and BMI?
is it the same for both sexes?
what is the heritability from these estimates?
hint:
data_MZ <- data[data$twin=="MZ",]
cor(data_MZ$ht_t1, data_MZ$ht_t2)
## [1] 0.9187
Pedigree statistics
# general options:
pedstats -d HT_BMI.dat -p HT_BMI.ped
# useful options
--pair, --byFamily, --bySex, --pdf
pedigree file
data file
example of pedigree file
ped<-read.table("HT_BMI.ped", header=F)
head(ped)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11
## 1 1 1 3 4 1 MZ 179.000 0.082 18.814 2.935 -1.440
## 2 1 2 3 4 1 MZ 180.340 0.373 24.408 3.195 -0.252
## 3 1 3 0 0 1 0 x x x x x
## 4 1 4 0 0 2 0 x x x x x
## 5 2 5 0 0 2 0 167.640 0.682 23.841 3.171 -0.186
## 6 1 6 3 4 1 0 185.420 0.993 24.936 3.216 0.008
library(kinship2)
p<-pedigree(ped$V2[1:12], ped$V3[1:12], ped$V4[1:12], ped$V5[1:12])
plot.pedigree(p)
## Did not plot the following people: 5
example data file
dat<-readLines("HT_BMI.dat")
dat
## [1] " Z Zygosity " " T height " " T height_z_score "
## [4] " T BMI " " T logBMI " " T BMI_z_score "
## [7] " E END-OF-DATA "
pedstats -d HT_BMI.dat -p HT_BMI.ped
#options
--pair, --byFamily, --bySex, --pdf
pedstats -d HT_BMI.dat -p HT_BMI.ped--pdf