-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfirstT_diff_expression.R
90 lines (77 loc) · 2.62 KB
/
firstT_diff_expression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
BiocManager::install("edgeR", force = TRUE)
library(edgeR)
library(dplyr)
## copy previous r code and find the differentially expressed
## genes in each trimester rather than in the entire
## preeclampsia table
first_trimester_data_df = read.table("~/Desktop/HORIZONS-Pregnancy-Data/first_trimester_data.txt", header = TRUE)
first_trimester_data = data.matrix(first_trimester_data_df)
#head(preeclampsia_firstT)
diseases_df = read.table("~/Desktop/HORIZONS-Pregnancy-Data/first_trimester_diseases.txt", header = TRUE)
diseases = diseases_df$disease
d <- DGEList(counts=first_trimester_data,group=factor(diseases))
d
##phenotype_groups
##ncol(read_counts)
##filtering the data
dim(d)
d.full <- d
head(d$counts)
head(cpm(d))
apply(d$counts, 2, sum)
keep <- rowSums(cpm(d)> quantile(cpm(d), c(.10))) >= 2
d <- d[keep,]
dim(d)
##mean(rowSums(cpm(d)))
##quantile(cpm(d), c(.10))
d$samples$lib.size <- colSums(d$counts)
d$samples
##normalizing the data
d <- calcNormFactors(d)
d
## data exploration
plotMDS(d, method="bcv", col=as.numeric(d$samples$group))
legend("bottomleft", as.character(unique(d$samples$group)), col=1:3, pch=20)
## drop outliers
#estimating the dispersion
d1 <- estimateCommonDisp(d, verbose=T)
names(d1)
d1 <- estimateTagwiseDisp(d1)
names(d1)
plotBCV(d1)
## creating a generalized linear model fit
design.mat <- model.matrix(~ 0 + d$samples$group)
colnames(design.mat) <- levels(d$samples$group)
d2 <- estimateGLMCommonDisp(d,design.mat)
d2 <- estimateGLMTrendedDisp(d2,design.mat, method="power")
## can change power
d2 <- estimateGLMTagwiseDisp(d2,design.mat)
plotBCV(d2)
## ------------------- EVERYTHING VS NORMAL ----------------------
## 3 = normal 4 = preeclampsia
et43 <- exactTest(d1, pair=c(4, 3))
topTags(et43)
write.table(et43, "preeclamspsia_diffe_firstT.txt")
de43 <- decideTestsDGE(et43, adjust.method="BH", p.value=0.05)
summary(de43)
de1tags43 <- rownames(d1)[as.logical(de43)]
plotSmear(et43, de.tags=de1tags43)
abline(h = c(-2, 2), col = "blue")
## 3 = normal 1 = chronic hypertension
et13 <- exactTest(d1, pair=c(1, 3))
topTags(et13)
write.table(et13, "chronic_hypertension_diffe_firstT.txt")
de13 <- decideTestsDGE(et13, adjust.method="BH", p.value=0.05)
summary(de13)
de1tags13 <- rownames(d1)[as.logical(de13)]
plotSmear(et13, de.tags=de1tags13)
abline(h = c(-2, 2), col = "blue")
## 3 = normal 2 = gestational diabetes
et23 <- exactTest(d1, pair=c(2, 3))
topTags(et23)
write.table(et23, "gestational_diabetes_diffe_firstT.txt")
de23 <- decideTestsDGE(et23, adjust.method="BH", p.value=0.05)
summary(de23)
de1tags23 <- rownames(d1)[as.logical(de23)]
plotSmear(et23, de.tags=de1tags23)
abline(h = c(-2, 2), col = "blue")