-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrevFig1a.r
106 lines (96 loc) · 5.4 KB
/
revFig1a.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
################################# Bar Plot Reads In Features Distribution Rev Fig1a ###########################
rm(list=ls(all=TRUE))
##########
setwd("D:/BOULOT/RECHERCHE/MeDIPseq/analyse_MeDIP/ElizabethCrowell/Bouinages_Guillaume/sommes de comptes normalises")
getwd()
########Chargement des dpnnées
message("loading data...")
library(ggplot2)
#Distribution of Counts in Features
counts <- read.table("TableOfSumsFinal.txt", header=T, sep="\t")
counts <- counts[1:5,]
colnames(counts)=c("Feature", colnames(counts[,2:22]))
counts[,2:22] <- cumsum(counts[,2:22])
counts <- data.frame (counts[,1],
(counts[,2]+counts[,4])/2,
(counts[,5]+counts[,6]+counts[,7])/3,
(counts[,8]+counts[,9])/2,
(counts[,10]+counts[,11])/2,
(counts[,12]+counts[,13]+counts[,14])/3,
(counts[,15]+counts[,16])/2,
(counts[,17]+counts[,18]+counts[,19])/3,
(counts[,20]+counts[,21]+counts[,22])/3
)
colnames(counts)=c("Feature", "ovo", "C", "Mor", "Bla", "Gas", "Tro", "Dla", "Spa")
Ovo <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$ovo[1:4]),ymax=c(counts$ovo[1:5]))
C <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$C[1:4]),ymax=c(counts$C[1:5]))
Mor <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$Mor[1:4]),ymax=c(counts$Mor[1:5]))
Bla <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$Bla[1:4]),ymax=c(counts$Bla[1:5]))
Gas <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$Gas[1:4]),ymax=c(counts$Gas[1:5]))
Tro <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$Tro[1:4]),ymax=c(counts$Tro[1:5]))
Dla <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$Dla[1:4]),ymax=c(counts$Dla[1:5]))
Spa <- data.frame(Feature=c("CDS", "INT", "PRO", "REP", "TE"), ymin=c(0, counts$Spa[1:4]),ymax=c(counts$Spa[1:5]))
################################################################################
stages <- c("Ovocyte","2-8 cells","Morula","Blastula","Gastrula",
"Trocophore","D-larva","Spat")
# PREPARE THE COLOUR VECTORS FOR THE PLOT:
oricols <- c("aquamarine3","cadetblue3","chartreuse3","darkorchid3","deeppink3")
rgbs <- col2rgb(oricols)
hsvs <- rgb2hsv(rgbs)
valueseq <- seq(0.4, 0.9, length.out = 8)
HSV <- data.frame(h = rep(hsvs[1,], each = 8),
s = rep(hsvs[2,], each = 8),
v = rep(valueseq, 5))
HSV <- t(HSV)
colvec <- character(40)
for(i in 1:40) colvec[i] <- hsv(HSV[1,i], HSV[2,i], HSV[3,i])
colmat <- matrix(nrow = 8, ncol = 5, byrow = FALSE, data = colvec)
rownames(colmat) <- stages
colnames(colmat) <- c("CDS","INT","PRO","REP","TE")
p<- ggplot(Ovo, aes(fill=feature,ylab = "Proportion of reads mapped in features", ymax=ymax, ymin=ymin, xmax=0.90, xmin=0.1)) +
geom_rect(colour="black", fill=colmat[1,]) +
geom_rect(data=C, xmax=1.90, xmin=1.1, aes(ymax=ymax, ymin=ymin),colour="black", fill=colmat[2,]) +
geom_rect(data=Mor, xmax=2.9, xmin=2.1, aes(ymax=ymax, ymin=ymin),colour="black", fill=colmat[3,]) +
geom_rect(data=Bla, xmax=3.9, xmin=3.1, aes(ymax=ymax, ymin=ymin),colour="black", fill=colmat[4,]) +
geom_rect(data=Gas, xmax=4.9, xmin=4.1, aes(ymax=ymax, ymin=ymin),colour="black", fill=colmat[5,]) +
geom_rect(data=Tro, xmax=5.9, xmin=5.1, aes(ymax=ymax, ymin=ymin),colour="black", fill=colmat[6,]) +
geom_rect(data=Dla, xmax=6.9, xmin=6.1, aes(ymax=ymax, ymin=ymin),colour="black", fill=colmat[7,]) +
geom_rect(data=Spa, xmax=7.9, xmin=7.1, aes(ymax=ymax, ymin=ymin),colour="black", fill=colmat[8,]) +
#coord_polar(theta="y") +
xlim(c(0, 8)) +
#theme(panel.grid=element_blank()) +
#theme(panel.background=element_blank()) +
#theme(axis.text=element_text("feature")) +
#theme(axis.ticks=element_blank()) +
theme(aspect.ratio=4) +
labs(title="Distribution of mapped reads in features", xlab="Stage", ylab="proportion of reads mapped in features")
save.image(p, "revfig1a", png)
############# Peasron's chi square test for distribution
#chargement du fichier contenant le nb de counts
counts <- read.table("TableOfSumsFinalRPKM.txt", header=T, sep="\t")
counts <- data.frame (counts[,1],
(counts[,2]+counts[,4])/2,
(counts[,5]+counts[,6]+counts[,7])/3,
(counts[,8]+counts[,9])/2,
(counts[,10]+counts[,11])/2,
(counts[,12]+counts[,13]+counts[,14])/3,
(counts[,15]+counts[,16])/2,
(counts[,17]+counts[,18]+counts[,19])/3,
(counts[,20]+counts[,21]+counts[,22])/3
)
colnames(counts)=c("Feature", "ovo", "C", "Mor", "Bla", "Gas", "Tro", "Dla", "Spa")
Xsq <- chisq.test(
counts[,2:9], correct = TRUE,
p = rep(1/length(counts[,8:9]), length(counts[,8:9])), rescale.p = TRUE
)
### Kolmogorov Smirnof for pairwise comparisons between de stages
ks <- ks.test (counts$ovo, counts$C)
#### WAYYYYYY TO COMPLICATED AND UGLY !!!! WHAT SHOUL DBE USED IS ######
## counts <- read.table("TableOfSumsFinal.txt", header=T, sep="\t")
## colnames(counts) <- substr(colnames(counts), 1, nchar(colnames(counts))-1)
## bystage <- aggregate(t(counts), by = list(rownames(t(counts))), FUN = mean)
## rownames(bystage) <- bystage[ ,1]
##counts <- t(bystage[ ,-1])
### Reorder:
## ordre <- c("ovo","X28cell","morula","blastula","gastrula","troc","dlarv","spat")
## counts <- counts[ ,ordre]