-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPROvsmRNA.r
130 lines (104 loc) · 4.65 KB
/
PROvsmRNA.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
############## PROMOTERS / mRNA expression #############
rm(list=ls(all=TRUE))
##########
setwd("D:/BOULOT/RECHERCHE/MeDIPseq/analyse_MeDIP/ElizabethCrowell/Bouinages_Guillaume/Guillaume_SourceFiles")
getwd()
########Chargement des dpnnées
message("loading data...")
#variants
variants<- read.table("expdata/variants.txt", header=T, sep="\t")
#mRNA expression par stade (from oyster v9)
mRNAByStage<-read.table("expdata/expdevmerge.txt", header=T, sep="\t")
mRNACV<-read.table("expdata/expdevCV.txt", header=T, sep="\t")
#longueur des gènes pour normalisation
GENlength<-read.table("GENlength.txt", header=T, sep="\t")
#methylation
methCDS<-read.table("countsCDSfiltered.txt", header=T, sep="\t")
methINT<-read.table("countsINTfiltered.txt", header=T, sep="")
methGB<-read.table("countsGENfiltered.txt", header=T, sep="\t")
FractInCDS<-read.table("FractionInCDSDevAll.txt", header=T, sep="\t")
methCDSvsINT <- read.table("countsnCDS_INT_filtered_devall.txt",header = TRUE, sep="\t")
methPRO<- read.table("countsPROfiltered.txt",header = TRUE, sep="")
#Gene subsets
DevGiga<-read.table("gigaton_dev.txt",header = TRUE, sep="\t")
hox<-read.table("Homeobox_genes.txt",header = TRUE, sep="\t")
######### ANOVA ratioINTCDS vs DEV STAGE ###########
# ANOVA by row (factor is "development stage"):
ds <- methPRO
rownames(ds) <- ds[ ,1]; ds <- ds[ ,-1]
colnames(ds) <- substr(colnames(ds), 1, nchar(colnames(ds))-4)
testRes <- numeric(nrow(ds))
message("Running ANOVA against development stages...")
for (i in 1:nrow(ds))
{
values <- as.numeric(ds[i, ])
test <- aov(values ~ as.factor(colnames(ds)))
testRes[i] <- summary(test)[[1]]$"Pr(>F)"[1]
}
methPRODevAll<-data.frame(methPRO, testRes)
# Exclude all genes rows for which ANOVA gave non-significant results:
df<-methPRODevAll[!is.na(methPRODevAll$testRes)& methPRODevAll$testRes< 0.01,]
message(paste(dim(df)[1], " genes kept", sep = ""))
#moyenne par stade colonnes ordonnées
colnames(methPRO)
# [1] "GeneID" "X28cell1pro" "X28cell2pro" "X28cell3pro" "blastula2pro"
#[6] "blastula3pro" "dlarv1pro" "dlarv2pro" "dlarv3pro" "gastrula1pro"
#[11] "gastrula2pro" "gastrula3pro" "morula1pro" "morula2pro" "ovo1pro"
#[16] "ovo2pro" "spat1pro" "spat2pro" "spat3pro" "troc1pro"
#[21] "troc2pro"
methPROByStage<-data.frame(df[,1],
(df[,15]+df[,16])/2,#ovo
(df[,2]+df[,3]+df[,4])/3,#28
(df[,13]+df[,14])/2,#mor
(df[,5]+df[,6])/2,#bla
(df[,10]+df[,11]+df[,12])/3,#gas
(df[,20]+df[,21])/2,#tro
(df[,7]+df[,8]+df[,8])/3,#Dla
(df[,17]+df[,18]+df[,19])/3 #spa
)
colnames(methPROByStage)<-c("GeneID","ovo","x28","mor","bla","gas","tro","Dla","spa")
#jointure methPRO et expression
pro.exp<-merge (methPROByStage, mRNAByStage, by="GeneID")
pro.exp<-as.data.frame(pro.exp)
#filtrage expression nulle
pro.exp<-pro.exp[pro.exp$expmoydev!=0,]
#Préparation du data.frame qui reçoit les résultats de corrélation
correlation<-data.frame(c(0),c(0),c(0),c(0))
colnames(correlation)<-c("id","p.value","r2","pente")
i<-1
#correlation
for (i in 1:nrow(pro.exp))
{
#correlation[,1]<-exp.met
y<-as.numeric(t(pro.exp[i,2:9]))
x<-as.numeric(t(pro.exp[i,10:17]))
lm.x.y<-lm(y~x)
resultat<-summary(lm(y~x))[[4]]
p<-resultat[2,4]
r2<-summary(lm(y~x))[[8]]
pente<-resultat[2,1]
#correlation[i,1]<-id
#correlation[i,1]<-exp.met[i,1]
correlation[i,2]<-p
correlation[i,3]<-r2
correlation[i,4]<-pente
}
correlation[,1]<-pro.exp[,1]
#write.table(correlation, "resultat.txt", sep=";", row.names=F,col.names=T)
#sélectionner les corrélations inférieures à un seuil
correlation.significative<-correlation[correlation$p.value<=0.05,]
#write.table(correlation.significative, "resultat_signif.txt", sep=";", row.names=F,col.names=T)
#représentation graphique
id.significatif<-row.names(correlation.significative)
id.significatif<-as.integer(id.significatif)
summary(correlation.significative)
summary(correlation.significative$pente>0)
plot(x=pro.exp[id.significatif,]$ovo, y=pro.exp[id.significatif,]$expov)
#save.image("evsgbMsovo.jpg")
plot(x=pro.exp[id.significatif,]$x28, y=pro.exp[id.significatif,]$exp2.8cells)
plot(x=pro.exp[id.significatif,]$mor, y=pro.exp[id.significatif,]$expmo)
plot(x=pro.exp[id.significatif,]$bla, y=pro.exp[id.significatif,]$expbl)
plot(x=pro.exp[id.significatif,]$gas, y=pro.exp[id.significatif,]$expga)
plot(x=pro.exp[id.significatif,]$tro, y=pro.exp[id.significatif,]$exptr)
plot(x=pro.exp[id.significatif,]$Dla, y=pro.exp[id.significatif,]$expdl)
plot(x=pro.exp[id.significatif,]$spa, y=pro.exp[id.significatif,]$expsp)