-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTADPOLE_NL_to_AD.Rmd
324 lines (254 loc) · 12.1 KB
/
TADPOLE_NL_to_AD.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
---
title: "TADPOLE"
author: "José Tamez-Peña"
date: "October 20, 2017"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
```{r, echo = FALSE}
knitr::opts_chunk$set(collapse = TRUE, warning = FALSE, message = FALSE,comment = "#>")
```
```{r Libraries, echo = FALSE}
library("epiR")
library("FRESA.CAD")
library(network)
library(GGally)
library("e1071")
TADPOLE_BASIC <- read.delim("AllSubjectsWithBasicInfo.txt")
TADPOLE_CROSSMRI <- read.delim("CompleMRIObs.txt",na.strings = c("NA","","#DIV/0!"))
TADPOLE_predictors <- read.delim("PredictorsList.txt")
rownames(TADPOLE_BASIC) <- as.character(TADPOLE_BASIC$IDTIME)
rownames(TADPOLE_CROSSMRI) <- as.character(TADPOLE_CROSSMRI$ID_TP)
```
```{r subsets}
months <- c(0,3,6,12,18,24,30,36,42,48,54,60,66,72,78,84,90,96,102,108,114,120)
TRAIN_TADPOLEMRI <- subset(TADPOLE_CROSSMRI,TADPOLE_CROSSMRI$D1==1 & TADPOLE_CROSSMRI$D2==0)
TRAIN_TADPOLEBASIC <- subset(TADPOLE_BASIC,TADPOLE_BASIC$D1==1 & TADPOLE_BASIC$D2==0)
TEST_TADPOLEMRI <- subset(TADPOLE_CROSSMRI,TADPOLE_CROSSMRI$D2==1)
TEST_TADPOLEBASIC <- subset(TADPOLE_BASIC,TADPOLE_BASIC$D2==1)
```
```{r Adjusting for age and ICV}
TRAIN_TADPOLEMRI$cICV <- TRAIN_TADPOLEMRI$ICV^(1/3)
TEST_TADPOLEMRI$cICV <- TEST_TADPOLEMRI$ICV^(1/3)
rownames(TADPOLE_predictors) <- as.character(TADPOLE_predictors[,1])
prednames <- as.character(TADPOLE_predictors[-c(1:17),1])
TRAIN_TADPOLEMRI <- TRAIN_TADPOLEMRI[complete.cases(TRAIN_TADPOLEMRI[,prednames]),]
CTest_TADPOLEMRI <- TEST_TADPOLEMRI[complete.cases(TEST_TADPOLEMRI[,prednames]),]
ControlNormal <- subset(TRAIN_TADPOLEMRI,BXBL=="CN" & LastDX2=="NL" & LastVisit>2 & Month==0)
ControlNormal2 <- subset(CTest_TADPOLEMRI,BXBL=="CN" & LastDX2=="NL" & LastVisit>2 & Month==0)
ControlNormal <- rbind(ControlNormal,ControlNormal2)
sum(ControlNormal$PTGENDER==1)
sum(ControlNormal$PTGENDER==2)
hist(ControlNormal$MeanT)
hist(subset(ControlNormal,PTGENDER==1)$MeanT)
hist(subset(ControlNormal,PTGENDER==2)$MeanT)
trainTadploe.adj <- featureAdjustment(TADPOLE_predictors[prednames,], baseModel="1+AGE+cICV",data=TRAIN_TADPOLEMRI,referenceframe=ControlNormal,strata="PTGENDER", type = "LM", pvalue = 0.001)
testTadploe.adj <- featureAdjustment(TADPOLE_predictors[prednames,], baseModel="1+AGE+cICV",data=CTest_TADPOLEMRI,referenceframe=ControlNormal,strata="PTGENDER", type = "LM", pvalue = 0.001)
ControlNormal.adj <- featureAdjustment(TADPOLE_predictors[prednames,], baseModel="1+AGE+cICV",data=ControlNormal,referenceframe=ControlNormal,strata="PTGENDER", type = "LM", pvalue = 0.001)
hist(subset(ControlNormal.adj,PTGENDER==1)$MeanT,breaks = 15)
hist(subset(ControlNormal.adj,PTGENDER==2)$MeanT,breaks = 15)
hist(subset(ControlNormal.adj,PTGENDER==1)$MeanSAD,breaks = 15)
hist(subset(ControlNormal.adj,PTGENDER==2)$MeanSAD,breaks = 15)
#testTadploe.norm <- rankInverseNormalDataFrame(TADPOLE_predictors[prednames,], testTadploe.adj, ControlNormal.adj,strata="PTGENDER")
trainTadploe.norm <- rankInverseNormalDataFrame(TADPOLE_predictors[prednames,], trainTadploe.adj, ControlNormal.adj)
testTadploe.norm <- rankInverseNormalDataFrame(TADPOLE_predictors[prednames,], testTadploe.adj, ControlNormal.adj)
mean(testTadploe.norm$MeanT)
mean(trainTadploe.norm$MeanT)
mean(testTadploe.norm$MeanSAD)
mean(trainTadploe.norm$MeanSAD)
plot(testTadploe.norm$MeanT~testTadploe.adj$MeanT)
plot(trainTadploe.norm$MeanT~trainTadploe.adj$MeanT)
plot(testTadploe.norm$MeanSAD~testTadploe.adj$MeanSAD)
plot(trainTadploe.norm$MeanSAD~trainTadploe.adj$MeanSAD)
```
```{r spliting by visit}
VISIT_TRAINCROSSMRI <- list()
VISIT_TRAINBASIC <- list()
VISIT_TESTCROSSMRI <- list()
VISIT_TESTBASIC <- list()
i = 1;
for (j in months )
{
VISIT_TRAINCROSSMRI[[i]] <- subset(trainTadploe.adj,trainTadploe.adj$Month==j)
VISIT_TRAINBASIC[[i]] <- subset(TRAIN_TADPOLEBASIC,TRAIN_TADPOLEBASIC$Month2==j)
VISIT_TESTCROSSMRI[[i]] <- subset(testTadploe.adj,testTadploe.adj$Month==j)
VISIT_TESTBASIC[[i]] <- subset(TEST_TADPOLEBASIC,TEST_TADPOLEBASIC$Month2==j)
i = i + 1
}
sampledcolumns <- c("RID",as.character(TADPOLE_predictors[,1]));
testLastTimePointWithNL <- subset(testTadploe.adj,LastComleteObs==1 & LastDX2=="NL")
testLastTimePointWithMCI <- subset(testTadploe.adj,LastComleteObs==1 & LastDX2=="MCI")
testLastTimePointWithAD <- subset(testTadploe.adj,LastComleteObs==1 & LastDX2=="Dementia")
testWithNL <- subset(testTadploe.adj,LastDX2=="NL")
testWithMCI <- subset(testTadploe.adj,LastDX2=="MCI")
testWithAD <- subset(testTadploe.adj,LastDX2=="Dementia")
checkIDs <- c(as.character(testLastTimePointWithNL$RID),as.character(testLastTimePointWithMCI$RID),as.character(testLastTimePointWithAD$RID))
#write.csv(checkIDs,file="TestIDsWithCO.csv")
```
```{r univariate NL to AD}
CASESNLtoAD <- NULL
yearNLtoAD <- NULL
controlYearNLtoAD <- NULL;
CONTROLNLtoAD <- NULL
basecolumns <- sampledcolumns[-c(1:7)]
tmpss <- subset(VISIT_TRAINCROSSMRI[[1]],NL_AD==1 & BDX==1)
BLCASESNLtoAD <- tmpss[,basecolumns]
tmpss <- subset(VISIT_TESTCROSSMRI[[1]],NL_AD==1 & BDX==1)
BLCASESNLtoAD <- rbind(BLCASESNLtoAD,tmpss[,basecolumns])
tmpss <- subset(VISIT_TRAINCROSSMRI[[1]],BXBL=="CN" & LastDX2=="NL" & LastVisit>3)
BLCONTROLNLtoAD <- tmpss[,basecolumns]
tmpss <- subset(VISIT_TESTCROSSMRI[[1]],BXBL=="CN" & LastDX2=="NL" & LastVisit>3)
BLCONTROLNLtoAD <- rbind(BLCONTROLNLtoAD,tmpss[,basecolumns])
for (j in 1:length(VISIT_TRAINCROSSMRI) )
{
tmpss <- subset(VISIT_TRAINCROSSMRI[[j]],NL_AD==1 & BDX==1)
yearNLtoAD <- c(yearNLtoAD,(tmpss$YearNLtoAD))
CASESNLtoAD <- rbind(CASESNLtoAD,tmpss[,sampledcolumns])
tmpss <- subset(VISIT_TESTCROSSMRI[[j]],NL_AD==1 & BDX==1 )
yearNLtoAD <- c(yearNLtoAD,(tmpss$YearNLtoAD))
CASESNLtoAD <- rbind(CASESNLtoAD,tmpss[,sampledcolumns])
tmpss <- subset(VISIT_TRAINCROSSMRI[[j]],BXBL=="CN" & LastDX2=="NL" & LastVisit>3)
controlYearNLtoAD <- c(controlYearNLtoAD,(tmpss$YearNLtoAD))
CONTROLNLtoAD <- rbind(CONTROLNLtoAD,tmpss[,sampledcolumns])
tmpss <- subset(VISIT_TESTCROSSMRI[[j]],BXBL=="CN" & LastDX2=="NL" & LastVisit>3)
controlYearNLtoAD <- c(controlYearNLtoAD,(tmpss$YearNLtoAD))
CONTROLNLtoAD <- rbind(CONTROLNLtoAD,tmpss[,sampledcolumns])
}
CASESNLtoAD <- cbind(yearNLtoAD,CASESNLtoAD)
CASESNLtoAD <- CASESNLtoAD[complete.cases(CASESNLtoAD),]
CONTROLNLtoAD <- cbind(controlYearNLtoAD,CONTROLNLtoAD)
CONTROLNLtoAD <- CONTROLNLtoAD[complete.cases(CONTROLNLtoAD),]
NLAZUniRankFeaturesRaw <- univariateRankVariables(variableList = TADPOLE_predictors,
formula = "yearNLtoAD ~ 1",
Outcome = "yearNLtoAD",
data = CASESNLtoAD,
categorizationType = "Raw",
type = "LM",
rankingTest = "Ztest",
description = "Description",
uniType="Regression")
```
```{r Modeling NL to AD}
ids <- unique(as.character(CASESNLtoAD$RID))
NLtoAD <- list();
baggedformula <- character();
baggedformula2 <- character();
for (n in 1:100)
{
singlecaseNLtoAD <- NULL;
for (i in ids)
{
case1 <- subset(CASESNLtoAD,RID==i)
caserows <- nrow(case1)
if (caserows>1)
{
singlecaseNLtoAD <- rbind(singlecaseNLtoAD,case1[sample(caserows, 1),])
}
else
{
singlecaseNLtoAD <- rbind(singlecaseNLtoAD,case1)
}
}
singlecaseNLtoAD$RID <- NULL;
print(nrow(singlecaseNLtoAD))
system.time(NLtoAD[[n]] <- FRESA.Model(yearNLtoAD ~ 1,singlecaseNLtoAD))
if (length(NLtoAD[[n]]$bagging$bagged.model$coefficients)>1)
{
baggedformula <- append(baggedformula,NLtoAD[[n]]$bagging$formula)
}
if (length(NLtoAD[[n]]$BSWiMS.model$coefficients)>1)
{
baggedformula2 <- append(baggedformula2,NLtoAD[[n]]$BSWiMS.model$formula)
}
}
save(NLtoAD,file="NLtoAD.RDATA")
mp <- medianPredict(baggedformula,CASESNLtoAD,predictType ="linear",type="LM")
plot(CASESNLtoAD$yearNLtoAD~mp$medianPredict)
cor.test(CASESNLtoAD$yearNLtoAD,mp$medianPredict)
bgmod <- baggedModel(as.character(baggedformula2),CASESNLtoAD,type="LM",univariate=NLtoAD[[1]]$univariateAnalysis)
barplot(bgmod$frequencyTable)
mpb <- medianPredict(bgmod$formula,CASESNLtoAD,predictType ="linear",type="LM")
plot(CASESNLtoAD$yearNLtoAD~mpb$medianPredict)
cor.test(CASESNLtoAD$yearNLtoAD,mpb$medianPredict)
testLastTimePointWithNL$yearNLtoAD <- numeric(nrow(testLastTimePointWithNL))
mp <- medianPredict(baggedformula,CASESNLtoAD,testLastTimePointWithNL,predictType ="linear",type="LM")
cdft <- NULL
x <- (1:60)/12
for (i in 1:nrow(mp$predictions))
{
fn <- ecdf(as.numeric(mp$predictions[i,-1]))
cdft <- rbind(cdft,fn(x))
}
rownames(cdft) <- rownames(mp$predictions)
write.csv(cdft,file="NLtoADTimeCDF.csv")
write.csv(mp$predictions,file="NLtoADTimePrediction.csv")
```
```{r predicting CN to AD conversion}
classNLtoAD <- CASESNLtoAD;
classNLtoAD$yearNLtoAD <- NULL
classNLtoAD$Event <- 1
cclassNLtoAD <- CONTROLNLtoAD
cclassNLtoAD$controlYearNLtoAD <- NULL
cclassNLtoAD$Event <- 0
samplecontrol <- sample(nrow(cclassNLtoAD),nrow(cclassNLtoAD)/2)
#sampletrain <- sample(nrow(classNLtoAD),4*nrow(classNLtoAD)/5)
#trainDataNLtoAD <- rbind(classNLtoAD[sampletrain,],cclassNLtoAD[samplecontrol,])
trainDataNLtoAD <- rbind(classNLtoAD,cclassNLtoAD[samplecontrol,])
testDataNLtoAD <- rbind(classNLtoAD,cclassNLtoAD[-samplecontrol,])
ids <- unique(as.character(trainDataNLtoAD$RID))
PreNLtoAD <- list();
baggedformula <- character();
baggedformula2 <- character();
for (n in 1:25)
{
singlecaseNLtoAD <- NULL;
for (i in ids)
{
case1 <- subset(trainDataNLtoAD,RID==i)
caserows <- nrow(case1)
if (caserows>1)
{
singlecaseNLtoAD <- rbind(singlecaseNLtoAD,case1[sample(caserows, 1),])
}
else
{
singlecaseNLtoAD <- rbind(singlecaseNLtoAD,case1)
}
}
singlecaseNLtoAD$RID <- NULL;
system.time(PreNLtoAD[[n]] <- FRESA.Model(Event ~ 1,singlecaseNLtoAD))
}
save(PreNLtoAD,file="LogitLNtoAD.RDATA")
for (n in 1:25)
{
if (length(PreNLtoAD[[n]]$bagging$bagged.model$coefficients)>1)
{
baggedformula <- append(baggedformula,PreNLtoAD[[n]]$bagging$formula)
}
if (length(PreNLtoAD[[n]]$BSWiMS.model$coefficients)>1)
{
baggedformula2 <- append(baggedformula2,PreNLtoAD[[n]]$BSWiMS.model$formula)
}
}
bgmod <- baggedModel(as.character(baggedformula2),trainDataNLtoAD,type="LOGIT",univariate=PreNLtoAD[[1]]$univariateAnalysis)
barplot(bgmod$frequencyTable)
mpb <- medianPredict(bgmod$formula,trainDataNLtoAD,predictType ="linear",type="LOGIT")
pm2b <- plotModels.ROC(mpb$predictions,main="Bagging",cex=0.90)
mptb <- medianPredict(bgmod$formula,trainDataNLtoAD,testDataNLtoAD,predictType ="linear",type="LOGIT")
pm2tb <- plotModels.ROC(mptb$predictions,main="Bagging",cex=0.90)
mptb <- medianPredict(bgmod$formula,trainDataNLtoAD,testDataNLtoAD,predictType ="linear",type="SVM")
mptb$predictions$V2 <- mptb$predictions$V2 -0.5
pm2tb <- plotModels.ROC(mptb$predictions,main="SVN",cex=0.90)
mp2 <- medianPredict(as.character(baggedformula2),trainDataNLtoAD,testDataNLtoAD,predictType ="linear",type="LOGIT")
pm2 <- plotModels.ROC(mp2$predictions,main="Bagging",cex=0.90)
mp3 <- medianPredict(as.character(baggedformula),trainDataNLtoAD,testDataNLtoAD,predictType ="linear",type="LOGIT")
pm3 <- plotModels.ROC(mp3$predictions,main="Bagging",cex=0.90)
mps3 <- medianPredict(as.character(baggedformula),trainDataNLtoAD,testDataNLtoAD,predictType ="linear",type="SVM")
mps3$predictions[,2:26] <- mps3$predictions[,2:26] -0.5
pms3 <- plotModels.ROC(mp3$predictions,main="SVM",cex=0.90)
testLastTimePointWithNL$Event <- rep(0,nrow(testLastTimePointWithNL))
predictionNormalsToAD <- medianPredict(as.character(baggedformula2),trainDataNLtoAD,testLastTimePointWithNL,predictType ="prob",type="LOGIT")
write.csv(predictionNormalsToAD$medianPredict,file="ADFromNormalPrediction.csv")
```