-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathFinalScriptForTadpole.R
198 lines (137 loc) · 8.76 KB
/
FinalScriptForTadpole.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
##Load the Datasets
library(readxl)
# Read the raw TADPOLE tables. Every token listed in na.strings (including the
# "-4" / "-4.0" codes) is converted to NA while reading; D3 additionally maps
# the literal "NaN" to NA.
TADPOLE_D1_D2_Dict <- read.csv(
  "C:/Users/jtame/Dropbox (Personal)/Documents/FRESACAD/TADPOLE/TADPOLE/TADPOLE_D1_D2_Dict.csv",
  na.strings = c("NA", "-4", "-4.0", "", " ")
)
TADPOLE_D1_D2 <- read.csv(
  "C:/Users/jtame/Dropbox (Personal)/Documents/FRESACAD/TADPOLE/TADPOLE/TADPOLE_D1_D2.csv",
  na.strings = c("NA", "-4", "-4.0", "", " ")
)
TADPOLE_D3 <- read.csv(
  "C:/Users/jtame/Dropbox (Personal)/Documents/FRESACAD/TADPOLE/TADPOLE/TADPOLE_D3.csv",
  na.strings = c("NA", "-4", "-4.0", "", " ", "NaN")
)
TADPOLE_D4_corr <- read.csv("~/GitHub/R_Python_interoperability/data/TADPOLE_D4_corr.csv")

# Submission template: one row per requested forecast.
submissionTemplate <- as.data.frame(read_excel("TADPOLE_Simple_Submission_TeamName.xlsx"))
# "-01" is appended before parsing so the value becomes a full date
# (presumably the sheet stores "YYYY-MM" -- confirm against the template).
submissionTemplate$`Forecast Date` <- as.Date(paste0(submissionTemplate$`Forecast Date`, "-01"))

# Reset every forecast column to a numeric placeholder vector of zeros.
# The previous as.numeric(nrow(submissionTemplate)) produced a single number
# (the row count) that was recycled down each column, and it failed outright
# on a template with zero rows.
forecastColumns <- c(
  "CN relative probability",
  "MCI relative probability",
  "AD relative probability",
  "ADAS13",
  "ADAS13 50% CI lower",
  "ADAS13 50% CI upper",
  "Ventricles_ICV",
  "Ventricles_ICV 50% CI lower",
  "Ventricles_ICV 50% CI upper"
)
for (forecastColumn in forecastColumns) {
  submissionTemplate[[forecastColumn]] <- numeric(nrow(submissionTemplate))
}

# Exam dates are parsed into Date objects so they can be ordered later.
TADPOLE_D1_D2$EXAMDATE <- as.Date(TADPOLE_D1_D2$EXAMDATE)
TADPOLE_D3$EXAMDATE <- as.Date(TADPOLE_D3$EXAMDATE)
submissionTemplate <- submissionTemplate[order(submissionTemplate$`Forecast Month`), ]
# Data split
# Rows flagged D1 == 1 form the training cohort and rows flagged D2 == 1 form
# the D2 test cohort; rows whose flag is NA are left out of both.
TrainingSet <- TADPOLE_D1_D2[which(TADPOLE_D1_D2$D1 == 1), ]
D2TesingSet <- TADPOLE_D1_D2[which(TADPOLE_D1_D2$D2 == 1), ]
# Key every visit row as "<RID>_<VISCODE>".
row.names(TrainingSet) <- paste(TrainingSet$RID, TrainingSet$VISCODE, sep = "_")
row.names(D2TesingSet) <- paste(D2TesingSet$RID, D2TesingSet$VISCODE, sep = "_")
row.names(TADPOLE_D3) <- paste(TADPOLE_D3$RID, TADPOLE_D3$VISCODE, sep = "_")
#DataProcessing
# Load the project's helper scripts from the local ~/GitHub/TADPOLE checkout.
# NOTE(review): the helper functions called later in this script
# (dataTADPOLEPreprocesing, TrainTadpoleClassModels, forecastCognitiveStatus,
# FiveYearForeCast, TrainTadpoleRegresionModels) are presumably defined by
# these files -- confirm, and confirm that whatever provides BSWiMS.model is
# loaded by one of them.
source('~/GitHub/TADPOLE/dataPreprocessing.R')
source('~/GitHub/TADPOLE/TADPOLE_Train.R')
source('~/GitHub/TADPOLE/predictCognitiveStatus.R')
source('~/GitHub/TADPOLE/FiveYearForecast.R')
source('~/GitHub/TADPOLE/TADPOLE_Train_ADAS_ICV.R')
source('~/GitHub/TADPOLE/predictTADPOLERegresions.R')
### Data conditioning and preparation for D2
# Order both cohorts by subject (numeric RID) and, within a subject, by exam
# date. order() leaves ties in their existing order, so one two-key call gives
# the same result as sorting by date first and by RID second.
TrainingSet <- TrainingSet[order(as.numeric(TrainingSet$RID), TrainingSet$EXAMDATE), ]
D2TesingSet <- D2TesingSet[order(as.numeric(D2TesingSet$RID), D2TesingSet$EXAMDATE), ]

# The returned list is used in this script through its Test_Imputed,
# Train_Imputed, AdjustedTrainFrame and testingFrame elements.
dataTadpole <- dataTADPOLEPreprocesing(
  TrainingSet, D2TesingSet, TADPOLE_D1_D2_Dict,
  MinVisit = 36,
  colImputeThreshold = 0.25,
  rowImputeThreshold = 0.25
)
tdf <- dataTadpole$Test_Imputed

# Visual check on the imputed test data: last visit of each subject,
# current value plotted against the baseline (_bl) value.
ltptf <- dataTadpole$Test_Imputed
ltptf <- ltptf[order(as.numeric(ltptf$RID), ltptf$EXAMDATE), ]
rids <- ltptf$RID
# Keep only the final row of each RID. duplicated(fromLast = TRUE) replaces
# the former rids[1:(length(rids)-1)] != rids[-1] comparison, which errors on
# an empty frame and produces NA row selectors when an RID is missing.
ltptf <- ltptf[!duplicated(rids, fromLast = TRUE), ]
rownames(ltptf) <- ltptf$RID
plot(ltptf$Ventricles, ltptf$Ventricles_bl)
plot(ltptf$ADAS13, ltptf$ADAS13_bl)

# Same visual check on the imputed training data.
ltptf <- dataTadpole$Train_Imputed
ltptf <- ltptf[order(as.numeric(ltptf$RID), ltptf$EXAMDATE), ]
rids <- ltptf$RID
ltptf <- ltptf[!duplicated(rids, fromLast = TRUE), ]
rownames(ltptf) <- ltptf$RID
plot(ltptf$Ventricles, ltptf$Ventricles_bl)
plot(ltptf$ADAS13, ltptf$ADAS13_bl)

# Persist the preprocessed frames so later stages can be rerun without
# repeating the preprocessing.
save(dataTadpole, file = "D2DataFrames.RDATA")
## Train the D2 cognitive-status models (25 random samples)
# Predictors are AGE, PTGENDER and every column of the adjusted training frame
# after the first 22.
d2ClassPredictors <- c(
  "AGE",
  "PTGENDER",
  colnames(dataTadpole$AdjustedTrainFrame)[-seq_len(22)]
)
CognitiveClassModels <- TrainTadpoleClassModels(
  dataTadpole$AdjustedTrainFrame,
  predictors = d2ClassPredictors,
  numberOfRandomSamples = 25,
  delta = TRUE,
  MLMethod = BSWiMS.model,
  NumberofRepeats = 1
)
save(CognitiveClassModels, file = "CognitiveClassModels_25.RDATA")

## Apply the trained models to the D2 testing frame
predictADNI <- forecastCognitiveStatus(CognitiveClassModels, dataTadpole$testingFrame)
### Training the ADAS13 and Ventricles regression models
# Attach the regression targets to the adjusted training frame, looked up in
# TrainingSet by row name: Ventricles is stored as the Ventricles / ICV ratio,
# ADAS13 is copied as is.
d2TrainRowIds <- rownames(dataTadpole$AdjustedTrainFrame)
dataTadpole$AdjustedTrainFrame$Ventricles <-
  TrainingSet[d2TrainRowIds, "Ventricles"] / TrainingSet[d2TrainRowIds, "ICV"]
dataTadpole$AdjustedTrainFrame$ADAS13 <- TrainingSet[d2TrainRowIds, "ADAS13"]

## Fit the regression models on the D1 training data (50 random samples)
# The predictor list is built after the two target columns are assigned, so it
# reflects the frame's columns at that point.
d2RegresPredictors <- c(
  "AGE",
  "PTGENDER",
  colnames(dataTadpole$AdjustedTrainFrame)[-seq_len(22)]
)
CognitiveRegresModels <- TrainTadpoleRegresionModels(
  dataTadpole$AdjustedTrainFrame,
  predictors = d2RegresPredictors,
  numberOfRandomSamples = 50,
  MLMethod = BSWiMS.model,
  NumberofRepeats = 1
)
save(CognitiveRegresModels, file = "CognitiveRegresModels_50_Nolog.RDATA")
### Ventricles and ADAS13 prediction preparation
## Transforming the test data set
# Take Ventricles (as the Ventricles / ICV ratio) and ADAS13 from the imputed
# test data, matching rows by name.
d2TestRowIds <- rownames(dataTadpole$testingFrame)
dataTadpole$testingFrame$Ventricles <-
  dataTadpole$Test_Imputed[d2TestRowIds, "Ventricles"] /
  dataTadpole$Test_Imputed[d2TestRowIds, "ICV"]
dataTadpole$testingFrame$ADAS13 <- dataTadpole$Test_Imputed[d2TestRowIds, "ADAS13"]

## The last time point of each subject, required for forecasting ADAS13 and
## Ventricles
ltptf <- dataTadpole$testingFrame
# Sort by subject, then by exam date within subject (order() is stable, so
# this equals sorting by date first and RID second).
ltptf <- ltptf[order(as.numeric(ltptf$RID), ltptf$EXAMDATE), ]
rids <- ltptf$RID
# Keep only the final row of each RID. duplicated(fromLast = TRUE) replaces
# the former rids[1:(length(rids)-1)] != rids[-1] comparison, which errors on
# an empty frame and produces NA row selectors when an RID is missing.
ltptf <- ltptf[!duplicated(rids, fromLast = TRUE), ]
rownames(ltptf) <- ltptf$RID

### Forecasting 5 years. Per the original note, the forecast is transformed
### back to the actual (measurement) space.
forecast <- FiveYearForeCast(
  predictADNI,
  testDataset = ltptf,
  ADAS_Ventricle_Models = CognitiveRegresModels,
  Subject_datestoPredict = submissionTemplate
)
write.csv(forecast, file = "ForecastD2_BORREGOS_TEC.csv")
# D3 cross-sectional
## Remove from the training set every subject whose RID appears in TADPOLE_D3
## (the earlier comment said "D2 subjects", but the filter uses TADPOLE_D3$RID).
D3IDS <- TADPOLE_D3$RID
D3TrainingSet <- TrainingSet[!(TrainingSet$RID %in% D3IDS), ]

## Conditioning the data sets
dataTadpoleD3 <- dataTADPOLEPreprocesing(
  D3TrainingSet, TADPOLE_D3, TADPOLE_D1_D2_Dict,
  MinVisit = 18,
  colImputeThreshold = 0.15,
  rowImputeThreshold = 0.25,
  includeID = FALSE
)
save(dataTadpoleD3, file = "D3DataFrames.RDATA")

# Visual check on the imputed D3 test data: last visit of each subject,
# Ventricles plotted against its baseline (_bl) value.
ltptf <- dataTadpoleD3$Test_Imputed
# Sort by subject, then by exam date within subject (order() is stable, so
# this equals sorting by date first and RID second).
ltptf <- ltptf[order(as.numeric(ltptf$RID), ltptf$EXAMDATE), ]
rids <- ltptf$RID
# Keep only the final row of each RID. duplicated(fromLast = TRUE) replaces
# the former rids[1:(length(rids)-1)] != rids[-1] comparison, which errors on
# an empty frame and produces NA row selectors when an RID is missing.
ltptf <- ltptf[!duplicated(rids, fromLast = TRUE), ]
rownames(ltptf) <- ltptf$RID
plot(ltptf$Ventricles, ltptf$Ventricles_bl)
## Build the D3 cognitive-status models (numberOfRandomSamples is 25)
# Predictors are AGE, PTGENDER and every column of the adjusted D3 training
# frame after the first 22.
d3ClassPredictors <- c(
  "AGE",
  "PTGENDER",
  colnames(dataTadpoleD3$AdjustedTrainFrame)[-seq_len(22)]
)
D3CognitiveClassModels <- TrainTadpoleClassModels(
  dataTadpoleD3$AdjustedTrainFrame,
  predictors = d3ClassPredictors,
  numberOfRandomSamples = 25,
  MLMethod = BSWiMS.model,
  NumberofRepeats = 1
)
save(D3CognitiveClassModels, file = "D3CognitiveClassModels_25.RDATA")

## Predict the cognitive status for the whole D3 testing frame
predictADNID3 <- forecastCognitiveStatus(D3CognitiveClassModels, dataTadpoleD3$testingFrame)
## Train the D3 regression models for ADAS13 and Ventricles
# Attach the regression targets to the adjusted D3 training frame, looked up
# in D3TrainingSet by row name: Ventricles is stored as the Ventricles / ICV
# ratio, ADAS13 is copied as is.
d3TrainRowIds <- rownames(dataTadpoleD3$AdjustedTrainFrame)
dataTadpoleD3$AdjustedTrainFrame$Ventricles <-
  D3TrainingSet[d3TrainRowIds, "Ventricles"] / D3TrainingSet[d3TrainRowIds, "ICV"]
dataTadpoleD3$AdjustedTrainFrame$ADAS13 <- D3TrainingSet[d3TrainRowIds, "ADAS13"]

# The predictor list is built after the two target columns are assigned, so it
# reflects the frame's columns at that point.
d3RegresPredictors <- c(
  "AGE",
  "PTGENDER",
  colnames(dataTadpoleD3$AdjustedTrainFrame)[-seq_len(22)]
)
D3RegresModels <- TrainTadpoleRegresionModels(
  dataTadpoleD3$AdjustedTrainFrame,
  predictors = d3RegresPredictors,
  numberOfRandomSamples = 50,
  MLMethod = BSWiMS.model,
  NumberofRepeats = 1
)
save(D3RegresModels, file = "D3RegresModelss_50_Nolog.RDATA")
## Predict the D3 ADAS13 and Ventricles
# Take Ventricles (as the Ventricles / ICV ratio) and ADAS13 from the imputed
# D3 test data, matching rows by name.
d3TestRowIds <- rownames(dataTadpoleD3$testingFrame)
dataTadpoleD3$testingFrame$Ventricles <-
  dataTadpoleD3$Test_Imputed[d3TestRowIds, "Ventricles"] /
  dataTadpoleD3$Test_Imputed[d3TestRowIds, "ICV"]
dataTadpoleD3$testingFrame$ADAS13 <- dataTadpoleD3$Test_Imputed[d3TestRowIds, "ADAS13"]

### The last D3 time point of each subject
ltptf <- dataTadpoleD3$testingFrame
# Sort by subject, then by exam date within subject (order() is stable, so
# this equals sorting by date first and RID second).
ltptf <- ltptf[order(as.numeric(ltptf$RID), ltptf$EXAMDATE), ]
rids <- ltptf$RID
# Keep only the final row of each RID. duplicated(fromLast = TRUE) replaces
# the former rids[1:(length(rids)-1)] != rids[-1] comparison, which errors on
# an empty frame and produces NA row selectors when an RID is missing.
ltptf <- ltptf[!duplicated(rids, fromLast = TRUE), ]
rownames(ltptf) <- ltptf$RID

## Forecast the testing set
forecastD3 <- FiveYearForeCast(
  predictADNID3,
  testDataset = ltptf,
  ADAS_Ventricle_Models = D3RegresModels,
  Subject_datestoPredict = submissionTemplate
)
write.csv(forecastD3, file = "ForecastD3_BORREGOS_TEC.csv")