forked from thehyve/ShinyDeploy
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.R
121 lines (105 loc) · 6.23 KB
/
utils.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
summaryPlpAnalyses <- function(analysesLocation){
# loads the analyses and validations to get summaries
#========================================
settings <- read.csv(file.path(analysesLocation,'settings.csv'))
settings <- settings[,!colnames(settings)%in%c('plpDataFolder','studyPopFile','plpResultFolder')]
settings$analysisId <- paste0('Analysis_', settings$analysisId)
analysisIds <- dir(file.path(analysesLocation), recursive = F, full.names = T)
analysisIds <- analysisIds[grep('Analysis_',analysisIds)]
if(is.null(settings$devDatabase)){
settings$devDatabase <- 'Missing'
}
settings$valDatabase <- settings$devDatabase
devPerformance <- do.call(rbind,lapply(file.path(analysisIds), getPerformance))
devPerformance <- merge(settings[,c('analysisId','modelSettingsId', 'cohortName', 'outcomeName',
'populationSettingId','modelSettingName','addExposureDaysToStart',
'riskWindowStart', 'addExposureDaysToEnd',
'riskWindowEnd','devDatabase','valDatabase')],
devPerformance, by='analysisId', all.x=T)
validationLocation <- file.path(analysesLocation,'Validation')
if(length(dir(validationLocation))>0){
valPerformances <- c()
valDatabases <- dir(validationLocation, recursive = F, full.names = T)
for( valDatabase in valDatabases){
valAnalyses <- dir(valDatabase, recursive = F, full.names = T)
valAnalyses <- valAnalyses[grep('Analysis_', valAnalyses)]
valPerformance <- do.call(rbind,lapply(file.path(valAnalyses), function(x) getValidationPerformance(x)))
valSettings <- settings[,c('analysisId','modelSettingsId', 'cohortName', 'outcomeName',
'populationSettingId','modelSettingName','addExposureDaysToStart',
'riskWindowStart', 'addExposureDaysToEnd',
'riskWindowEnd')]
valSettings$devDatabase <- settings$devDatabase[1]
valPerformance <- merge(valSettings,
valPerformance, by='analysisId')
valPerformance <- valPerformance[,colnames(devPerformance)] # make sure same order
valPerformances <- rbind(valPerformances, valPerformance)
}
if(ncol(valPerformances)==ncol(devPerformance)){
allPerformance <- rbind(devPerformance,valPerformances)
} else{
stop('Issue with dev and val performance data.frames')
}
} else {
allPerformance <- devPerformance
}
allPerformance$AUC <- as.double(allPerformance$AUC)
allPerformance$AUPRC <- as.double(allPerformance$AUPRC)
allPerformance$outcomeCount <- as.double(allPerformance$outcomeCount)
allPerformance$populationSize <- as.double(allPerformance$populationSize)
allPerformance$incidence <- as.double(allPerformance$incidence)
return(allPerformance)
}
getPerformance <- function(analysisLocation){
location <- file.path(analysisLocation, 'plpResult.rds')
if(!file.exists(location)){
analysisId <- strsplit(analysisLocation, '/')[[1]]
return(data.frame(analysisId=analysisId[length(analysisId)],
AUC=0.000, AUPRC=0, outcomeCount=0,
populationSize=0,incidence=0,plpResultLocation=location,
plpResultLoad='loadPlpResult'))
}
# read rds here
res <- readRDS(file.path(analysisLocation,'plpResult.rds'))
res <- as.data.frame(res$performanceEvaluation$evaluationStatistics)
#if empty do edit?
res <- tryCatch(reshape2::dcast(res[res$Eval=='test',], analysisId ~ Metric, value.var='Value'),
error = function(cont) return(NULL))
if(is.null(res)){
return(NULL) }
res <- res[,!colnames(res)%in%c("BrierScore","BrierScaled")]
res$incidence <- as.double(res$outcomeCount)/as.double(res$populationSize)*100
res[, !colnames(res)%in%c('analysisId','outcomeCount','populationSize')] <-
format(as.double(res[, !colnames(res)%in%c('analysisId','outcomeCount','populationSize')]), digits = 2, scientific = F)
if(sum(colnames(res)=='AUC.auc_ub95ci')>0){
res$AUC <- res$AUC.auc
#res$AUC <- paste0(res$AUC.auc, ' (', res$AUC.auc_lb95ci,'-', res$AUC.auc_ub95ci,')')
}
res$plpResultLocation <- location
res$plpResultLoad <- 'readRDS'#'loadPlpResult'
return(res[,c('analysisId', 'AUC', 'AUPRC', 'outcomeCount','populationSize','incidence','plpResultLocation', 'plpResultLoad')])
}
getValidationPerformance <- function(validationLocation){
val <- readRDS(file.path(validationLocation,'validationResult.rds'))
if("performanceEvaluation"%in%names(val)){
valPerformance <- reshape2::dcast(as.data.frame(val$performanceEvaluation$evaluationStatistics),
analysisId ~ Metric, value.var='Value')
} else {
valPerformance <- reshape2::dcast(as.data.frame(val[[1]]$performanceEvaluation$evaluationStatistics),
analysisId ~ Metric, value.var='Value')
}
valPerformance$incidence <- as.double(valPerformance$outcomeCount)/as.double(valPerformance$populationSize)*100
valPerformance[, !colnames(valPerformance)%in%c('analysisId','outcomeCount','populationSize')] <-
format(as.double(valPerformance[, !colnames(valPerformance)%in%c('analysisId','outcomeCount','populationSize')]), digits = 2, scientific = F)
if(sum(colnames(valPerformance)=='AUC.auc_ub95ci')>0){
valPerformance$AUC <- valPerformance$AUC.auc
#valPerformance$AUC <- paste0(valPerformance$AUC.auc, ' (', valPerformance$AUC.auc_lb95ci,'-', valPerformance$AUC.auc_ub95ci,')')
}
valPerformance$analysisId <- strsplit(validationLocation, '/')[[1]][[length(strsplit(validationLocation, '/')[[1]])]]
valPerformance$valDatabase <- strsplit(validationLocation, '/')[[1]][[length(strsplit(validationLocation, '/')[[1]])-1]]
valPerformance <- valPerformance[,c('analysisId','valDatabase', 'AUC', 'AUPRC', 'outcomeCount','populationSize','incidence')]
valPerformance$plpResultLocation <- file.path(validationLocation,'validationResult.rds')
valPerformance$plpResultLoad <- 'readRDS'
#valPerformance$rocplot <- file.path(validationLocation,'plots','sparseROC.pdf')
#valPerformance$calplot <- file.path(validationLocation,'plots','sparseCalibrationConventional.pdf')
return(valPerformance)
}