diff --git a/incidenceMapR/R/latentFieldModel.R b/incidenceMapR/R/latentFieldModel.R index af347cc..be277e6 100644 --- a/incidenceMapR/R/latentFieldModel.R +++ b/incidenceMapR/R/latentFieldModel.R @@ -38,9 +38,13 @@ latentFieldModel <- function(db , shp, family = NULL, neighborGraph = NULL){ } # construct priors - hyper=list() - hyper$global <- list(prec = list( prior = "pc.prec", param = 1e0, alpha = 0.01)) - hyper$local <- list(prec = list( prior = "pc.prec", param = 1e1, alpha = 0.01)) + hyper <- list() + + hyper.ar1 = + hyper$global <- list(prec = list( prior = "pc.prec", param = c(1e0, 0.01))) + hyper$local <- list(prec = list( prior = "pc.prec", param = c(1e-1, 0.01)), + rho = list(prior="pc.cor0", param=c(0.3,0.01))) + hyper$age <- list(prec = list( prior = "pc.prec", param = 1e-1, alpha = 0.01)) hyper$time <- list(prec = list( prior = "pc.prec", param = 1e-1, alpha = 0.01)) hyper$site_iid <- list(prec = list( prior = "pc.prec", param = 1e0, alpha = 0.01)) @@ -77,9 +81,9 @@ latentFieldModel <- function(db , shp, family = NULL, neighborGraph = NULL){ # initialize formula for each level if(numLevels>1){ outcomeStr <- paste('cbind(',paste(paste('outcome',1:numLevels,sep='.'),sep='',collapse=', '),')',sep='',collapse = '') - formula <- as.formula(paste(outcomeStr,'~','pathogen - 1 + offset(catchment)',sep=' ')) + formula <- as.formula(paste(outcomeStr,'~','pathogen - 1 + catchment',sep=' ')) } else { # why does R do inconsistent stuff with column names!?!! - formula <- as.formula('outcome ~ 1 + offset(catchment) ') + formula <- as.formula('outcome ~ 1 + catchment ') } # factors as fixed effects @@ -200,12 +204,19 @@ latentFieldModel <- function(db , shp, family = NULL, neighborGraph = NULL){ inputData$residence_regional_nameRow <- match(inputData$residence_regional_name,unique(inputData$residence_regional_name)) + # inputData$time_row_residence_regional_name <- match(inputData$residence_regional_name,unique(inputData$residence_regional_name)) + if('time_row' %in% names(inputData)){ inputData$time_row_residence_regional_name <- inputData$time_row formula <- update(formula, ~ . + f(residence_regional_nameRow, model='iid', diagonal=1e-3, hyper=modelDefinition$local, constr = TRUE, replicate=replicateIdx, - group = time_row_residence_regional_name, control.group=list(model="ar1"))) + group = time_row_residence_regional_name, control.group=list(model="ar1",hyper=modelDefinition$local))) + + # inputData$residence_regional_name_timeRow <- inputData$time_row + + # formula <- update(formula, ~ . + f(residence_regional_name_timeRow, model='ar1', diagonal=1e-3, hyper=modelDefinition$local, constr = TRUE, replicate=replicateIdx, + # group = time_row_residence_regional_name, control.group=list(model="iid"))) validLatentFieldColumns <- c(validLatentFieldColumns,'residence_regional_nameRow','time_row_residence_regional_name') } else { diff --git a/model_build_scripts/buildLatentFieldModelsForDeployment.R b/model_build_scripts/buildLatentFieldModelsForDeployment.R index 981c1e0..7003e07 100644 --- a/model_build_scripts/buildLatentFieldModelsForDeployment.R +++ b/model_build_scripts/buildLatentFieldModelsForDeployment.R @@ -32,9 +32,9 @@ names(tmp2)<-tmp # tmp2) pathogenKeys <- list( - flu=fluPathogens, Flu_A_H1 = 'Flu_A_H1', Flu_A_H3 = 'Flu_A_H3', Flu_B_pan = 'Flu_B_pan', Flu_C_pan = 'Flu_C_pan'#, - # all='all', other_non_flu = setdiff(pathogens$pathogen,c(fluPathogens,'not_yet_tested','measles','Measles'))#, - # rsv = c('RSVA','RSVB'), RSVA='RSVA', RSVB='RSVB', + flu=fluPathogens, Flu_A_H1 = 'Flu_A_H1', Flu_A_H3 = 'Flu_A_H3', Flu_B_pan = 'Flu_B_pan', Flu_C_pan = 'Flu_C_pan', + all='all', #other_non_flu = setdiff(pathogens$pathogen,c(fluPathogens,'not_yet_tested','measles','Measles'))#, + rsv = c('RSVA','RSVB'), RSVA='RSVA', RSVB='RSVB'#, # AdV='AdV',CoV='CoV',RV='RV' ) @@ -69,7 +69,7 @@ nowcastWeek <- paste(nowcastYear ,'-W',sprintf("%02d",nowcastWeek),sep='') # number of subjects with pathogen and factor at residence location for (SOURCE in names(geoLevels)){ for (GEO in geoLevels[[SOURCE]]){ - + # SOURCE='sfs_domain_geojson' # GEO='residence_regional_name' # PATHOGEN='flu' @@ -83,6 +83,8 @@ for (SOURCE in names(geoLevels)){ # PATHOGEN='all' # PATHOGEN='rsv' # PATHOGEN='other_non_flu' + # PATHOGEN='RSVA' + shp <- masterSpatialDB(shape_level = gsub('residence_','',GEO), source = SOURCE) @@ -103,7 +105,7 @@ for (SOURCE in names(geoLevels)){ # forecast past incomplete data # db$observedData$positive[db$observedData$encountered_week >= paste('2019-W',isoweek(mostRecentSample), sep='')] <- NA - + # db$observedData <- db$observedData %>% filter(encountered_week<='2019-W24') library(incidenceMapR) library(modelServR) @@ -111,7 +113,7 @@ for (SOURCE in names(geoLevels)){ # hack in all flu lab timeseries - db2 <- read.csv('all_flu_by_time_query_result_2020-01-06T21_49_44.110Z.csv') + db2 <- read.csv('all_flu_by_time_query_result_2020-01-12T17_54_25.518Z.csv') lineages <- unique(db2$lineage) levels(db2$lineage)<-fluPathogens[c(1,2,4,5)] names(db2)[1]<-'pathogen' @@ -127,8 +129,8 @@ for (SOURCE in names(geoLevels)){ ## make sure always extrapolates to nowcastWeek - # mostRecentWeek <- max(countData$observedData$encountered_week) - mostRecentWeek <- '2019-W52' + mostRecentWeek <- max(countData$observedData$encountered_week) + # mostRecentWeek <- '2019-W52' minYear <- as.numeric(gsub('-W[0-9]{2}','',mostRecentWeek)) maxYear <- as.numeric(gsub('-W[0-9]{2}','',nowcastWeek)) @@ -155,10 +157,11 @@ for (SOURCE in names(geoLevels)){ if(!is.null(forecast_weeks)){ countData$observedData <- countData$observedData %>% tibble::add_row(encountered_week = forecast_weeks, n=0) %>% distinct(encountered_week,.keep_all = TRUE) } - + # filter out most recent week, which is likely very data incomplete countData$observedData$positive[countData$observedData$encountered_week >= as.character(mostRecentWeek)] <- NA - + tail(countData$observedData) + countData$observedData$time_row<-as.numeric(countData$observedData$encountered_week) @@ -223,15 +226,15 @@ for (SOURCE in names(geoLevels)){ dev.off() - ggplot(model$modeledData %>% filter(age_range_coarse_upper==65)) + + ggplot(model$modeledData %>% group_by_('encountered_week', GEO) %>% summarize(modeled_count_mode=sum(modeled_count_mode, na.rm=TRUE))) + geom_line(aes_string(x='encountered_week',y="modeled_count_mode", color=GEO,group =GEO)) + - facet_wrap('site_type',scales = 'free_y') + + # facet_wrap('site_type',scales = 'free_y') + guides(color=FALSE, fill=FALSE) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) - ggplot(model$modeledData %>% filter(age_range_coarse_upper==65)) + + ggplot(model$modeledData %>% group_by_('encountered_week', GEO) %>% summarize(positive=sum(positive, na.rm=TRUE))) + geom_line(aes_string(x='encountered_week',y="positive", color=GEO,group =GEO)) + - facet_wrap('site_type',scales = 'free_y') + + # facet_wrap('site_type',scales = 'free_y') + guides(color=FALSE, fill=FALSE) + theme(axis.text.x = element_text(angle = 90, hjust = 1))