From 19dbc3172535fe06db41f9903efe6eb1d0abe834 Mon Sep 17 00:00:00 2001 From: Insang Song <insang.song@nih.gov> Date: Wed, 1 May 2024 11:19:10 -0400 Subject: [PATCH 1/2] styling --- R/autoKrige.r | 157 +++++++++-------- R/autoKrigeST.R | 342 ++++++++++++++++++------------------ R/autoKrigeST.cv.R | 326 +++++++++++++++++----------------- R/autofitVariogram.r | 355 +++++++++++++++++++------------------- R/autofitVariogramST.r | 263 +++++++++++++++------------- R/create_new_data.r | 319 +++++++++++++++++----------------- R/marginal.variogramST.r | 15 +- R/plot.autofitVariogram.R | 35 ++-- R/setSTI.R | 72 ++++---- R/zzz.r | 56 +++--- 10 files changed, 1001 insertions(+), 939 deletions(-) diff --git a/R/autoKrige.r b/R/autoKrige.r index 3466b51..a71dd2e 100644 --- a/R/autoKrige.r +++ b/R/autoKrige.r @@ -1,88 +1,95 @@ -autoKrige = function(formula, input_data, new_data, data_variogram = input_data, block = 0, - model = c("Sph", "Exp", "Gau", "Ste"), kappa = c(0.05, seq(0.2, 2, 0.1), 5, 10), - fix.values = c(NA,NA,NA), remove_duplicates = TRUE, verbose = FALSE, GLS.model = NA, - start_vals = c(NA,NA,NA), miscFitOptions = list(), ...) +autoKrige <- function(formula, input_data, new_data, data_variogram = input_data, block = 0, + model = c("Sph", "Exp", "Gau", "Ste"), kappa = c(0.05, seq(0.2, 2, 0.1), 5, 10), + fix.values = c(NA, NA, NA), remove_duplicates = TRUE, verbose = FALSE, GLS.model = NA, + start_vals = c(NA, NA, NA), miscFitOptions = list(), ...) 
# This function performs an automatic Kriging on the data in input_data { - if(inherits(formula, "SpatialPointsDataFrame")) - # Is someone just passes a spatialpointsdataframe, assume he/she wants to interpolate the first column with Ordinary Kriging - { - input_data = formula - formula = as.formula(paste(names(input_data)[1], "~ 1")) - } - - # Check if inpu_data and data_variogram are SpatialPointsDataFrame - if(!inherits(input_data,"SpatialPointsDataFrame") | !inherits(data_variogram,"SpatialPointsDataFrame")) + if (inherits(formula, "SpatialPointsDataFrame")) + # Is someone just passes a spatialpointsdataframe, assume he/she wants to interpolate the first column with Ordinary Kriging { - stop(paste("\nInvalid input objects: input_data or data_variogram not of class 'SpatialPointsDataFrame'.\n\tClass input_data: '", - class(input_data),"'", - "\n\tClass data_variogram: '", - class(data_variogram),"'",sep='')) + input_data <- formula + formula <- as.formula(paste(names(input_data)[1], "~ 1")) } - - if(as.character(formula)[3] != 1 & missing(new_data)) stop("If you want to use Universal Kriging, new_data needs to be specified \n because the predictors are also required on the prediction locations.") - if("newdata" %in% names(list(...))) stop("The argument name for the prediction object is not 'newdata', but 'new_data'.") - - # Check if there are points or gridcells on the exact same coordinate and provide a more informative error message. - # Points on the same spot causes the interpolation to crash. - if(remove_duplicates) - { - zd = zerodist(input_data) - if(length(zd) != 0) - { - warning("Removed ", length(zd) / 2, " duplicate observation(s) in input_data:", immediate. 
= TRUE) - print(input_data[c(zd), ]) - input_data = input_data[-zd[, 2], ] - - } + + # Check if inpu_data and data_variogram are SpatialPointsDataFrame + if (!inherits(input_data, "SpatialPointsDataFrame") | !inherits(data_variogram, "SpatialPointsDataFrame")) { + stop(paste("\nInvalid input objects: input_data or data_variogram not of class 'SpatialPointsDataFrame'.\n\tClass input_data: '", + class(input_data), "'", + "\n\tClass data_variogram: '", + class(data_variogram), "'", + sep = "" + )) + } + + if (as.character(formula)[3] != 1 & missing(new_data)) stop("If you want to use Universal Kriging, new_data needs to be specified \n because the predictors are also required on the prediction locations.") + if ("newdata" %in% names(list(...))) stop("The argument name for the prediction object is not 'newdata', but 'new_data'.") + + # Check if there are points or gridcells on the exact same coordinate and provide a more informative error message. + # Points on the same spot causes the interpolation to crash. + if (remove_duplicates) { + zd <- zerodist(input_data) + if (length(zd) != 0) { + warning("Removed ", length(zd) / 2, " duplicate observation(s) in input_data:", immediate. 
= TRUE) + print(input_data[c(zd), ]) + input_data <- input_data[-zd[, 2], ] } + } + + # If all the values return an informative error + col_name <- as.character(formula)[2] + if (length(unique(input_data[[col_name]])) == 1) stop(sprintf("All data in attribute \'%s\' is identical and equal to %s\n Can not interpolate this data", col_name, unique(input_data[[col_name]])[1])) - # If all the values return an informative error - col_name = as.character(formula)[2] - if(length(unique(input_data[[col_name]])) == 1) stop(sprintf("All data in attribute \'%s\' is identical and equal to %s\n Can not interpolate this data", col_name, unique(input_data[[col_name]])[1])) - - if(missing(new_data)) new_data = create_new_data(input_data) - - ## Perform some checks on the projection systems of input_data and new_data - p4s_obj1 = proj4string(input_data) - p4s_obj2 = proj4string(new_data) - if(!all(is.na(c(p4s_obj1, p4s_obj2)))) { - if(is.na(p4s_obj1) & !is.na(p4s_obj2)) proj4string(input_data) = proj4string(new_data) - if(!is.na(p4s_obj1) & is.na(p4s_obj2)) proj4string(new_data) = proj4string(input_data) - if(any(!c(is.projected(input_data), is.projected(new_data)))) stop(paste("Either input_data or new_data is in LongLat, please reproject.\n", - " input_data: ", p4s_obj1, "\n", - " new_data: ", p4s_obj2, "\n")) - if(proj4string(input_data) != proj4string(new_data)) stop(paste("Projections of input_data and new_data do not match:\n", - " input_data: ", p4s_obj1, "\n", - " new_data: ", p4s_obj2, "\n")) - } + if (missing(new_data)) new_data <- create_new_data(input_data) - # Fit the variogram model, first check which model is used - variogram_object = autofitVariogram(formula, - data_variogram, - #autoselect.model = autoselect.model, - model = model, - kappa = kappa, - fix.values = fix.values, - verbose = verbose, - GLS.model = GLS.model, - start_vals = start_vals, - miscFitOptions = miscFitOptions) + ## Perform some checks on the projection systems of input_data and new_data + 
p4s_obj1 <- proj4string(input_data) + p4s_obj2 <- proj4string(new_data) + if (!all(is.na(c(p4s_obj1, p4s_obj2)))) { + if (is.na(p4s_obj1) & !is.na(p4s_obj2)) proj4string(input_data) <- proj4string(new_data) + if (!is.na(p4s_obj1) & is.na(p4s_obj2)) proj4string(new_data) <- proj4string(input_data) + if (any(!c(is.projected(input_data), is.projected(new_data)))) { + stop(paste( + "Either input_data or new_data is in LongLat, please reproject.\n", + " input_data: ", p4s_obj1, "\n", + " new_data: ", p4s_obj2, "\n" + )) + } + if (proj4string(input_data) != proj4string(new_data)) { + stop(paste( + "Projections of input_data and new_data do not match:\n", + " input_data: ", p4s_obj1, "\n", + " new_data: ", p4s_obj2, "\n" + )) + } + } - ## Perform the interpolation - krige_result = krige(formula, - input_data, - new_data, - variogram_object$var_model, - block = block, - ...) + # Fit the variogram model, first check which model is used + variogram_object <- autofitVariogram(formula, + data_variogram, + # autoselect.model = autoselect.model, + model = model, + kappa = kappa, + fix.values = fix.values, + verbose = verbose, + GLS.model = GLS.model, + start_vals = start_vals, + miscFitOptions = miscFitOptions + ) - krige_result$var1.stdev = sqrt(krige_result$var1.var) + ## Perform the interpolation + krige_result <- krige(formula, + input_data, + new_data, + variogram_object$var_model, + block = block, + ... 
+ ) - # Aggregate the results into an autoKrige object - result = list(krige_output = krige_result,exp_var = variogram_object$exp_var, var_model = variogram_object$var_model, sserr = variogram_object$sserr) - class(result) = c("autoKrige","list") + krige_result$var1.stdev <- sqrt(krige_result$var1.var) - return(result) + # Aggregate the results into an autoKrige object + result <- list(krige_output = krige_result, exp_var = variogram_object$exp_var, var_model = variogram_object$var_model, sserr = variogram_object$sserr) + class(result) <- c("autoKrige", "list") + return(result) } diff --git a/R/autoKrigeST.R b/R/autoKrigeST.R index 9807e64..f03aee9 100644 --- a/R/autoKrigeST.R +++ b/R/autoKrigeST.R @@ -1,181 +1,189 @@ # last revision: 08/12/2021 #' Cross-validation of spatiotemporal Kriging #' @description Performs an automatic Kriging on the data with ST*DF or sftime object -#' @param data a `sftime`-class or `ST*DF`-class object. `ST*DF`-class object will be converted to sftime automatically. +#' @param data a `sftime`-class or `ST*DF`-class object. `ST*DF`-class object will be converted to sftime automatically. #' @param fold_dim character. the dimension at which you want to cross-validate (spatial, temporal, and random) #' @param nfold integer. the number of folds. 10 as the default. #' @return The cross-validated spatiotemporal Kriging results. 
#' @examples #' data(air) -#' deair = STFDF(stations, dates, data.frame(PM10 = as.vector(air))) -#' deair_sf = st_as_stars(deair) %>% -#' st_transform('+proj=longlat +ellps=sphere') -#' deair_sf = st_transform(deair_sf, 3857) -#' deair_r = as(deair_sf, 'STFDF') -#' deair_rs = deair_r[,3751:3800] +#' deair <- STFDF(stations, dates, data.frame(PM10 = as.vector(air))) +#' deair_sf <- st_as_stars(deair) %>% +#' st_transform("+proj=longlat +ellps=sphere") +#' deair_sf <- st_transform(deair_sf, 3857) +#' deair_r <- as(deair_sf, "STFDF") +#' deair_rs <- deair_r[, 3751:3800] #' ## autoKrigeST.cv test -#' akst = autoKrigeST(formula = PM10~1, data = deair_rs, +#' akst = autoKrigeST(formula = PM10~1, data = deair_rs, #' cutoff = 300000, width = 30000, tlags = 0:7, cores = 8) -autoKrigeST = function(formula, - input_data, new_data, - type_stv = 'sumMetric', - #data_variogram = input_data, - block = 0, - model = c("Sph", "Exp", "Gau", "Ste"), - kappa = c(0.05, seq(0.2, 2, 0.1), 5, 10), - fix.values = c(NA,NA,NA), - #remove_duplicates = TRUE, - newdata_mode = 'rect', - newdata_npoints = 3e3, - GLS.model = NA, - tlags = 0:6, - cutoff = 2e4, - width = 5e2, - forward = 6, - predict_chunk = NULL, - nmax = Inf, - aniso_method = 'vgm', - type_joint = 'Exp', - prodsum_k = 0.25, - surface = FALSE, - start_vals = c(NA,NA,NA), - miscFitOptions = list(), - measurement_error = c(0,0,0), - cores = 1, - verbose = TRUE) { - - if(inherits(input_data, "STIDF") | inherits(input_data, "STFDF") | inherits(input_data, "STSDF")) { - input_data = sftime::st_as_sftime(input_data) - # input_data = formula - # formula = as.formula(paste(names(input_data)[1], "~ 1")) - } - - # if((!inherits(input_data,"STFDF") & !inherits(data_variogram,"STFDF") & !inherits(input_data,"STIDF") & !inherits(data_variogram,"STIDF") & !inherits(input_data,"STSDF") & !inherits(data_variogram,"STSDF"))) - # { - # stop(paste("\nInvalid input objects: input_data or data_variogram not of class 'ST*DF'.\n\tClass input_data: '", - # 
class(input_data),"'", - # "\n\tClass data_variogram: '", - # class(data_variogram),"'",sep='')) - # } - - if(as.character(formula)[3] != 1 & missing(new_data)) stop("If you want to use Universal Kriging, new_data needs to be specified \n because the predictors are also required on the prediction of spatio-temporal data elements.") - # TODO: remove duplicate objects in ST*DF (spatially) - # Check if there are points or gridcells on the exact same coordinate and provide a more informative error message. - # Points on the same spot causes the interpolation to crash. - #if(remove_duplicates) - #{ - # zd = zerodist(input_data) - # if(length(zd) != 0) - # { - # warning("Removed ", length(zd) / 2, " duplicate observation(s) in input_data:", immediate. = TRUE) - # print(input_data[c(zd), ]) - # input_data = input_data[-zd[, 2], ] - - # } - #} - - # If all the values return an informative error - col_name = as.character(formula)[2] - if (length(unique(input_data[[col_name]])) == 1) { - stop(sprintf("All data in attribute \'%s\' is identical and equal to %s\n Can not interpolate this data", col_name, unique(input_data[[col_name]])[1])) +autoKrigeST <- function(formula, + input_data, new_data, + type_stv = "sumMetric", + # data_variogram = input_data, + block = 0, + model = c("Sph", "Exp", "Gau", "Ste"), + kappa = c(0.05, seq(0.2, 2, 0.1), 5, 10), + fix.values = c(NA, NA, NA), + # remove_duplicates = TRUE, + newdata_mode = "rect", + newdata_npoints = 3e3, + GLS.model = NA, + tlags = 0:6, + cutoff = 2e4, + width = 5e2, + forward = 6, + predict_chunk = NULL, + nmax = Inf, + aniso_method = "vgm", + type_joint = "Exp", + prodsum_k = 0.25, + surface = FALSE, + start_vals = c(NA, NA, NA), + miscFitOptions = list(), + measurement_error = c(0, 0, 0), + cores = 1, + verbose = TRUE) { + if (inherits(input_data, "STIDF") | inherits(input_data, "STFDF") | inherits(input_data, "STSDF")) { + input_data <- sftime::st_as_sftime(input_data) + # input_data = formula + # formula = 
as.formula(paste(names(input_data)[1], "~ 1")) + } + + # if((!inherits(input_data,"STFDF") & !inherits(data_variogram,"STFDF") & !inherits(input_data,"STIDF") & !inherits(data_variogram,"STIDF") & !inherits(input_data,"STSDF") & !inherits(data_variogram,"STSDF"))) + # { + # stop(paste("\nInvalid input objects: input_data or data_variogram not of class 'ST*DF'.\n\tClass input_data: '", + # class(input_data),"'", + # "\n\tClass data_variogram: '", + # class(data_variogram),"'",sep='')) + # } + + if (as.character(formula)[3] != 1 & missing(new_data)) stop("If you want to use Universal Kriging, new_data needs to be specified \n because the predictors are also required on the prediction of spatio-temporal data elements.") + # TODO: remove duplicate objects in ST*DF (spatially) + # Check if there are points or gridcells on the exact same coordinate and provide a more informative error message. + # Points on the same spot causes the interpolation to crash. + # if(remove_duplicates) + # { + # zd = zerodist(input_data) + # if(length(zd) != 0) + # { + # warning("Removed ", length(zd) / 2, " duplicate observation(s) in input_data:", immediate. 
= TRUE) + # print(input_data[c(zd), ]) + # input_data = input_data[-zd[, 2], ] + + # } + # } + + # If all the values return an informative error + col_name <- as.character(formula)[2] + if (length(unique(input_data[[col_name]])) == 1) { + stop(sprintf("All data in attribute \'%s\' is identical and equal to %s\n Can not interpolate this data", col_name, unique(input_data[[col_name]])[1])) + } + + if (missing(new_data)) { + new_data <- create_new_data.ST(input_data, + form = formula, + gen_mode = newdata_mode, + npoints = newdata_npoints, + forward = forward + ) + } + + ## Perform some checks on the projection systems of input_data and new_data + crs_in <- sf::st_crs(input_data) + crs_new <- sf::st_crs(new_data) + if (!all(is.na(c(crs_in, crs_new)))) { + if (is.na(crs_in) & !is.na(crs_new)) { + sf::st_crs(input_data) <- sf::st_crs(new_data) + } else if (!is.na(crs_in) & is.na(crs_new)) { + sf::st_crs(new_data) <- sf::st_crs(input_data) + } else if (crs_in[["wkt"]] != crs_new[["wkt"]]) { + stop(paste( + "Projections of input_data and new_data do not match:\n", + " input_data: ", crs_in, "\n", + " new_data: ", crs_new, "\n" + )) + } else if (any(!grepl("PROJ", crs_in[["wkt"]]), !grepl("PROJ", crs_new[["wkt"]]))) { + stop(paste( + "One of input_data and new_data is in LongLat, please reproject.\n", + " input_data: ", crs_in, "\n", + " new_data: ", crs_new, "\n" + )) } - - if(missing(new_data)) { - new_data = create_new_data.ST(input_data, - form = formula, - gen_mode = newdata_mode, - npoints = newdata_npoints, - forward = forward) - } - - ## Perform some checks on the projection systems of input_data and new_data - crs_in = sf::st_crs(input_data) - crs_new = sf::st_crs(new_data) - if (!all(is.na(c(crs_in, crs_new)))) { - if (is.na(crs_in) & !is.na(crs_new)) { - sf::st_crs(input_data) = sf::st_crs(new_data) - } - else if (!is.na(crs_in) & is.na(crs_new)) { - sf::st_crs(new_data) = sf::st_crs(input_data) - } - else if (crs_in[["wkt"]] != crs_new[["wkt"]]) { - 
stop(paste("Projections of input_data and new_data do not match:\n", - " input_data: ", crs_in, "\n", - " new_data: ", crs_new, "\n")) - } - else if (any(!grepl("PROJ", crs_in[["wkt"]]), !grepl("PROJ", crs_new[["wkt"]]))) { stop(paste("One of input_data and new_data is in LongLat, please reproject.\n", - " input_data: ", crs_in, "\n", - " new_data: ", crs_new, "\n")) - } - } else { - stop("One of input_data and new_data has no coordinate systems.\n") - } - - # Fit the variogram model, first check which model is used - # will be deprecated after v.2.0 - input_data = as(as(input_data, "STIDF"), "STSDF") - new_data = as(as(new_data, "STIDF"), "STSDF") - data_variogram = input_data - - variogram_object = autofitVariogramST(formula = formula, - stf = data_variogram, - typestv = type_stv, - candidate_model = model, - tlags = tlags, - cutoff = cutoff, - width = width, - aniso_method = aniso_method, - type_joint = type_joint, - prodsum_k = prodsum_k, - surface = surface, - measurement_error = measurement_error, - cores = cores, - verbose = verbose) - - if (!is.null(predict_chunk)) { + } else { + stop("One of input_data and new_data has no coordinate systems.\n") + } + + # Fit the variogram model, first check which model is used + # will be deprecated after v.2.0 + input_data <- as(as(input_data, "STIDF"), "STSDF") + new_data <- as(as(new_data, "STIDF"), "STSDF") + data_variogram <- input_data + + variogram_object <- autofitVariogramST( + formula = formula, + stf = data_variogram, + typestv = type_stv, + candidate_model = model, + tlags = tlags, + cutoff = cutoff, + width = width, + aniso_method = aniso_method, + type_joint = type_joint, + prodsum_k = prodsum_k, + surface = surface, + measurement_error = measurement_error, + cores = cores, + verbose = verbose + ) + + if (!is.null(predict_chunk)) { ## Perform the interpolation by chunk - #new_data_sti = as(new_data, 'STIDF') - len_new_data = dim(new_data)[1] - len_chunks = ceiling(len_new_data / predict_chunk) - 
len_indices_start = (predict_chunk * seq(0, len_chunks - 1, 1)) + 1 - len_indices_end = len_indices_start + predict_chunk - 1 - len_indices_end[length(len_indices_end)] = len_new_data - - krige_results_l = vector('list', length = len_chunks) - pb = txtProgressBar(style = 3, max = len_chunks) - - for (i in 1:len_chunks) { - krige_results_l[[i]] = krigeST(formula = formula, - data = input_data, - newdata = new_data[len_indices_start[i]:len_indices_end[i],], - nmax = nmax, - computeVar = TRUE, - bufferNmax = 2, - modelList = variogram_object$jointSTV) - setTxtProgressBar(pb, i) - - } - close(pb) - - krige_result = do.call('rbind', krige_results_l) - } else { - krige_result = krigeST(formula = formula, - data = input_data, - newdata = new_data, - nmax = nmax, - computeVar = TRUE, - bufferNmax = 2, - modelList = variogram_object$jointSTV) + # new_data_sti = as(new_data, 'STIDF') + len_new_data <- dim(new_data)[1] + len_chunks <- ceiling(len_new_data / predict_chunk) + len_indices_start <- (predict_chunk * seq(0, len_chunks - 1, 1)) + 1 + len_indices_end <- len_indices_start + predict_chunk - 1 + len_indices_end[length(len_indices_end)] <- len_new_data + + krige_results_l <- vector("list", length = len_chunks) + pb <- txtProgressBar(style = 3, max = len_chunks) + + for (i in 1:len_chunks) { + krige_results_l[[i]] <- krigeST( + formula = formula, + data = input_data, + newdata = new_data[len_indices_start[i]:len_indices_end[i], ], + nmax = nmax, + computeVar = TRUE, + bufferNmax = 2, + modelList = variogram_object$jointSTV + ) + setTxtProgressBar(pb, i) } - - krige_result = as(krige_result, 'STFDF') - - # Aggregate the results into an autoKrige object - result = list(krige_output = krige_result, - var_model = variogram_object$jointSTV) - class(result) = c("autoKrigeST","list") - - return(result) - -} \ No newline at end of file + close(pb) + + krige_result <- do.call("rbind", krige_results_l) + } else { + krige_result <- krigeST( + formula = formula, + data = input_data, 
+ newdata = new_data, + nmax = nmax, + computeVar = TRUE, + bufferNmax = 2, + modelList = variogram_object$jointSTV + ) + } + + krige_result <- as(krige_result, "STFDF") + + # Aggregate the results into an autoKrige object + result <- list( + krige_output = krige_result, + var_model = variogram_object$jointSTV + ) + class(result) <- c("autoKrigeST", "list") + + return(result) +} diff --git a/R/autoKrigeST.cv.R b/R/autoKrigeST.cv.R index 802c5bb..849d80a 100644 --- a/R/autoKrigeST.cv.R +++ b/R/autoKrigeST.cv.R @@ -2,7 +2,7 @@ #' Cross-validation of spatiotemporal Kriging #' -#' @param data a `STFDF`-class object +#' @param data a `STFDF`-class object #' @param fold_dim character. the dimension at which you want to cross-validate (spatial, temporal, and random) #' @param nfold integer. the number of folds. 10 as the default #' @param formula formula. e.g., y~1 @@ -20,7 +20,7 @@ #' @param predict_chunk integer. The number of data points per chunk in the new data for the large data. It should be meticulously chosen according to the user's machine specification. #' @param nmax integer or positive infinite. The maximum number of spatiotemporal neighbors to conduct the local spatiotemporal Kriging. #' @param aniso_method character. One of 'vgm', 'linear', 'range', and 'metric'. Please refer to ?gstat::estiStAni. -#' @param type_joint character. The model form of joint spatiotemporal variogram. +#' @param type_joint character. The model form of joint spatiotemporal variogram. #' @param prodsum_k numeric. The parameter for the case when 'productSum' is chosen for type_stv. #' @param start_vals numeric vector (3). The initial values to optimize the spatiotemporal variogram model. #' @param miscFitOptions list. See ?automap::autofitVariogram. 
@@ -33,172 +33,172 @@ #' library(stars) #' library(dplyr) #' data(air) -#' deair = STFDF(stations, dates, data.frame(PM10 = as.vector(air))) -#' deair_sf = st_as_stars(deair, crs = '+proj=longlat +ellps=sphere') -#' deair_sf = st_transform(deair_sf, 3857) -#' deair_r = as(deair_sf, 'STFDF') -#' deair_r@sp@proj4string = CRS('+init=epsg:3857') -#' deair_rs = deair_r[,3751:3800] +#' deair <- STFDF(stations, dates, data.frame(PM10 = as.vector(air))) +#' deair_sf <- st_as_stars(deair, crs = "+proj=longlat +ellps=sphere") +#' deair_sf <- st_transform(deair_sf, 3857) +#' deair_r <- as(deair_sf, "STFDF") +#' deair_r@sp@proj4string <- CRS("+init=epsg:3857") +#' deair_rs <- deair_r[, 3751:3800] #' ## autoKrigeST.cv test -#' akst_cv_t = autoKrigeST.cv(formula = PM10~1, data = deair_rs, nfold = 3, fold_dim = 'temporal', -#' cutoff = 300000, width = 30000, tlags = 0:7, cores = 8) -#' akst_cv_s = autoKrigeST.cv(formula = PM10~1, data = deair_rs, nfold = 3, fold_dim = 'spatial', -#' cutoff = 300000, width = 30000, tlags = 0:7, cores = 8) -#' #akst_cv_r = autoKrigeST.cv(formula = PM10~1, data = deair_rs, nfold = 3, fold_dim = 'random', +#' akst_cv_t <- autoKrigeST.cv( +#' formula = PM10 ~ 1, data = deair_rs, nfold = 3, fold_dim = "temporal", +#' cutoff = 300000, width = 30000, tlags = 0:7, cores = 8 +#' ) +#' akst_cv_s <- autoKrigeST.cv( +#' formula = PM10 ~ 1, data = deair_rs, nfold = 3, fold_dim = "spatial", +#' cutoff = 300000, width = 30000, tlags = 0:7, cores = 8 +#' ) +#' # akst_cv_r = autoKrigeST.cv(formula = PM10~1, data = deair_rs, nfold = 3, fold_dim = 'random', #' # cutoff = 300000, width = 30000, tlags = 0:7, cores = 8) -#' akst_cv_spt = autoKrigeST.cv(formula = PM10~1, data = deair_rs, nfold = 4, fold_dim = 'spacetime', -#' cutoff = 300000, width = 30000, tlags = 0:7, cores = 8) +#' akst_cv_spt <- autoKrigeST.cv( +#' formula = PM10 ~ 1, data = deair_rs, nfold = 4, fold_dim = "spacetime", +#' cutoff = 300000, width = 30000, tlags = 0:7, cores = 8 +#' ) #' @export 
-autoKrigeST.cv = function(data, fold_dim, - nfold = 10L, - formula, - type_stv = 'sumMetric', - block = 0, - model = c("Sph", "Exp", "Gau", "Ste"), - kappa = c(0.05, seq(0.2, 2, 0.1), 5, 10), - fix.values = c(NA,NA,NA), - tlags=0:6, - cutoff=2e4, - width=5e2, - nmax = Inf, - aniso_method='vgm', - type_joint='Exp', - prodsum_k=0.25, - surface = FALSE, - start_vals = c(NA,NA,NA), - miscFitOptions = list(), - measurement_error = c(0,0,0), - cores = 1, - seed = 130425L){ - if (!grepl('^(spa|temp|tim|rand|)*', fold_dim)) { - stop('The argument fold_dim is not valid. Enter one of spatial, temporal, and random.') +autoKrigeST.cv <- function(data, fold_dim, + nfold = 10L, + formula, + type_stv = "sumMetric", + block = 0, + model = c("Sph", "Exp", "Gau", "Ste"), + kappa = c(0.05, seq(0.2, 2, 0.1), 5, 10), + fix.values = c(NA, NA, NA), + tlags = 0:6, + cutoff = 2e4, + width = 5e2, + nmax = Inf, + aniso_method = "vgm", + type_joint = "Exp", + prodsum_k = 0.25, + surface = FALSE, + start_vals = c(NA, NA, NA), + miscFitOptions = list(), + measurement_error = c(0, 0, 0), + cores = 1, + seed = 130425L) { + if (!grepl("^(spa|temp|tim|rand|)*", fold_dim)) { + stop("The argument fold_dim is not valid. 
Enter one of spatial, temporal, and random.") + } + + len_time <- length(data@time) + len_space <- length(data@sp) + set.seed(seed) + get_data_fold <- function(data, dimension = fold_dim, nfold) { + data_fold <- vector("list", length = nfold) + data_validation <- vector("list", length = nfold) + + if (grepl("^spatial$|^space$", dimension)) { + sp_coords <- coordinates(data@sp) + sp_coords_km <- kmeans(sp_coords, nfold) + indices <- sp_coords_km$cluster + + vv <- split(1:len_space, indices) + for (i in seq_len(length(vv))) { + idata <- data[-vv[[i]], ] + vdata <- data[vv[[i]], ] + data_fold[[i]] <- as(idata, "STSDF") + data_validation[[i]] <- as(vdata, "STSDF") + } } - - len_time = length(data@time) - len_space = length(data@sp) - set.seed(seed) - get_data_fold = function(data, dimension = fold_dim, nfold){ - data_fold = vector('list', length = nfold) - data_validation = vector('list', length = nfold) - - if (grepl('^spatial$|^space$', dimension)){ - sp_coords = coordinates(data@sp) - sp_coords_km = kmeans(sp_coords, nfold) - indices = sp_coords_km$cluster - - vv = split(1:len_space, indices) - for (i in seq_len(length(vv))) { - idata = data[-vv[[i]],] - vdata = data[vv[[i]],] - data_fold[[i]] = as(idata, 'STSDF') - data_validation[[i]] = as(vdata, 'STSDF') - } - - } - if (grepl('^(temp|tim)', dimension)) { - q = len_time %% nfold - if (q != 0) { - targ = floor(len_time/nfold) - v = rep(targ, nfold) - vindex = sample(nfold, q, replace = FALSE) - v[vindex] = v[vindex] + 1 - } else { - targ = len_time/nfold - v = rep(targ, nfold) - } - vv = split(c(1:len_time), rep(1:nfold, v)) - - for (i in seq_len(length(vv))) { - idata = data[,-vv[[i]]] - vdata = data[,vv[[i]]] - data_fold[[i]] = as(idata, 'STSDF') - data_validation[[i]] = as(vdata, 'STSDF') - - } - - } - if (grepl('^(spatiotemp|spacetime)', dimension)) { - if (sqrt(nfold) %% 1 > 0) { - stop("Spatiotemporal CV is only available for integer sqrt(nfold).") - } - q_t = len_time %% sqrt(nfold) - if (q_t != 0) { - targ 
= ceiling(len_time/nfold) - v_t = rep(targ, sqrt(nfold)) - v_tindex = sample(sqrt(nfold), q_t, replace = FALSE) - v_t[v_tindex] = v[v_tindex] + 1 - } else { - targ = len_time/sqrt(nfold) - v_t = rep(targ, sqrt(nfold)) - } - - sp_coords = coordinates(data@sp) - sp_coords_km = kmeans(sp_coords, sqrt(nfold)) - indices = sp_coords_km$cluster - - vv_sp = split(1:len_space, indices) - vv_t = split(c(1:len_time), rep(1:sqrt(nfold), v_t)) - #vv_sp = split(c(1:len_space), rep(1:sqrt(nfold), v_sp)) - - for (i in seq_len(length(vv_sp))) { - for (j in seq_len(length(vv_t))) { - - idata = data[-vv_sp[[i]], -vv_t[[j]]] - vdata = data[vv_sp[[i]], vv_t[[j]]] - data_fold[[sqrt(nfold) * (i-1) + j]] = as(idata, 'STSDF') - data_validation[[sqrt(nfold) * (i-1) + j]] = as(vdata, 'STSDF') - - } - } - - - - } - - if (grepl('^rand', dimension)){ - for (i in 1:nfold) { - data_sti = as(data, 'STIDF') - indices = sample(nrow(data_sti@data), ceiling(nrow(data_sti@data)/nfold), replace = FALSE) - indices = sort(indices, decreasing = TRUE) - data_fold[[i]] = data_sti[-indices,] - data_validation[[i]] = data_sti[indices,] - } + if (grepl("^(temp|tim)", dimension)) { + q <- len_time %% nfold + if (q != 0) { + targ <- floor(len_time / nfold) + v <- rep(targ, nfold) + vindex <- sample(nfold, q, replace = FALSE) + v[vindex] <- v[vindex] + 1 + } else { + targ <- len_time / nfold + v <- rep(targ, nfold) + } + vv <- split(c(1:len_time), rep(1:nfold, v)) + + for (i in seq_len(length(vv))) { + idata <- data[, -vv[[i]]] + vdata <- data[, vv[[i]]] + data_fold[[i]] <- as(idata, "STSDF") + data_validation[[i]] <- as(vdata, "STSDF") + } + } + if (grepl("^(spatiotemp|spacetime)", dimension)) { + if (sqrt(nfold) %% 1 > 0) { + stop("Spatiotemporal CV is only available for integer sqrt(nfold).") + } + q_t <- len_time %% sqrt(nfold) + if (q_t != 0) { + targ <- ceiling(len_time / nfold) + v_t <- rep(targ, sqrt(nfold)) + v_tindex <- sample(sqrt(nfold), q_t, replace = FALSE) + v_t[v_tindex] <- v[v_tindex] + 1 + } else 
{ + targ <- len_time / sqrt(nfold) + v_t <- rep(targ, sqrt(nfold)) + } + + sp_coords <- coordinates(data@sp) + sp_coords_km <- kmeans(sp_coords, sqrt(nfold)) + indices <- sp_coords_km$cluster + + vv_sp <- split(1:len_space, indices) + vv_t <- split(c(1:len_time), rep(1:sqrt(nfold), v_t)) + # vv_sp = split(c(1:len_space), rep(1:sqrt(nfold), v_sp)) + + for (i in seq_len(length(vv_sp))) { + for (j in seq_len(length(vv_t))) { + idata <- data[-vv_sp[[i]], -vv_t[[j]]] + vdata <- data[vv_sp[[i]], vv_t[[j]]] + data_fold[[sqrt(nfold) * (i - 1) + j]] <- as(idata, "STSDF") + data_validation[[sqrt(nfold) * (i - 1) + j]] <- as(vdata, "STSDF") } - return(list(data_fold, data_validation)) + } } - folded = get_data_fold(data = data, dimension = fold_dim, nfold = nfold) - data_fold = folded[[1]] - data_validation = folded[[2]] - - cvresult = mapply(function(id, vd) { - kriged = autoKrigeST(formula = formula, - input_data = id, - new_data = vd, - type_stv = type_stv, - model = model, - block = block, - kappa = kappa, - fix.values = fix.values, - tlags = tlags, - cutoff = cutoff, - width = width, - nmax = nmax, - prodsum_k = prodsum_k, - start_vals = start_vals, - miscFitOptions = miscFitOptions, - measurement_error = measurement_error, - cores = cores)$krige_output$var1.pred - form_char = as.character(formula) - actual = as.vector(vd@data[,form_char[2]]) - - RMSE = sqrt(mean((kriged - actual)^2)) - MAE = mean(abs(kriged - actual)) - BIAS = mean(kriged) - mean(actual) - return(list(RMSE = RMSE, MAE = MAE, BIAS = BIAS)) - }, data_fold, data_validation, SIMPLIFY = FALSE) - - res = do.call(rbind, cvresult) - res = data.frame(cbind(CVFold=1:nfold, res)) - return(res) + if (grepl("^rand", dimension)) { + for (i in 1:nfold) { + data_sti <- as(data, "STIDF") + indices <- sample(nrow(data_sti@data), ceiling(nrow(data_sti@data) / nfold), replace = FALSE) + indices <- sort(indices, decreasing = TRUE) + data_fold[[i]] <- data_sti[-indices, ] + data_validation[[i]] <- data_sti[indices, ] + } + } 
+ return(list(data_fold, data_validation)) + } + + folded <- get_data_fold(data = data, dimension = fold_dim, nfold = nfold) + data_fold <- folded[[1]] + data_validation <- folded[[2]] + + cvresult <- mapply(function(id, vd) { + kriged <- autoKrigeST( + formula = formula, + input_data = id, + new_data = vd, + type_stv = type_stv, + model = model, + block = block, + kappa = kappa, + fix.values = fix.values, + tlags = tlags, + cutoff = cutoff, + width = width, + nmax = nmax, + prodsum_k = prodsum_k, + start_vals = start_vals, + miscFitOptions = miscFitOptions, + measurement_error = measurement_error, + cores = cores + )$krige_output$var1.pred + form_char <- as.character(formula) + actual <- as.vector(vd@data[, form_char[2]]) + + RMSE <- sqrt(mean((kriged - actual)^2)) + MAE <- mean(abs(kriged - actual)) + BIAS <- mean(kriged) - mean(actual) + return(list(RMSE = RMSE, MAE = MAE, BIAS = BIAS)) + }, data_fold, data_validation, SIMPLIFY = FALSE) + + res <- do.call(rbind, cvresult) + res <- data.frame(cbind(CVFold = 1:nfold, res)) + return(res) } diff --git a/R/autofitVariogram.r b/R/autofitVariogram.r index 4e7fb2b..c994f13 100644 --- a/R/autofitVariogram.r +++ b/R/autofitVariogram.r @@ -1,216 +1,221 @@ -autofitVariogram = function(formula, input_data, input_vgm = NULL, model = c("Sph", "Exp", "Gau", "Ste"), - kappa = c(0.05, seq(0.1, 2, 0.1), 5, 10), fix.values = c(NA,NA,NA), - verbose = FALSE, GLS.model = NA, start_vals = c(NA,NA,NA), measurement_error = 0, - boundaries = c(2,4,6,9,12,15,25,35,50,65,80,100) * 50000, - miscFitOptions = list(),...) +autofitVariogram <- function(formula, input_data, input_vgm = NULL, model = c("Sph", "Exp", "Gau", "Ste"), + kappa = c(0.05, seq(0.1, 2, 0.1), 5, 10), fix.values = c(NA, NA, NA), + verbose = FALSE, GLS.model = NA, start_vals = c(NA, NA, NA), measurement_error = 0, + boundaries = c(2, 4, 6, 9, 12, 15, 25, 35, 50, 65, 80, 100) * 50000, + miscFitOptions = list(), ...) 
# Automatically fit a variogram model to input_data, or to a precomputed
# sample variogram supplied through `input_vgm`.
#
# Arguments:
#   formula            model formula; left-hand side is the interpolated variable
#   input_data         Spatial*/ST*/sf object with the observations (may be NULL
#                      when `input_vgm` is supplied)
#   input_vgm          optional precomputed sample variogram (gstatVariogram)
#   model              candidate variogram model names for gstat::vgm(); NULL
#                      switches to the npsp Shapiro-Botha route at the bottom
#   kappa              smoothness values tried for the "Mat"/"Ste" models
#   fix.values         c(nugget, range, sill); non-NA entries are held fixed
#   verbose            print fitting diagnostics
#   GLS.model          optional variogramModel; triggers a GLS sample variogram
#   start_vals         c(nugget, range, sill) initial values (NA = heuristic)
#   measurement_error  measurement-error variance passed to vgm(Err = ...)
#   boundaries         distance-bin boundaries; NULL derives them from the
#                      bounding-box diagonal of the data
#   miscFitOptions     list; supports merge.small.bins and min.np.bin
#   ...                forwarded to gstat::variogram()
#
# Returns a list of class c("autofitVariogram", "list") with elements exp_var
# (experimental variogram), var_model (best fitted model) and, on the gstat
# route, sserr (sum of squared errors of the selected model).
autofitVariogram <- function(formula, input_data, input_vgm = NULL, model = c("Sph", "Exp", "Gau", "Ste"),
                             kappa = c(0.05, seq(0.1, 2, 0.1), 5, 10), fix.values = c(NA, NA, NA),
                             verbose = FALSE, GLS.model = NA, start_vals = c(NA, NA, NA), measurement_error = 0,
                             boundaries = c(2, 4, 6, 9, 12, 15, 25, 35, 50, 65, 80, 100) * 50000,
                             miscFitOptions = list(), ...) {
  # Fit one candidate model; returns the fitted variogramModel, or NULL when
  # gstat's fit fails (the caller skips failed fits).
  getModel <- function(psill, model, range, kappa, nugget, fit_range, fit_sill, fit_nugget, measurement_error = 0, verbose) {
    debug.level <- if (verbose) 1 else 0
    if (model == "Pow") {
      warning("Using the power model is at your own risk, read the docs of autofitVariogram for more details.")
      if (is.na(start_vals[1])) nugget <- 0
      if (is.na(start_vals[2])) range <- 1 # If a power model, range == 1 is a better start value
      if (is.na(start_vals[3])) psill <- 1 # FIX: was assigned to an unused local `sill`
    }
    vgm_try <- vgm(
      psill = psill, model = model, range = range,
      nugget = nugget, kappa = kappa, Err = measurement_error
    )
    obj <- try(
      fit.variogram(experimental_variogram,
        model = vgm_try,
        fit.ranges = c(fit_range),
        fit.sills = c(fit_nugget, fit_sill),
        # FIX: was hard-coded to 0, so `verbose` never reached gstat here
        debug.level = debug.level
      ),
      TRUE
    )
    if (inherits(obj, "try-error")) {
      warning(
        "An error has occured during variogram fitting. Used:\n",
        "\tnugget:\t", nugget,
        "\n\tmodel:\t", model,
        "\n\tpsill:\t", psill,
        "\n\trange:\t", range,
        "\n\tkappa:\t", ifelse(kappa == 0, NA, kappa),
        "\n as initial guess. This particular variogram fit is not taken into account. \nGstat error:\n", obj
      )
      return(NULL)
    }
    obj
  }

  if (!is.null(input_data)) {
    # Bounding-box diagonal: scales the default bin boundaries and the
    # initial range guess below.
    if (sum(grepl("(Spatial|ST).*", class(input_data))) > 0) {
      longlat <- !is.projected(input_data)
      if (is.na(longlat)) {
        longlat <- FALSE
      }
      diagonal <- spDists(t(bbox(input_data)), longlat = longlat)[2]
    } else {
      longlat <- st_is_longlat(input_data)
      diagonal <- st_as_sfc(st_bbox(input_data))
      diagonal <- st_cast(diagonal, "POINT")
      diagonal <- st_distance(diagonal[c(1, 3)], longlat = longlat)[2, 1]
      diagonal <- as.vector(diagonal)
    }
    if (is.null(boundaries)) {
      # 0.35 times the length of the central axis through the area
      boundaries <- c(2, 4, 6, 9, 12, 15, 25, 35, 50, 65, 80, 100) * diagonal * 0.35 / 100
    }
  }

  if (!is.null(input_data) & is.null(input_vgm)) {
    # Check for anisotropy parameters
    if ("alpha" %in% names(list(...))) warning("Anisotropic variogram model fitting not supported, see the documentation of autofitVariogram for more details.")

    # Take the misc fit options and overwrite the defaults by the user specified ones
    miscFitOptionsDefaults <- list(merge.small.bins = TRUE, min.np.bin = 5)
    miscFitOptions <- modifyList(miscFitOptionsDefaults, miscFitOptions)

    # If a variogram model is given in GLS.model, build the generalised
    # least squares sample variogram instead of the ordinary one.
    if (!is(GLS.model, "variogramModel")) {
      experimental_variogram <- variogram(formula, input_data, boundaries = boundaries, ...)
    } else {
      if (verbose) cat("Calculating GLS sample variogram\n")
      g <- gstat(NULL, "bla", formula, input_data, model = GLS.model, set = list(gls = 1))
      experimental_variogram <- variogram(g, boundaries = boundaries, ...)
    }

    # Merge bins with too few point pairs (request by Jon Skoien)
    if (miscFitOptions[["merge.small.bins"]]) {
      if (verbose) cat("Checking if any bins have less than 5 points, merging bins when necessary...\n\n")
      while (TRUE) {
        if (length(experimental_variogram$np[experimental_variogram$np < miscFitOptions[["min.np.bin"]]]) == 0 | length(boundaries) == 1) break
        boundaries <- boundaries[2:length(boundaries)]
        if (!is(GLS.model, "variogramModel")) {
          experimental_variogram <- variogram(formula, input_data, boundaries = boundaries, ...)
        } else {
          experimental_variogram <- variogram(g, boundaries = boundaries, ...)
        }
      }
    }
  } else {
    experimental_variogram <- input_vgm
    diagonal <- max(input_vgm$dist)
  }

  # FIX: was `!is.null(model) | !is.null(input_vgm)`, which entered this
  # branch with test_models = NULL when only input_vgm was supplied and then
  # failed on which.min(c()) before the npsp route below could run.
  if (!is.null(model)) {
    # Initial values: user-supplied start_vals win over the heuristics.
    if (is.na(start_vals[1])) { # Nugget
      initial_nugget <- min(experimental_variogram$gamma) * 0.5
    } else {
      initial_nugget <- start_vals[1]
    }
    if (is.na(start_vals[2])) { # Range
      initial_range <- 0.1 * diagonal # 0.10 times the length of the central axis through the area
    } else {
      initial_range <- start_vals[2]
    }
    if (is.na(start_vals[3])) { # Sill
      initial_sill <- max(experimental_variogram$gamma)
    } else {
      initial_sill <- start_vals[3]
    }

    # Determine what should be automatically fitted and what should be fixed
    # Nugget
    if (!is.na(fix.values[1])) {
      fit_nugget <- FALSE
      initial_nugget <- fix.values[1]
    } else {
      fit_nugget <- TRUE
    }
    # Range
    if (!is.na(fix.values[2])) {
      fit_range <- FALSE
      initial_range <- fix.values[2]
    } else {
      fit_range <- TRUE
    }
    # Partial sill
    if (!is.na(fix.values[3])) {
      fit_sill <- FALSE
      initial_sill <- fix.values[3]
    } else {
      fit_sill <- TRUE
    }

    if (measurement_error != 0) {
      # The measurement-error component is carved out of the sill guess.
      initial_sill <- max(initial_sill - measurement_error, measurement_error)
    }

    # Try every candidate model; the one with the smallest SSErr wins.
    test_models <- model
    SSerr_list <- c()
    vgm_list <- list()

    # FIX: the old `counter` was incremented on failed fits as well, leaving
    # NULL holes in vgm_list that desynchronized it from SSerr_list (the
    # selected index could point at a NULL). Appending only on success keeps
    # the two containers aligned by construction.
    add_fit <- function(model_fit) {
      if (!is.null(model_fit)) { # skip models that failed
        vgm_list[[length(vgm_list) + 1]] <<- model_fit
        SSerr_list <<- c(SSerr_list, attr(model_fit, "SSErr"))
      }
    }
    for (m in test_models) {
      if (m != "Mat" && m != "Ste") { # If not Matern and not Stein
        add_fit(getModel(initial_sill - initial_nugget, m, initial_range, kappa = 0, initial_nugget, fit_range, fit_sill, fit_nugget, verbose = verbose, measurement_error = measurement_error))
      } else { # Else loop also over kappa values
        for (k in kappa) {
          add_fit(getModel(initial_sill - initial_nugget, m, initial_range, k, initial_nugget, fit_range, fit_sill, fit_nugget, verbose = verbose, measurement_error = measurement_error))
        }
      }
    }

    # Drop fits with a negative sill/range coming out of fit.variogram.
    strange_entries <- sapply(vgm_list, function(v) any(c(v$psill, v$range) < 0) | is.null(v))
    if (any(strange_entries)) {
      if (verbose) {
        print(vgm_list[strange_entries])
        cat("^^^ ABOVE MODELS WERE REMOVED ^^^\n\n")
      }
      warning("Some models where removed for being either NULL or having a negative sill/range/nugget, \n\tset verbose == TRUE for more information")
      SSerr_list <- SSerr_list[!strange_entries]
      vgm_list <- vgm_list[!strange_entries]
    }

    if (verbose) {
      cat("Selected:\n")
      print(vgm_list[[which.min(SSerr_list)]])
      cat("\nTested models, best first:\n")
      tested <- data.frame(
        "Tested models" = sapply(vgm_list, function(x) as.character(x[2, 1])),
        kappa = sapply(vgm_list, function(x) as.character(x[2, 4])),
        "SSerror" = SSerr_list
      )
      tested <- tested[order(tested$SSerror), ]
      print(tested)
    }

    result <- list(exp_var = experimental_variogram, var_model = vgm_list[[which.min(SSerr_list)]], sserr = min(SSerr_list))
  }

  # model == NULL: nonparametric Shapiro-Botha fit via npsp.
  if (is.null(model) & !is.null(input_data)) {
    svar.af <- svariso(input_data, as.character(formula)[2], maxlag = boundaries[length(boundaries)], nlags = length(boundaries))
    svar.af <- fitsvar.sb.iso(svar.af, dk = 10)
    result <- list(exp_var = svar.af, var_model = vgm.tab.svarmod(svar.af, seq(0, svar.af$range, length = 5000)))
  }
  if (is.null(model) & !is.null(input_vgm)) {
    svar.af <- .as.svariso.variogram(input_vgm)
    svar.af <- fitsvar.sb.iso(svar.af, dk = 10)
    result <- list(exp_var = svar.af, var_model = vgm.tab.svarmod(svar.af, seq(0, svar.af$range, length = 5000)))
  }
  class(result) <- c("autofitVariogram", "list")
  return(result)
}
# Automatically fit a spatiotemporal variogram model of type `typestv`
# (separable / productSum / productSumOld / sumMetric / simpleSumMetric /
# metric) to the STFDF `stf`.
#
# Arguments mirror the documented header above this function: `formula` picks
# the variable, `candidate_model` lists marginal model candidates (NULL
# switches the marginal fits to the nonparametric npsp route and forces the
# 'linear' anisotropy estimator), `guess_nugget`/`guess_psill` override the
# heuristics, `tlags`/`cutoff`/`width`/`cores` shape the empirical variogram,
# `measurement_error` is c(spatial, temporal, joint), and `surface` adds the
# fitted variogram surface to the return list.
#
# Returns a list with jointSTV (fitted StVariogramModel), empSTV (empirical
# ST variogram), SpV/TV (marginal fits) and, when surface = TRUE, STVsurface.
autofitVariogramST <- function(stf,
                               formula,
                               typestv = "sumMetric",
                               candidate_model = c("Ste", "Exc", "Exp", "Wav"),
                               guess_nugget = NULL,
                               guess_psill = NULL,
                               tlags = 0:6,
                               cutoff = 2e4,
                               width = 5e2,
                               aniso_method = "vgm",
                               type_joint = "Exp",
                               prodsum_k = NULL,
                               surface = FALSE,
                               measurement_error = c(0, 0, 0),
                               cores = 1,
                               verbose = FALSE) {
  # Empirical spatiotemporal variogram.
  stva <- setSTI(
    stf = stf,
    formula = formula,
    tlags = tlags,
    cutoff = cutoff,
    width = width,
    wireframe = FALSE,
    cores = cores
  )

  # Marginal (pure-spatial / pure-temporal) sample variograms; these were
  # computed identically in both branches before, so they are hoisted here.
  stva.sp <- marginal.variogramST(stva, bound = cutoff)
  stva.ts <- marginal.variogramST(stva, spatial = FALSE)

  if (is.null(candidate_model)) {
    # Nonparametric marginal fits; estiStAni's 'vgm' method is not
    # applicable to them, hence the switch to 'linear'.
    stva.sp.fit <- autofitVariogram(
      formula = NULL, verbose = verbose,
      input_data = NULL,
      input_vgm = stva.sp,
      measurement_error = measurement_error[1],
      model = NULL
    )
    stva.ts.fit <- autofitVariogram(
      formula = NULL, verbose = verbose,
      input_data = NULL,
      input_vgm = stva.ts,
      measurement_error = measurement_error[2],
      model = NULL
    )
    aniso_method <- "linear"
  } else {
    stva.sp$np <- as.numeric(stva.sp$np)
    stva.ts$np <- as.numeric(stva.ts$np)
    stva.sp.fit <- autofitVariogram(
      formula = NULL, verbose = verbose,
      input_data = NULL,
      input_vgm = stva.sp,
      measurement_error = measurement_error[1],
      model = candidate_model
    )
    stva.ts.fit <- autofitVariogram(
      formula = NULL, verbose = verbose,
      input_data = NULL,
      input_vgm = stva.ts,
      measurement_error = measurement_error[2],
      model = candidate_model
    )
  }

  # Spatiotemporal anisotropy estimate.
  stv.ani <- estiStAni(stva,
    interval = c(0.2, 2) * median(stva$spacelag),
    method = aniso_method,
    spatialVgm = stva.sp.fit$var_model,
    temporalVgm = stva.ts.fit$var_model
  )

  # Heuristic nugget/partial-sill guesses, clipped to be non-negative.
  if (is.null(guess_nugget)) {
    guess_nugget <- max(min(stva$gamma), min(stva$gamma) - 0.5 * (min(stva.sp$gamma) + min(stva.ts$gamma)))
    guess_nugget <- max(guess_nugget, 0)
  }
  if (is.null(guess_psill)) {
    marg_max <- max(stva.sp$gamma, stva.ts$gamma)
    gap_max <- 0.5 * (max(stva$gamma) - marg_max)
    gap_last <- 0.5 * (stva$gamma[length(stva$gamma)] - marg_max)
    guess_psill_c1 <- if (gap_max < 0) min(stva$gamma) * 2 else gap_max
    guess_psill_c2 <- if (gap_last < 0) min(stva$gamma) * 2 else gap_last
    if (typestv == "metric") {
      guess_psill <- 0.5 * max(stva.sp$gamma)
    } else {
      guess_psill <- max(0.1 * max(stva.sp$gamma), min(guess_psill_c1, guess_psill_c2))
    }
  }

  sill <- max(stva$gamma) * 0.6
  # NOTE(review): the range expression squares max(stva$spacelag) inside the
  # product; confirm whether (stv.ani * max(stva$spacelag))^2 was intended.
  stv.jo <- vgm(
    model = type_joint,
    psill = guess_psill, nugget = guess_nugget,
    Err = measurement_error[3],
    range = 0.25 * sqrt((stv.ani)^2 + (stv.ani * max(stva$spacelag)^2))
  )

  if (is.null(prodsum_k)) {
    prodsum_k <- 4 / max(stva.sp$gamma)
  }

  ## sp phi, sigmasq, tausq - ts-joint - total sill and nugget, stani
  variost.mod <- switch(typestv,
    separable = vgmST(
      stModel = typestv, space = stva.sp.fit$var_model,
      time = stva.ts.fit$var_model, sill = sill,
      nugget = guess_nugget
    ),
    productSum = vgmST(
      stModel = typestv,
      space = stva.sp.fit$var_model, time = stva.ts.fit$var_model,
      k = prodsum_k
    ),
    productSumOld = vgmST(
      stModel = typestv,
      space = stva.sp.fit$var_model, time = stva.ts.fit$var_model,
      sill = guess_psill * sqrt(2), nugget = guess_nugget
    ),
    sumMetric = vgmST(
      stModel = typestv, space = stva.sp.fit$var_model, time = stva.ts.fit$var_model,
      joint = stv.jo, stAni = stv.ani
    ),
    simpleSumMetric = vgmST(
      stModel = typestv,
      space = stva.sp.fit$var_model,
      time = stva.ts.fit$var_model,
      joint = stv.jo,
      nugget = guess_nugget, stAni = stv.ani
    ),
    metric = vgmST(stModel = typestv, joint = stv.jo, stAni = stv.ani),
    # FIX: was `typest`, an undefined object; the unknown-model error itself
    # would have raised "object 'typest' not found" instead.
    stop(paste("model", typestv, "unknown"))
  )

  # Box constraints for L-BFGS-B, sanitized against Inf/NA/negative entries.
  joint.lower <- extractPar(variost.mod) * 0.25
  joint.upper <- extractPar(variost.mod) * 1.5
  if (any(is.infinite(joint.lower))) joint.lower[which(is.infinite(joint.lower))] <- 0
  if (any(is.infinite(joint.upper))) joint.upper[which(is.infinite(joint.upper))] <- 1e4
  if (any(is.na(joint.lower))) joint.lower[which(is.na(joint.lower))] <- 0
  # NOTE(review): this NA guard for the upper bound sits in an unshown context
  # gap of the patch; confirm the replacement value against the original file.
  if (any(is.na(joint.upper))) joint.upper[which(is.na(joint.upper))] <- 1e2
  if (any(joint.lower < 0)) joint.lower[which(joint.lower < 0)] <- 0
  if (any(joint.upper < 0)) joint.upper[which(joint.upper < 0)] <- 1e2

  stva.joint <- fit.StVariogram(
    object = stva, model = variost.mod, stAni = stv.ani,
    method = "L-BFGS-B",
    lower = joint.lower, upper = joint.upper,
    control = list(maxit = 2.5e3, REPORT = 1)
  )

  autofitSTV <- list(
    jointSTV = stva.joint,
    empSTV = stva,
    SpV = stva.sp.fit,
    TV = stva.ts.fit
  )
  if (surface) {
    # Fitted variogram surface evaluated on the empirical lags.
    autofitSTV$STVsurface <- variogramSurface(stva.joint, stva[, c("timelag", "spacelag")])
  }
  return(autofitSTV)
}
#' Create a new_data object when one is missing.
#'
#' @param obj A sf or Spatial* object defining the sampling extent.
#' @param gen_mode character(1). 'chull' (convex hull of obj) or 'rect'
#'   (bounding box of obj).
#' @param npoints integer(1). Number of regularly spaced sample points.
#' @param return_class character(1). One of 'sf' and 'sp'.
#' @return A sf (return_class == 'sf') or SpatialPointsDataFrame (return_class == 'sp').
#' @export
create_new_data <- function(obj, gen_mode = "chull", npoints = 1e4, return_class = "sf") {
  # Validate that the input is sf or Spatial*, returning it as sf.
  # NOTE(review): this helper is not called anywhere in this function; kept
  # for backward compatibility.
  distinguishsf <- function(infeat) {
    # FIX: the old condition `any(!"sf" %in% class(infeat)) |
    # !grepl("^Spatial", class(infeat))` produced a length > 1 `if` condition
    # (grepl over the class vector) and rejected valid sf objects that are
    # not also Spatial*. Stop only when the input is NEITHER sf nor Spatial*.
    if (!inherits(infeat, "sf") && !any(grepl("^Spatial", class(infeat)))) {
      stop("The class is neither sf nor Spatial*. Please check your input.")
    }
    if (inherits(infeat, "sf")) {
      return(infeat)
    }
    st_as_sf(infeat)
  }

  # Sampling extent: bounding box ('rect') or convex hull (default).
  if (gen_mode == "rect") {
    obj.b <- st_as_sf(obj)
    genextent <- st_as_sfc(st_bbox(obj.b))
  } else {
    genextent <- st_convex_hull(obj)
  }
  new_data <- st_sample(genextent, npoints, type = "regular")
  if (return_class == "sp") {
    new_data <- as(new_data, "Spatial")
  }
  return(new_data)
}


#' Detect the temporal unit of a time index.
#'
#' Dispatches to the xts handler when the input carries an xts/zoo class,
#' otherwise to the generic Date/POSIXct handler.
#'
#' @param temporal xts/zoo object, or a Date/POSIXct vector.
#' @return A character that indicates the temporal unit of the input.
#' @export
detect_temporal_unit <- function(temporal) {
  if (any(grepl("(zoo|xts)", class(temporal)))) {
    detect_temporal_unit.xts(temporal)
  } else {
    detect_temporal_unit.generic(temporal)
  }
}


#' Detect the temporal unit of an xts time index.
#'
#' @param temporal a xts object.
#' @return A character: 'days', 'hours', 'minutes' or 'unknown'.
#' @export
detect_temporal_unit.xts <- function(temporal) {
  # The xts index attribute is numeric seconds, so the unique consecutive
  # difference identifies the sampling unit.
  gcdt <- unique(diff(sort(unique(attributes(temporal)$index))))
  if (length(gcdt) > 1) stop("The data has the incompatible irregular temporal difference")
  # FIX: with a single time point gcdt has length 0 and the comparisons
  # below raised "argument is of length zero"; fail with a clear message.
  if (length(gcdt) == 0) stop("No temporal difference is detected.\n")
  if (gcdt == 86400) {
    tunit <- "days"
  } else if (gcdt == 3600) {
    tunit <- "hours"
  } else if (gcdt == 60) {
    tunit <- "minutes"
  } else {
    tunit <- "unknown"
  }
  return(tunit)
}


#' Detect the temporal unit of a Date/POSIXct time vector.
#'
#' @param temporal a lubridate/POSIXct/Date object.
#' @return A character with the temporal unit. For non-Date inputs this is
#'   the difftime units attribute, i.e. base R's names such as 'mins',
#'   'hours', 'secs' -- NOTE(review): callers that switch() on 'minutes' /
#'   'seconds' will not match those; confirm the expected vocabulary.
#' @export
detect_temporal_unit.generic <- function(temporal) {
  cldt <- class(temporal)
  gcdt <- diff(sort(unique(temporal)))
  unique_gcdt <- unique(gcdt)
  len_gcdt <- length(unique_gcdt)
  if (is.null(gcdt)) {
    stop("No time difference is detected.\n")
  }
  if (len_gcdt > 1) {
    stop("The data has the incompatible irregular temporal difference.\n")
  } else if (len_gcdt == 0) {
    stop("No temporal difference is detected.\n")
  }
  if (any(grepl("Date", cldt))) {
    tunit <- "days"
  } else {
    # difftime objects carry their unit as an attribute.
    tunit <- attr(gcdt, "units")
  }
  return(tunit)
}
#' Generate a spatiotemporal prediction grid (legacy sp/spacetime classes).
#'
#' @param obj A STFDF object holding the observations.
#' @param form formula. Its dependent-variable name labels the data column of
#'   the returned object.
#' @param gen_mode character(1). 'chull' or 'rect'; passed to create_new_data.
#' @param npoints integer(1). Number of prediction points per time slice.
#' @param forward integer. The time length the data will generate ahead of the
#'   last time point of the input data. If NULL is passed, the spatiotemporal
#'   interpolation mode over the observed time points is used instead.
#' @return A STFDF object.
#' @export
create_new_data.ST.legacy <- function(obj, form, gen_mode = "chull", npoints = 1e4, forward = 6) {
  sp.base <- obj@sp
  tunit <- detect_temporal_unit(obj@time)
  # Last observed time point; coerced to Date when the unit is daily.
  if (is.null(tunit)) {
    ts.base <- sort(unique(index(obj@time)))
    ts.base <- ts.base[length(ts.base)]
  } else {
    if (tunit == "days") {
      ts.base <- sort(unique(index(obj@time)))
      ts.base <- as.Date(ts.base)[length(ts.base)]
    } else if (tunit != "days" & tunit != "unknown") {
      ts.base <- sort(unique(index(obj@time)))
      ts.base <- ts.base[length(ts.base)]
    }
  }

  # FIX: this legacy path works with sp classes, but create_new_data returns
  # sf by default and sp::spTransform only accepts Spatial* objects; request
  # the Spatial representation explicitly.
  sp.base <- create_new_data(sp.base, gen_mode = gen_mode, npoints = npoints, return_class = "sp")
  sp.base <- sp::spTransform(sp.base, proj4string(obj@sp))
  if (!is.null(forward)) {
    # TODO: temporal unit-dependent setting
    if (tunit == "hours") {
      # FIX: was `tunit + seq(...)`; tunit is a character label, the offsets
      # must be added to the last time stamp as in the other branches.
      ts.base <- ts.base + seq(0, 3600 * forward, 3600)
    } else if (tunit == "minutes") {
      ts.base <- ts.base + seq(0, 60 * forward, 60)
    } else {
      # FIX: seq_len() instead of 1:forward, which yields c(1, 0) for
      # forward = 0 instead of an empty offset vector.
      ts.base <- ts.base + seq_len(forward)
    }
  } else {
    ts.base <- as.Date(sort(unique(index(obj@time))))
  }
  # Placeholder data column named after the formula's dependent variable.
  dat <- data.frame(dat = rep(1, length(sp.base) * length(ts.base)))
  colnames(dat)[1] <- as.character(form)[2]
  new_data_ST <- STFDF(
    sp = sp.base,
    time = ts.base,
    data = dat
  )
  new_data_ST@sp@proj4string <- obj@sp@proj4string
  return(new_data_ST)
}
#' @export -create_new_data.ST <- function( - obj, - form, - gen_mode = 'chull', - npoints = 1e4, - forward=6) { - st.geom = obj[[attr(obj, "sf_column")]] - st.time = obj[[attr(obj, "time_column")]] - sp.base = st.geom - #sp.base <- st_as_sfc(unique(st.geom)) - tunit <- detect_temporal_unit(st.time) - if (tunit == 'days'){ - ts.base <- sort(unique(st.time)) - ts.base <- as.Date(ts.base)[length(ts.base)] - } else if (tunit != 'days' & tunit != 'unknown') { - ts.base <- sort(unique(st.time)) - ts.base <- ts.base[length(ts.base)] - } - sp.base <- create_new_data(sp.base, gen_mode = gen_mode, npoints = npoints, return_class = "sf") - # st_crs(sp.base) = st_crs(obj) - # sp.base <- sp::spTransform(sp.base, proj4string(obj@sp)) - if (!is.null(forward)) { - # TODO: temporal unit-dependent setting - ts.base = - switch(tunit, - days = ts.base + seq(1, forward, 1), - hours = ts.base + seq(3600, 3600 * forward, 3600), - minutes = ts.base + seq(60, 60 * forward, 60), - seconds = ts.base + seq(1, forward, 1)) - - # if (tunit == 'hours') { - # ts.base <- tunit + seq(0, 3600 * forward, 3600) - # } else if (tunit == 'minutes') { - # ts.base <- ts.base + seq(0, 60 * forward, 60) - # } else { - # ts.base <- ts.base + 1:forward - # } - } else { - ts.base <- sort(unique(st.time)) - } - #dat = data.frame(dat = rep(1, length(sp.base) * length(ts.base))) - # new_data_ST <- STFDF(sp = sp.base, - # time = ts.base, - # data = dat) - NT = length(unique(ts.base)) - NS = length(sp.base) - sp.base = rep(sp.base, NT) - ts.base = rep(ts.base, each = NS) - new_data_ST = data.frame( - outcome = NA, - time = ts.base, - geometry = sp.base - ) - colnames(new_data_ST)[1] <- as.character(form)[2] - new_data_ST = st_as_sftime(new_data_ST, - sf_column_name = "geometry", - time_column_name = "time" - ) - st_crs(new_data_ST) = st_crs(obj) - # new_data_ST@sp@proj4string = obj@sp@proj4string - return(new_data_ST) +create_new_data.ST <- function(obj, + form, + gen_mode = "chull", + npoints = 1e4, + forward = 6) 
{ + st.geom <- obj[[attr(obj, "sf_column")]] + st.time <- obj[[attr(obj, "time_column")]] + sp.base <- st.geom + # sp.base <- st_as_sfc(unique(st.geom)) + tunit <- detect_temporal_unit(st.time) + if (tunit == "days") { + ts.base <- sort(unique(st.time)) + ts.base <- as.Date(ts.base)[length(ts.base)] + } else if (tunit != "days" & tunit != "unknown") { + ts.base <- sort(unique(st.time)) + ts.base <- ts.base[length(ts.base)] + } + sp.base <- create_new_data(sp.base, gen_mode = gen_mode, npoints = npoints, return_class = "sf") + # st_crs(sp.base) = st_crs(obj) + # sp.base <- sp::spTransform(sp.base, proj4string(obj@sp)) + if (!is.null(forward)) { + # TODO: temporal unit-dependent setting + ts.base <- + switch(tunit, + days = ts.base + seq(1, forward, 1), + hours = ts.base + seq(3600, 3600 * forward, 3600), + minutes = ts.base + seq(60, 60 * forward, 60), + seconds = ts.base + seq(1, forward, 1) + ) + + # if (tunit == 'hours') { + # ts.base <- tunit + seq(0, 3600 * forward, 3600) + # } else if (tunit == 'minutes') { + # ts.base <- ts.base + seq(0, 60 * forward, 60) + # } else { + # ts.base <- ts.base + 1:forward + # } + } else { + ts.base <- sort(unique(st.time)) + } + # dat = data.frame(dat = rep(1, length(sp.base) * length(ts.base))) + # new_data_ST <- STFDF(sp = sp.base, + # time = ts.base, + # data = dat) + NT <- length(unique(ts.base)) + NS <- length(sp.base) + sp.base <- rep(sp.base, NT) + ts.base <- rep(ts.base, each = NS) + new_data_ST <- data.frame( + outcome = NA, + time = ts.base, + geometry = sp.base + ) + colnames(new_data_ST)[1] <- as.character(form)[2] + new_data_ST <- st_as_sftime(new_data_ST, + sf_column_name = "geometry", + time_column_name = "time" + ) + st_crs(new_data_ST) <- st_crs(obj) + # new_data_ST@sp@proj4string = obj@sp@proj4string + return(new_data_ST) } diff --git a/R/marginal.variogramST.r b/R/marginal.variogramST.r index e39e9ca..13b2273 100644 --- a/R/marginal.variogramST.r +++ b/R/marginal.variogramST.r @@ -10,21 +10,20 @@ ## input: 
stv. StVariogram and data.frame. ## bound: numeric. maximum distance to function fit ## Subset empirical variogram by lag distance -marginal.variogramST <- function(stv, bound, spatial=TRUE){ - #if (!grepl('^timelag.*', colnames(stv))){ +marginal.variogramST <- function(stv, bound, spatial = TRUE) { + # if (!grepl('^timelag.*', colnames(stv))){ # stop('You may want to check the input stv is valid.') - #} + # } # print(stv) if (spatial) { - vg = stv[(1:nrow(stv)) * ((stv$timelag == min(stv$timelag)) * (stv$spacelag <= bound) * !is.na(stv$gamma)),] + vg <- stv[(1:nrow(stv)) * ((stv$timelag == min(stv$timelag)) * (stv$spacelag <= bound) * !is.na(stv$gamma)), ] vg$np <- as.numeric(vg$np) - } - else { - vg = stv[(1:nrow(stv)) * ((stv$spacelag == min(stv$spacelag)) * !is.na(stv$gamma)),] + } else { + vg <- stv[(1:nrow(stv)) * ((stv$spacelag == min(stv$spacelag)) * !is.na(stv$gamma)), ] vg$dist <- vg$timelag vg$id <- 0 vg$np <- as.numeric(vg$np) } - class(vg) = c('gstatVariogram', 'data.frame') + class(vg) <- c("gstatVariogram", "data.frame") return(vg) } diff --git a/R/plot.autofitVariogram.R b/R/plot.autofitVariogram.R index 5cdf1e3..74945fb 100644 --- a/R/plot.autofitVariogram.R +++ b/R/plot.autofitVariogram.R @@ -14,19 +14,22 @@ #' @return A lattice::xyplot object. #' @export -plot.autofitVariogram <- -function (x, plotit = TRUE, title="Experimental variogram and fitted variogram model", ...) -{ - shift = 0.03 - labels = as.character(x$exp_var$np) - vario = lattice::xyplot(gamma ~ dist, data = x$exp_var, panel = automap:::autokrige.vgm.panel, - labels = labels, shift = shift, model = x$var_model, - direction = c(1, 0, 0), - ylim = c(min(0, 1.04 * min(x$exp_var$gamma)), 1.04 * - max(x$exp_var$gamma)), xlim = c(0, 1.04 * max(x$exp_var$dist)), - xlab = "Distance", ylab = "Semi-variance", main = title, - mode = "direct", ...) 
- if (plotit) - print(vario) - else vario -} +plot.autofitVariogram <- + function(x, plotit = TRUE, title = "Experimental variogram and fitted variogram model", ...) { + shift <- 0.03 + labels <- as.character(x$exp_var$np) + vario <- lattice::xyplot(gamma ~ dist, + data = x$exp_var, panel = automap:::autokrige.vgm.panel, + labels = labels, shift = shift, model = x$var_model, + direction = c(1, 0, 0), + ylim = c(min(0, 1.04 * min(x$exp_var$gamma)), 1.04 * + max(x$exp_var$gamma)), xlim = c(0, 1.04 * max(x$exp_var$dist)), + xlab = "Distance", ylab = "Semi-variance", main = title, + mode = "direct", ... + ) + if (plotit) { + print(vario) + } else { + vario + } + } diff --git a/R/setSTI.R b/R/setSTI.R index fbf9fcd..f9efa3f 100644 --- a/R/setSTI.R +++ b/R/setSTI.R @@ -9,7 +9,7 @@ ### logarithm: LOGICAL. log-transformation ### wireframe: LOGICAL. Whether you plot a StVariogram in wireframe or not. If not, the return will be in class of data.frame, not a list ### plot3d: LOGICAL. Wheter you make a three-dimensional graph with rgl package -#' A convenience function for the sample spatiotemporal variogram +#' A convenience function for the sample spatiotemporal variogram #' #' @param stf A ST*DF object. #' @param formula formula (inherits the same parameter in variogramST) @@ -25,17 +25,17 @@ #' @return Depends on the arguments wireframe (if TRUE, list of length 2) and plot3d (if TRUE, list of length 3), a StVariogram object otherwise. 
#' @export setSTI <- - function(stf, - formula, - tlags = 0:6, - cutoff = 30000, + function(stf, + formula, + tlags = 0:6, + cutoff = 30000, width = 1000, - assumeRegular = TRUE, - pseudo = TRUE, - #logarithm = FALSE, + assumeRegular = TRUE, + pseudo = TRUE, + # logarithm = FALSE, na.omit = TRUE, - wireframe = FALSE, - plot3d = FALSE, + wireframe = FALSE, + plot3d = FALSE, cores = 1) { formula <- as.formula(formula) ncol.stf <- (cutoff / width) + 1 @@ -48,40 +48,46 @@ setSTI <- # stf <- stf # } - apo.pmsub.stf <- variogramST(formula = formula, - data = stf, - tlags = tlags, - assumeRegular = assumeRegular, - pseudo = pseudo, - na.omit = na.omit, - cutoff = cutoff, - width = width, - cores = cores) - if (!wireframe & !plot3d){ + apo.pmsub.stf <- variogramST( + formula = formula, + data = stf, + tlags = tlags, + assumeRegular = assumeRegular, + pseudo = pseudo, + na.omit = na.omit, + cutoff = cutoff, + width = width, + cores = cores + ) + if (!wireframe & !plot3d) { plot.sti.set <- apo.pmsub.stf } if (wireframe) { - label.tlag = units(apo.pmsub.stf$timelag) - apo.pmsub.stf$timelag = as.numeric(apo.pmsub.stf$timelag) + label.tlag <- units(apo.pmsub.stf$timelag) + apo.pmsub.stf$timelag <- as.numeric(apo.pmsub.stf$timelag) wireframe.stf <- lattice::wireframe(gamma ~ spacelag * timelag, - apo.pmsub.stf, - drape=TRUE, - ylab = paste("Time lag (", label.tlag, ")", sep = ""), - col.regions = colorRampPalette(colors = c('white', 'red'))(100), - zlim=c(0, max(apo.pmsub.stf$gamma)*1.02)) + apo.pmsub.stf, + drape = TRUE, + ylab = paste("Time lag (", label.tlag, ")", sep = ""), + col.regions = colorRampPalette(colors = c("white", "red"))(100), + zlim = c(0, max(apo.pmsub.stf$gamma) * 1.02) + ) dev.new() print(wireframe.stf) plot.sti.set <- list(apo.pmsub.stf, wireframe.stf) } if (wireframe * plot3d == 1) { apo.pmsub.stf.mat <- matrix(apo.pmsub.stf$gamma, - byrow=FALSE, - nrow=nrow.stf, ncol=ncol.stf) - persp.stf <- rgl::persp3d(x=unique(apo.pmsub.stf$spacelag), - 
y=unique(apo.pmsub.stf$timelag), - z=apo.pmsub.stf.mat, - color='green3') + byrow = FALSE, + nrow = nrow.stf, ncol = ncol.stf + ) + persp.stf <- rgl::persp3d( + x = unique(apo.pmsub.stf$spacelag), + y = unique(apo.pmsub.stf$timelag), + z = apo.pmsub.stf.mat, + color = "green3" + ) persp.stf plot.sti.set <- list(apo.pmsub.stf, wireframe.stf, persp.stf) } diff --git a/R/zzz.r b/R/zzz.r index c0a9853..6717f83 100644 --- a/R/zzz.r +++ b/R/zzz.r @@ -1,41 +1,47 @@ ## Misc for npsp integration: subject to retire -svariso <- function(input, vars, maxlag = 30000, nlags = 10, estimator = 'modulus'){ - if (sum(grepl('Spatial.*', class(input))) != 0){ - .svariso.sp(input, vars, maxlag, nlags, estimator) - } else if (sum(grepl('sf.*', class(input))) != 0){ - .svariso.sf(input, vars, maxlag, nlags, estimator) - } else { - stop('The input is not sf or sp-compatible dataset') - } +svariso <- function(input, vars, maxlag = 30000, nlags = 10, estimator = "modulus") { + if (sum(grepl("Spatial.*", class(input))) != 0) { + .svariso.sp(input, vars, maxlag, nlags, estimator) + } else if (sum(grepl("sf.*", class(input))) != 0) { + .svariso.sf(input, vars, maxlag, nlags, estimator) + } else { + stop("The input is not sf or sp-compatible dataset") + } } -.as.svariso.variogram <- function(vg, estimator = 'modulus'){ - sv <- list(biny = vg$gamma, - binw = vg$np, - grid = npsp::grid.par(n = nrow(vg), - min = min(vg$dist, na.rm = T), - max = max(vg$dist, na.rm = T), - lag = (vg$spacelag[2] - vg$spacelag[1])), - data = list(x = 2, y = 1, med = weighted.mean(vg$gamma, vg$np)), - svar = list(type = 'isotropic', - estimator = estimator)) - attr(sv, 'class') <- c('svar.bin', 'bin.data', 'bin.den', 'data.grid') +.as.svariso.variogram <- function(vg, estimator = "modulus") { + sv <- list( + biny = vg$gamma, + binw = vg$np, + grid = npsp::grid.par( + n = nrow(vg), + min = min(vg$dist, na.rm = T), + max = max(vg$dist, na.rm = T), + lag = (vg$spacelag[2] - vg$spacelag[1]) + ), + data = list(x = 2, y = 1, 
med = weighted.mean(vg$gamma, vg$np)), + svar = list( + type = "isotropic", + estimator = estimator + ) + ) + attr(sv, "class") <- c("svar.bin", "bin.data", "bin.den", "data.grid") return(sv) } -.svariso.sp <- function(Sp, vars, ml, nlags, estimator = 'modulus'){ +.svariso.sp <- function(Sp, vars, ml, nlags, estimator = "modulus") { coord.n <- coordinates(Sp) - y <- Sp@data[,vars] - ssp <- svar.bin(coord.n, y, estimator=estimator, maxlag=ml, nlags = nlags) + y <- Sp@data[, vars] + ssp <- svar.bin(coord.n, y, estimator = estimator, maxlag = ml, nlags = nlags) return(ssp) } # sf implementation -.svariso.sf <- function(sf, vars, ml, nlags, estimator = 'modulus'){ +.svariso.sf <- function(sf, vars, ml, nlags, estimator = "modulus") { coord.n <- st_coordinates(sf) - y <- st_set_geometry(sf, NULL)[,vars] %>% unlist - ssp <- svar.bin(coord.n, y, estimator=estimator, maxlag = ml, nlags = nlags) + y <- st_set_geometry(sf, NULL)[, vars] %>% unlist() + ssp <- svar.bin(coord.n, y, estimator = estimator, maxlag = ml, nlags = nlags) return(ssp) } From df98b69b2fe71cf6fc694412d37a0f6f5c6d07d9 Mon Sep 17 00:00:00 2001 From: Insang Song <insang.song@nih.gov> Date: Wed, 1 May 2024 11:21:49 -0400 Subject: [PATCH 2/2] path fix --- .github/{workflow => workflows}/R-CMD-as-cran-check.yaml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{workflow => workflows}/R-CMD-as-cran-check.yaml (100%) diff --git a/.github/workflow/R-CMD-as-cran-check.yaml b/.github/workflows/R-CMD-as-cran-check.yaml similarity index 100% rename from .github/workflow/R-CMD-as-cran-check.yaml rename to .github/workflows/R-CMD-as-cran-check.yaml