Skip to content

Commit

Permalink
added smart_upload(); #22
Browse files Browse the repository at this point in the history
  • Loading branch information
beanumber committed Jun 27, 2017
1 parent fa6057d commit 2644292
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 45 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: etl
Type: Package
Title: Extract-Transform-Load Framework for Medium Data
Version: 0.3.5.9003
Version: 0.3.5.9004
Date: 2016-11-28
Authors@R: c(
person("Ben", "Baumer", email = "[email protected]",
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export(find_schema)
export(is.etl)
export(match_files_by_year_months)
export(smart_download)
export(smart_upload)
export(src_mysql_cnf)
export(valid_year_month)
import(dplyr)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

* Added `clobber` option to `smart_download`
* Added `db_type` for easy typing of connection objects
* Added `smart_upload` for pushing files to database

# etl 0.3.5 (2016-11-28)

Expand Down
37 changes: 37 additions & 0 deletions R/etl_extract.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,40 @@ etl_extract.etl_mtcars <- function(obj, ...) {
}


#' Download only those files that don't already exist
#' @param obj an \code{\link{etl}} object
#' @param src a character vector of URLs that you want to download
#' @param new_filenames an optional character vector of filenames for the new
#' (local) files. Defaults to having the same filenames as those in \code{src}.
#' @param clobber do you want to clobber any existing files?
#' @param ... arguments passed to \code{\link[downloader]{download}}
#' @details Downloads only those files in \code{src} that are not already present in
#' the directory specified by the \code{raw_dir} attribute of \code{obj}.
#' @author idiom courtesy of Hadley Wickham
#' @importFrom downloader download
#' @export
#'
#' @examples
#' cars <- etl("mtcars")
#' urls <- c("http://www.google.com", "http://www.nytimes.com")
#' smart_download(cars, src = urls)
#' # won't download again if the files are already there
#' smart_download(cars, src = urls)
#' # use clobber to overwrite
#' smart_download(cars, src = urls, clobber = TRUE)
smart_download <- function(obj, src, new_filenames = basename(src), clobber = FALSE, ...) {
  if (length(src) != length(new_filenames)) {
    stop("src and new_filenames must be of the same length")
  }
  # Local destinations inside the directory named by the raw_dir attribute.
  lcl <- file.path(attr(obj, "raw_dir"), new_filenames)
  # Fetch everything when clobbering, otherwise only files not yet on disk.
  # (Named `to_fetch` so that base::missing() is not masked.)
  if (clobber) {
    to_fetch <- rep(TRUE, length(lcl))
  } else {
    to_fetch <- !file.exists(lcl)
  }
  message(paste("Downloading", sum(to_fetch), "new files. ",
    sum(!to_fetch), "untouched."))
  # Options in ... are the same for every download, so they are forwarded
  # through MoreArgs. Passing them as vectorised mapply() arguments made the
  # call fail whenever nothing had to be downloaded and an option was given,
  # because mapply() cannot mix zero-length and non-zero-length inputs.
  mapply(downloader::download, src[to_fetch], lcl[to_fetch],
    MoreArgs = list(...))
}


46 changes: 40 additions & 6 deletions R/etl_load.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,51 @@ etl_load.default <- function(obj, ...) {
#' @export

etl_load.etl_mtcars <- function(obj, ...) {
# Load step for the mtcars example: delegates the upload to smart_upload().
# NOTE(review): this view appears to interleave removed and added diff lines;
# `data` is read below but not used in the statements visible here. Confirm
# which statements belong to the current version of the function.
message("Loading processed data...")
data <- utils::read.csv(file.path(attr(obj, "load_dir"), "mtcars.csv"))
# With no `src` given, smart_upload() looks for CSV files in the load directory.
smart_upload(obj)
# Return obj invisibly.
invisible(obj)
}

obj <- verify_con(obj)
if (DBI::dbWriteTable(obj$con, "mtcars", value = data, row.names = FALSE,
append = TRUE)) {
message("Data was successfully written to database.")
#' Upload a list of files to the DB
#' @param obj An \code{\link{etl}} object
#' @param src a character vector of CSV files to upload. If \code{NULL}, all
#' files ending in \code{.csv} in the load directory are uploaded.
#' @param tablenames a character vector the same length as \code{src} of
#' table names in the database corresponding to each of the files in
#' \code{src}. If \code{NULL}, defaults to the file names in \code{src},
#' without paths or the trailing \code{.csv} extension.
#' @param ... arguments passed to \code{\link[DBI]{dbWriteTable}}.
#' \code{append = TRUE} is used unless \code{append} is supplied here.
#' @return \code{obj}, invisibly.
#' @importFrom DBI dbWriteTable
#' @export
#' @examples
#' \dontrun{
#' if (require(RMySQL)) {
#'   # must have pre-existing database "mtcars"
#'   # if not, try
#'   system("mysql -e 'CREATE DATABASE IF NOT EXISTS mtcars;'")
#'   db <- src_mysql_cnf(dbname = "mtcars")
#' }
#' }
smart_upload <- function(obj, src = NULL, tablenames = NULL, ...) {
  if (is.null(src)) {
    # Anchor the pattern so only names that END in ".csv" are picked up
    # (an unanchored "\\.csv" would also match e.g. "x.csv.bak").
    src <- list.files(attr(obj, "load_dir"), pattern = "\\.csv$",
      full.names = TRUE)
  }
  if (is.null(tablenames)) {
    # Strip only the trailing extension, not every ".csv" inside the name.
    tablenames <- sub("\\.csv$", "", basename(src))
  }
  if (length(src) != length(tablenames)) {
    stop("src and tablenames must be of the same length")
  }
  message(paste("Uploading", length(src), "file(s) to the database..."))

  # Arguments shared by every dbWriteTable() call. `append` defaults to TRUE
  # but a value supplied through ... wins; hard-coding it next to ... raised
  # "matched by multiple actual arguments" when the caller passed `append`.
  write_args <- list(...)
  if (!"append" %in% names(write_args)) {
    write_args$append <- TRUE
  }
  write_args <- c(list(conn = obj$con), write_args)

  # write the tables directly to the DB, one call per (table name, file) pair
  mapply(DBI::dbWriteTable, name = tablenames, value = src,
    MoreArgs = write_args)

  invisible(obj)
}



#' Initialize a database using a defined schema
#'
#' @param script either a vector of SQL commands to be executed, or
Expand Down
34 changes: 0 additions & 34 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,40 +24,6 @@ verify_con <- function(x, dir = tempdir()) {
# }


#' Download only those files that don't already exist
#' @param obj an \code{\link{etl}} object
#' @param src a character vector of URLs that you want to download
#' @param new_filenames an optional character vector of filenames for the new
#' (local) files. Defaults to having the same filenames as those in \code{src}.
#' @param clobber do you want to clobber any existing files?
#' @param ... arguments passed to \code{\link[downloader]{download}}
#' @details Downloads only those files in \code{src} that are not already present in
#' the directory specified by the \code{raw_dir} attribute of \code{obj}.
#' @author idiom courtesy of Hadley Wickham
#' @importFrom downloader download
#' @export
#'
#' @examples
#' cars <- etl("mtcars")
#' urls <- c("http://www.google.com", "http://www.nytimes.com")
#' smart_download(cars, src = urls)
#' # won't download again if the files are already there
#' smart_download(cars, src = urls)
#' # use clobber to overwrite
#' smart_download(cars, src = urls, clobber = TRUE)
smart_download <- function(obj, src, new_filenames = basename(src), clobber = FALSE, ...) {
  if (length(src) != length(new_filenames)) {
    stop("src and new_filenames must be of the same length")
  }
  # Local destinations inside the directory named by the raw_dir attribute.
  lcl <- file.path(attr(obj, "raw_dir"), new_filenames)
  # Fetch everything when clobbering, otherwise only files not yet on disk.
  # (Named `to_fetch` so that base::missing() is not masked.)
  if (clobber) {
    to_fetch <- rep(TRUE, length(lcl))
  } else {
    to_fetch <- !file.exists(lcl)
  }
  # Options in ... are the same for every download, so they are forwarded
  # through MoreArgs. As vectorised mapply() arguments they made the call
  # fail when nothing had to be downloaded and an option was supplied,
  # because mapply() cannot mix zero-length and non-zero-length inputs.
  mapply(downloader::download, src[to_fetch], lcl[to_fetch],
    MoreArgs = list(...))
}

#' Ensure that years and months are within a certain time span
#' @param years a numeric vector of years
#' @param months a numeric vector of months
Expand Down
2 changes: 1 addition & 1 deletion man/smart_download.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions man/smart_upload.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions tests/testthat/test-etl.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ test_that("sqlite works", {
expect_true(file.exists(find_schema(cars_sqlite)))
expect_message(find_schema(cars_sqlite, "my_crazy_schema", "etl"))
expect_output(summary(cars_sqlite), "files")
expect_message(cars_sqlite %>% etl_create(), "success")
expect_message(cars_sqlite %>% etl_create(), "Uploading")
expect_message(cars_sqlite %>% etl_init(), "Loading SQL script")
expect_message(
cars_sqlite %>% etl_cleanup(delete_raw = TRUE, delete_load = TRUE),
Expand All @@ -18,7 +18,7 @@ test_that("sqlite works", {

test_that("dplyr works", {
expect_message(cars <- etl("mtcars") %>%
etl_create(), regexp = "success")
etl_create(), regexp = "Uploading")
expect_gt(length(src_tbls(cars)), 0)
tbl_cars <- cars %>%
tbl("mtcars")
Expand All @@ -30,7 +30,7 @@ test_that("dplyr works", {
# double up the data
expect_message(
cars %>%
etl_update(), regexp = "success")
etl_update(), regexp = "Uploading")
res2 <- tbl_cars %>%
collect()
expect_equal(nrow(res2), 2 * nrow(mtcars))
Expand Down

0 comments on commit 2644292

Please sign in to comment.