Skip to content

Commit

Permalink
Adding support for sql downloads #752 (#753)
Browse files Browse the repository at this point in the history
  • Loading branch information
jhnwllr authored Sep 25, 2024
1 parent 39bd610 commit 495a552
Show file tree
Hide file tree
Showing 12 changed files with 573 additions and 3 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: A programmatic interface to the Web Service methods
retrieving information on data providers, getting species occurrence
records, getting counts of occurrence records, and using the GBIF
tile map service to make rasters summarizing huge amounts of data.
Version: 3.8.0.1
Version: 3.8.0.2
License: MIT + file LICENSE
Authors@R: c(
person("Scott", "Chamberlain", role = "aut", comment = c(ORCID="0000-0003-1444-9135")),
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ export(occ_download_list)
export(occ_download_meta)
export(occ_download_prep)
export(occ_download_queue)
export(occ_download_sql)
export(occ_download_sql_prep)
export(occ_download_sql_validate)
export(occ_download_wait)
export(occ_facet)
export(occ_get)
Expand Down
2 changes: 1 addition & 1 deletion R/occ_download_describe.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#' occ_download_describe("simpleCsv")$fields
#' }
occ_download_describe <- function(x="dwca") {
acc_args <- c("dwca","simpleCsv","simpleAvro","simpleParquet","speciesList")
acc_args <- c("dwca","simpleCsv","simpleAvro","simpleParquet","speciesList","sql")
stopifnot(x %in% acc_args)
url <- paste0(gbif_base(),"/occurrence/download/describe/",x)
out <- gbif_GET(url,args=NULL,parse=TRUE)
Expand Down
153 changes: 153 additions & 0 deletions R/occ_download_sql.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
#' @title Download occurrence data using a SQL query
#'
#' @description
#' `occ_download_sql()` prepares a SQL download request, submits it, and
#' returns the result of the submission decorated with download metadata.
#' `occ_download_sql_prep()` only builds the request without submitting it.
#' `occ_download_sql_validate()` posts an already prepared request to the
#' validation endpoint.
#'
#' @param q A SQL query given as a character string. For
#' `occ_download_sql_validate()` this is instead the prepared request list.
#' @param format The download format. Only "SQL_TSV_ZIP" is supported right
#' now.
#' @param user your GBIF user name
#' @param pwd your GBIF password
#' @param email your email address
#' @param validate Should the query be validated before submission? Default is
#' TRUE.
#' @param curlopts list of curl options
#'
#' @details
#' This is an experimental feature, and the implementation may change throughout
#' 2024. The feature is currently only available for preview by invited users.
#' Contact `helpdesk@gbif.org` to request access.
#'
#' Please see the article here for more information:
#' \url{https://docs.ropensci.org/rgbif/articles/getting_occurrence_data.html}
#'
#' @return `occ_download_sql()` returns an object of class 'occ_download_sql'.
#' `occ_download_sql_prep()` returns an object of class
#' 'occ_download_sql_prep'.
#'
#' @references
#' \url{https://techdocs.gbif.org/en/data-use/api-sql-downloads}
#'
#' @name occ_download_sql
#' @export
#'
#' @examples \dontrun{
#' occ_download_sql("SELECT gbifid,countryCode FROM occurrence
#' WHERE genusKey = 2435098")
#' }
#'
occ_download_sql <- function(q = NULL,
                             format = "SQL_TSV_ZIP",
                             user = NULL,
                             pwd = NULL,
                             email = NULL,
                             validate = TRUE,
                             curlopts = list()) {

  # Build the request first; when `validate` is TRUE the prep step also posts
  # it to the validation endpoint before anything is submitted here.
  prepared <- occ_download_sql_prep(
    q = q,
    format = format,
    user = user,
    pwd = pwd,
    email = email,
    validate = validate,
    curlopts = curlopts
  )

  # Submit the download request.
  submitted <- rg_POST(
    prepared$url,
    req = prepared$request,
    user = prepared$user,
    pwd = prepared$pwd,
    curlopts = curlopts
  )

  # Look up the metadata and citation that the print method displays.
  meta <- occ_download_meta(submitted)
  citation_text <- gbif_citation(meta)$download

  structure(
    submitted,
    class = "occ_download_sql",
    user = prepared$user,
    email = prepared$email,
    format = prepared$format,
    status = meta$status,
    created = meta$created,
    downloadLink = meta$downloadLink,
    doi = meta$doi,
    citation = citation_text
  )
}

#' @name occ_download_sql
#' @export
occ_download_sql_validate <- function(q = NULL,
                                      user = NULL,
                                      pwd = NULL) {
  # Here `q` is the prepared request list (as built by
  # occ_download_sql_prep()), not the raw SQL string.
  stopifnot(is.list(q))

  # Build the endpoint from gbif_base(), as occ_download_sql_prep() does, so
  # that validation and submission always target the same API host.
  url <- paste0(gbif_base(), "/occurrence/download/request/validate")

  user <- check_user(user)
  pwd <- check_pwd(pwd)

  # Return whatever the validation request yields.
  rg_POST(url = url, req = q, user = user, pwd = pwd, curlopts = list())
}

#' @name occ_download_sql
#' @export
occ_download_sql_prep <- function(q = NULL,
                                  format = "SQL_TSV_ZIP",
                                  user = NULL,
                                  pwd = NULL,
                                  email = NULL,
                                  validate = TRUE,
                                  curlopts = list()) {

  url <- paste0(gbif_base(), "/occurrence/download/request")

  assert(q, "character")
  # `q` defaults to NULL, so fail here with a clear message instead of
  # building (and possibly posting) a request that carries no usable query.
  if (!is.character(q) || length(q) != 1L || is.na(q) || !nzchar(trimws(q))) {
    stop("'q' must be a single, non-empty SQL query string.")
  }

  assert(format, "character")
  # Check length and NA first so that a NULL or multi-element `format` gets
  # the intended message rather than an error from the `if` condition itself.
  if (length(format) != 1L || is.na(format) || format != "SQL_TSV_ZIP") {
    stop("Only format='SQL_TSV_ZIP' is supported at this time.")
  }

  user <- check_user(user)
  pwd <- check_pwd(pwd)
  email <- check_email(email)

  # `format` and `sql` are unboxed so they serialise as JSON scalars.
  req <- list(
    sendNotification = TRUE,
    notificationAddresses = email,
    format = unbox(format),
    sql = unbox(q)
  )

  # The validation result is not used; this step relies on the call raising
  # an error when the request is rejected.
  if (validate) occ_download_sql_validate(q = req, user = user, pwd = pwd)

  structure(
    list(
      url = url,
      request = req,
      json_request = jsonlite::prettify(check_inputs(req), indent = 1),
      user = user,
      pwd = pwd,
      email = email,
      format = format,
      curlopts = curlopts
    ),
    class = "occ_download_sql_prep"
  )
}

# Print method for 'occ_download_sql' objects: shows where to follow the
# download, how to retrieve it, and the stored download/citation attributes.
# `...` is accepted (and ignored) to match the print() generic, and `x` is
# returned invisibly as print methods conventionally do.
#' @export
print.occ_download_sql <- function(x, ...) {
  stopifnot(inherits(x, 'occ_download_sql'))
  cat_n("<<gbif download sql>>")
  cat_n(" Your download is being processed by GBIF:")
  cat_n(" https://www.gbif.org/occurrence/download/",x)
  cat_n(" Check status with")
  cat_n(" occ_download_wait('",x,"')")
  cat_n(" After it finishes, use")
  cat_n(" d <- occ_download_get('",x,"') %>%")
  cat_n(" occ_download_import()")
  cat_n(" to retrieve your download.")
  cat_n("Download Info:")
  cat_n(" Username: ", attr(x, "user"))
  cat_n(" E-mail: ", attr(x, "email"))
  cat_n(" Format: ", attr(x, "format"))
  cat_n(" Download key: ", x)
  cat_n(" Created: ",attr(x, "created"))
  cat_n("Citation Info: ")
  cat_n(" Please always cite the download DOI when using this data.")
  cat_n(" https://www.gbif.org/citation-guidelines")
  cat_n(" DOI: ", attr(x,"doi"))
  cat_n(" Citation:")
  cat_n(" ", attr(x,"citation"))
  invisible(x)
}


# Print method for 'occ_download_sql_prep' objects: shows the format, the
# notification e-mail and the prettified JSON request held in the object.
# `...` is accepted (and ignored) to match the print() generic, and `x` is
# returned invisibly as print methods conventionally do.
#' @export
print.occ_download_sql_prep <- function(x, ...) {
  stopifnot(inherits(x, 'occ_download_sql_prep'))
  cat_n("<<Occurrence Download SQL Prep>>")
  cat_n("Format: ", x$format)
  cat_n("Email: ", x$email)
  cat_n("Request: ", x$json_request)
  invisible(x)
}
70 changes: 70 additions & 0 deletions man/occ_download_sql.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/rgbif-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

76 changes: 76 additions & 0 deletions tests/fixtures/occ_download_sql_1.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
http_interactions:
- request:
method: post
uri: https://api.gbif.org/v1/occurrence/download/request/validate
body:
encoding: ''
string: '{"sendNotification":[true],"notificationAddresses":["<gbif_user>@gbif.org"],"format":"SQL_TSV_ZIP","sql":"SELECT
gbifid,countryCode FROM occurrence WHERE genusKey = 2435098"}'
headers:
Accept-Encoding: gzip, deflate
Content-Type: application/json
Accept: application/json
response:
status:
status_code: '201'
message: Created
explanation: Document created, URL follows
headers:
status: HTTP/1.1 201 Created
content-type: application/json
body:
encoding: ''
file: no
string: '{"sql":"SELECT gbifid, countrycode\nFROM occurrence\nWHERE occurrence.genuskey
= 2435098","notificationAddresses":["<gbif_user>@gbif.org"],"sendNotification":false,"type":"OCCURRENCE","format":"SQL_TSV_ZIP"}'
recorded_at: 2024-09-24 14:05:47 GMT
recorded_with: vcr/1.2.0, webmockr/0.9.0
- request:
method: post
uri: https://api.gbif.org/v1/occurrence/download/request
body:
encoding: ''
string: '{"sendNotification":[true],"notificationAddresses":["<gbif_user>@gbif.org"],"format":"SQL_TSV_ZIP","sql":"SELECT
gbifid,countryCode FROM occurrence WHERE genusKey = 2435098"}'
headers:
Accept-Encoding: gzip, deflate
Content-Type: application/json
Accept: application/json
response:
status:
status_code: '201'
message: Created
explanation: Document created, URL follows
headers:
status: HTTP/1.1 201 Created
content-type: application/json
body:
encoding: ''
file: no
string: 0028400-240906103802322
recorded_at: 2024-09-24 14:05:47 GMT
recorded_with: vcr/1.2.0, webmockr/0.9.0
- request:
method: get
uri: https://api.gbif.org/v1/occurrence/download/0028400-240906103802322
body:
encoding: ''
string: ''
headers:
Accept-Encoding: gzip, deflate
Accept: application/json, text/xml, application/xml, */*
response:
status:
status_code: '200'
message: OK
explanation: Request fulfilled, document follows
headers:
status: HTTP/1.1 200 OK
content-type: application/json
body:
encoding: ''
file: no
string: '{"key":"0028400-240906103802322","doi":"10.15468/dl.fnrv3s","license":"unspecified","request":{"sql":"SELECT
gbifid,countryCode FROM occurrence WHERE genusKey = 2435098","creator":"<gbif_user>","notificationAddresses":["<gbif_user>@gbif.org"],"sendNotification":false,"type":"OCCURRENCE","format":"SQL_TSV_ZIP"},"created":"2024-09-24T14:05:47.599+00:00","modified":"2024-09-24T14:05:47.599+00:00","eraseAfter":"2025-03-24T14:05:47.552+00:00","status":"PREPARING","downloadLink":"https://api.gbif.org/v1/occurrence/download/request/0028400-240906103802322.zip","size":0,"totalRecords":0,"numberDatasets":0,"source":"rgbif"}'
recorded_at: 2024-09-24 14:05:47 GMT
recorded_with: vcr/1.2.0, webmockr/0.9.0
30 changes: 30 additions & 0 deletions tests/testthat/test-occ_download_sql.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

test_that("occ_download_sql : real requests work", {
  skip_on_cran()
  skip_on_ci()

  # Replay the recorded validate / request / metadata interactions; requests
  # are matched on method, uri and body.
  vcr::use_cassette("occ_download_sql_1", {
    sql_download <- occ_download_sql(
      "SELECT gbifid,countryCode FROM occurrence WHERE genusKey = 2435098"
    )
  }, match_requests_on = c("method", "uri", "body"))

  expect_is(sql_download, "occ_download_sql")
  expect_equal(attr(sql_download, "status"), "PREPARING")
  expect_equal(attr(sql_download, "format"), "SQL_TSV_ZIP")

  # Exercise the print method as well.
  print(sql_download)
})

test_that("occ_download_sql : fails well", {
  skip_on_cran()
  skip_on_ci()

  # Each of these queries is expected to make occ_download_sql() error.
  bad_queries <- c(
    "dog",
    "SELECT * FROM occurrence",
    "SELECT dog FROM occurrence"
  )
  for (bad_query in bad_queries) {
    expect_error(occ_download_sql(bad_query))
  }
})







Loading

0 comments on commit 495a552

Please sign in to comment.