diff --git a/.zenodo.json b/.zenodo.json index 62e7b46..42afd35 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,6 +1,6 @@ { "title": "inbodb: Connect to and Retrieve Data from Databases on the INBO Server", - "version": "0.0.6", + "version": "0.0.7", "license": "GPL-3.0", "upload_type": "software", "description": "

A bundle of functions to connect to and retrieve data from databases on the INBO server, with dedicated functions to query some of these databases.<\/p>", diff --git a/CITATION.cff b/CITATION.cff index 65fa34d..2f9c93d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -44,4 +44,4 @@ abstract: "A bundle of functions to connect to and retrieve data from databases identifiers: - type: url value: https://inbo.github.io/inbodb/ -version: 0.0.6 +version: 0.0.7 diff --git a/DESCRIPTION b/DESCRIPTION index c325967..ad1be8f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: inbodb Title: Connect to and Retrieve Data from Databases on the INBO Server -Version: 0.0.6 +Version: 0.0.7 Authors@R: c( person("Els", "Lommelen", , "els.lommelen@inbo.be", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3481-5684", affiliation = "Research Institute for Nature and Forest (INBO)")), diff --git a/NAMESPACE b/NAMESPACE index 74394c5..d15ee87 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -35,7 +35,6 @@ importFrom(dplyr,collect) importFrom(dplyr,distinct) importFrom(dplyr,filter) importFrom(dplyr,group_by) -importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,n) diff --git a/NEWS.md b/NEWS.md index 2e70d86..fd5e25d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# inbodb 0.0.7 + +* update functions `get_florabank_observations()`, + `get_florabank_taxon_ifbl_year()` and + `get_florabank_traits()` (database 'D0021_00_userFlora' is deprecated and + replaced with 'D0152_00_Flora') + # inbodb 0.0.6 * new functions `get_taxonlijsten_lists()`, `get_taxonlijsten_features()` and diff --git a/R/connect_inbo_dbase.R b/R/connect_inbo_dbase.R index 90c3e9f..a8851c9 100644 --- a/R/connect_inbo_dbase.R +++ b/R/connect_inbo_dbase.R @@ -6,7 +6,7 @@ #' The function can only be used from within the INBO network. #' #' For more information, refer to -#' \href{https://inbo.github.io/tutorials/tutorials/r_database_access/}{this +#' \href{https://tutorials.inbo.be/tutorials/r_database_access/}{this #' tutorial}. #' #' @param database_name char Name of the INBO database you want to connect diff --git a/R/get_florabank_observations.R b/R/get_florabank_observations.R index 4e0ad7b..41dd67d 100644 --- a/R/get_florabank_observations.R +++ b/R/get_florabank_observations.R @@ -28,18 +28,19 @@ #' @return A dataframe with the following variables: #' `NaamNederlands`, #' `NaamWetenschappelijk`, +#' `AcceptedNaamWetenschappelijk`, #' `Bron`, #' `BeginDatum`, #' `EindDatum`, -#' `hok`, +#' `Hok`, #' `Toponiem`, -#' `CommentaarTaxon`, -#' `CommentaarHabitat`, -#' `WaarnemingID`, +#' `CommentaarEvent`, +#' `CommentaarWaarneming`, +#' `EventID`, +#' `X_event`, +#' `Y_event`, #' `X_waarneming`, -#' `Y_waarneming`, -#' `X_meting`, -#' `Y_meting` +#' `Y_waarneming` #' #' @importFrom glue glue_sql #' @importFrom assertthat assert_that @@ -55,7 +56,7 @@ #' # code can only be run if a connection to the database is possible #' library(inbodb) #' # connect to florabank -#' db_connectie <- connect_inbo_dbase("D0021_00_userFlora") +#' db_connectie <- connect_inbo_dbase("D0152_00_Flora") #' #' # query and collect the data using scientific name #' succprat1 <- get_florabank_observations(db_connectie, @@ -100,50 +101,74 @@ #' } get_florabank_observations <- function(connection, names, fixed = FALSE, - collect = FALSE) { + collect = FALSE) { assert_that(inherits(connection, what = "Microsoft SQL Server"), msg = "Not a connection object to database.") - assert_that(connection@info$dbname == "D0021_00_userFlora") + assert_that(connection@info$dbname == "D0152_00_Flora") if (missing(names)) { stop("Please provide names.") } - sql_statement <- "SELECT DISTINCT - tblTaxon.NaamNederlands - , tblTaxon.NaamWetenschappelijk - , cdeBron.Omschrijving AS Bron - , tblWaarneming.BeginDatum - , tblWaarneming.EindDatum - , tblIFBLHok.Code AS hok - , tblWaarneming.Opmerking AS Toponiem - , tblMeting.CommentaarTaxon - , tblMeting.CommentaarHabitat - , tblWaarneming.ID AS WaarnemingID - , tblWaarneming.Cor_X AS X_waarneming - , tblWaarneming.Cor_Y AS Y_waarneming - , tblMeting.Cor_X AS X_meting - , tblMeting.Cor_Y AS Y_meting - FROM dbo.tblWaarneming - INNER JOIN dbo.tblMeting ON tblWaarneming.ID = tblMeting.WaarnemingID - INNER JOIN dbo.relTaxonTaxon ON relTaxonTaxon.TaxonIDChild = tblMeting.TaxonID - INNER JOIN dbo.tblTaxon ON tblTaxon.ID = relTaxonTaxon.TaxonIDParent - LEFT JOIN dbo.tblIFBLHok ON tblIFBLHok.ID = tblWaarneming.IFBLHokID - INNER JOIN dbo.cdeBron ON cdeBron.Code = tblWaarneming.BronCode - WHERE 1=1 - AND (tblMeting.MetingStatusCode='GDGA' OR tblMeting.MetingStatusCode='GDGK') + sql_statement <- "SELECT DISTINCT COALESCE(cte.ParentNaamNederlands + , cte.NaamNederlands) as NaamNederlands + , cte.NaamWetenschappelijk + , cte.ParentNaamWetenschappelijk AS AcceptedNaamWetenschappelijk + , b.Beschrijving AS Bron + , e.BeginDatum + , e.EindDatum + , h.Code AS Hok + , e.Toponiem + , e.Opmerking AS CommentaarEvent + , w.Commentaar AS CommentaarWaarneming + , e.id AS EventID + , e.CorX AS X_event + , e.CorY AS Y_event + , w.CorX AS X_waarneming + , w.CorY AS Y_waarneming +FROM [event] e + INNER JOIN Bron b ON b.ID = e.BronID + INNER JOIN Waarneming w ON w.EventID = e.ID + INNER JOIN waarnemingstatus ws ON ws.id = w.WaarnemingStatusID + INNER JOIN Hok h ON h.ID = e.HokID + INNER JOIN (SELECT t.id AS taxonid + , t.code AS taxoncode + , t.NaamNederlands + , t.NaamWetenschappelijk + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.id ELSE t.ParentTaxonID END AS ParentTaxonID + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.code ELSE tp.code END AS ParentTaxoncode + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.NaamNederlands ELSE tp.NaamNederlands + END AS ParentNaamNederlands + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.NaamWetenschappelijk ELSE tp.NaamWetenschappelijk + END AS ParentNaamWetenschappelijk + FROM Taxon t + LEFT JOIN Taxon tp ON tp.id = t.ParentTaxonID)cte + ON cte.taxonid = w.TaxonID +WHERE 1=1 + AND ws.Code in ('GDGA','GDGK') " if (!fixed) { like_string <- - paste0("AND (", + paste0("AND cte.ParentTaxonID in + (SELECT DISTINCT CASE WHEN t.ParentTaxonID IS NULL + OR t.TaxonRelatieTypeID = 1 THEN t.id ELSE t.ParentTaxonID + END AS ParentTaxonID + FROM Taxon t + LEFT JOIN Taxon tp ON tp.id = t.ParentTaxonID + WHERE 1=1 + AND (", paste0( - c(paste0("tblTaxon.NaamNederlands", " LIKE ", "'%", names, "%'"), - paste0("tblTaxon.NaamWetenschappelijk", " LIKE ", "'%", names, + c(paste0("cte.NaamNederlands", " LIKE ", "'%", names, "%'"), + paste0("cte.NaamWetenschappelijk", " LIKE ", "'%", names, "%'")), collapse = " OR "), - ")") + "))") sql_statement <- glue_sql( sql_statement, like_string, @@ -151,8 +176,8 @@ get_florabank_observations <- function(connection, names, fixed = FALSE, } else { sql_statement <- glue_sql( sql_statement, - "AND (tblTaxon.NaamWetenschappelijk IN ({names*}) OR - tblTaxon.NaamNederlands IN ({names*})) + "AND (cte.NaamWetenschappelijk IN ({names*}) OR + cte.NaamNederlands IN ({names*})) ", names = names, .con = connection) @@ -160,7 +185,7 @@ get_florabank_observations <- function(connection, names, fixed = FALSE, sql_statement <- glue_sql( sql_statement, - "ORDER BY tblWaarneming.BeginDatum DESC OFFSET 0 ROWS", + "ORDER BY e.BeginDatum DESC OFFSET 0 ROWS", .con = connection) sql_statement <- iconv(sql_statement, from = "UTF-8", to = "latin1") diff --git a/R/get_florabank_taxon_ifbl_year.R b/R/get_florabank_taxon_ifbl_year.R index 61a42cb..632fc63 100644 --- a/R/get_florabank_taxon_ifbl_year.R +++ b/R/get_florabank_taxon_ifbl_year.R @@ -20,7 +20,7 @@ #' #' @param taxongroup Choose for which taxonomic group you want the unique #' combinations. One of `"Vaatplanten"` (the default), `"Mossen"`, -#' `"Korstmossen"` +#' `"Lichenen (korstmossen)"` #' or `"Kranswieren"`. #' #' @param collect If FALSE (the default), a remote `tbl` object is returned. @@ -34,16 +34,10 @@ #' `IFBL`-square #' (either at 1 km x 1 km or 4 km x 4 km resolution) and year. In case the #' resolution is 1 km x 1 km, a variable `ifbl_4by4` gives the corresponding -#' `ifbl_4by4` identifier within which the `ifbl_1by1` square is located. -#' In case -#' the resolution is 4 km x 4 km, the variable `ifbl_squares` is a concatenation -#' of all nested squares with observations for the taxon in the corresponding -#' year. This can be nested 1 x 1 squares as well as the corresponding 4 x 4 -#' square (the latter is the case if the original resolution of the observation -#' is at 4 x 4 resolution). In addition, the variable `ifbl_number_squares` -#' gives -#' the number of unique nested squares where the taxon was observed for that -#' year and 4 x 4 square combination. +#' 4 km x 4 km square within which the 1 km x 1 km square is located. +#' In case the resolution is 4 km x 4 km the variable `ifbl_number_squares` +#' gives the number of unique nested squares where the taxon was observed +#' for that year and 4 x 4 square combination. #' #' @importFrom glue glue_sql #' @importFrom assertthat assert_that @@ -56,13 +50,13 @@ #' \dontrun{ #' library(inbodb) #' # connect to florabank -#' db_connectie <- connect_inbo_dbase("D0021_00_userFlora") +#' db_connectie <- connect_inbo_dbase("D0152_00_Flora") #' #' # get records at 1 km x 1 km resolution for vascular plants from 2010 #' # (default) without collecting all data into memory (default). #' fb_kwartier <- get_florabank_taxon_ifbl_year(db_connectie) #' # to collect the data in memory set collect to TRUE or do -#' fb_kwartier <- collect(fb_kwartier) +#' fb_kwartier <- dplyr::collect(fb_kwartier) #' #' # get records at 4 km x 4 km resolution starting from 2000 #' fb_uur <- get_florabank_taxon_ifbl_year(db_connectie, starting_year = 2000, @@ -84,7 +78,7 @@ get_florabank_taxon_ifbl_year <- function(connection, assert_that(inherits(connection, what = "Microsoft SQL Server"), msg = "Not a connection object to database.") - assert_that(connection@info$dbname == "D0021_00_userFlora") + assert_that(connection@info$dbname == "D0152_00_Flora") assert_that(is.numeric(starting_year)) assert_that(starting_year <= as.numeric(format(Sys.Date(), "%Y"))) @@ -96,45 +90,60 @@ get_florabank_taxon_ifbl_year <- function(connection, if (ifbl_resolution == "4km-by-4km") { glue_statement <- glue_sql( - "SELECT DISTINCT - tblIFBLHok.Code AS hok - , SUBSTRING(tblIFBLHok.Code, 1, 5) AS ifbl_4by4 - , Year(tblWaarneming.BeginDatum) AS Jaar - , relTaxonTaxon.TaxonIDParent - , tblTaxon.Code AS Taxoncode - FROM - (((tblMeting INNER JOIN - (tblIFBLHok INNER JOIN tblWaarneming - ON tblIFBLHok.ID = tblWaarneming.IFBLHokID) - ON tblMeting.WaarnemingID = tblWaarneming.ID) - INNER JOIN relTaxonTaxon ON tblMeting.TaxonID = relTaxonTaxon.TaxonIDChild) - INNER JOIN tblTaxon ON relTaxonTaxon.TaxonIDParent = tblTaxon.ID) - INNER JOIN relTaxonTaxonGroep ON tblTaxon.ID = relTaxonTaxonGroep.TaxonID - INNER JOIN tblTaxonGroep - ON relTaxonTaxonGroep.TaxonGroepID = tblTaxonGroep.ID - WHERE - tblIFBLHok.Code LIKE '%-%' AND - tblTaxon.Code NOT LIKE '%-sp' AND - Year([tblWaarneming].[BeginDatum]) >={starting_year} AND - (Year([tblWaarneming].[BeginDatum])=Year([tblWaarneming].[EindDatum])) AND - (tblTaxonGroep.Naam={taxongroup}) AND - (tblMeting.MetingStatusCode='GDGA' OR tblMeting.MetingStatusCode='GDGK') - ORDER BY - Year(tblWaarneming.BeginDatum) DESC OFFSET 0 ROWS", + "SELECT DISTINCT h.Code AS Hok + , CASE WHEN tmp.code IS NULL THEN h.code ELSE tmp.Code END AS ifbl_4by4 + , DATEPART(year, e.BeginDatum) AS Jaar + , cte.ParentTaxonID + , cte.ParentTaxoncode + , cte.ParentNaamWetenschappelijk + , cte.ParentNaamNederlands +FROM [event] e + INNER JOIN Hok h ON h.ID = e.HokID + INNER JOIN Waarneming w ON w.EventID = e.ID + INNER JOIN waarnemingstatus ws ON ws.id = w.WaarnemingStatusID + LEFT JOIN (SELECT HokIDChild + , h.Code + FROM Hok_Hok hh + INNER JOIN HokRelatieType hrt ON hrt.ID = hh.HokRelatieTypeID + INNER JOIN Hok h ON h.ID = hh.HokIDParent + WHERE hrt.Code = 'DV' + )tmp ON tmp.HokIDChild = e.hokid + INNER JOIN (SELECT t.id AS taxonid + , t.code AS taxoncode + , t.NaamNederlands + , t.NaamWetenschappelijk + , t.TaxonGroepID + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.id ELSE t.ParentTaxonID END AS ParentTaxonID + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.code ELSE tp.code END AS ParentTaxoncode + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.NaamNederlands ELSE tp.NaamNederlands + END AS ParentNaamNederlands + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.NaamWetenschappelijk ELSE tp.NaamWetenschappelijk + END AS ParentNaamWetenschappelijk + FROM Taxon t + LEFT JOIN Taxon tp ON tp.id = t.ParentTaxonID)cte + ON cte.taxonid = w.TaxonID + INNER JOIN TaxonGroep tg ON tg.ID = cte.TaxonGroepID +WHERE 1=1 + AND cte.ParentTaxoncode NOT LIKE '%-sp' + AND DATEPART(year, e.BeginDatum) >= {starting_year} + AND DATEPART(year, e.BeginDatum) = DATEPART(year, e.EindDatum) + AND tg.Beschrijving = {taxongroup} + AND ws.code IN ('GDGA','GDGK') +ORDER BY DATEPART(year, e.BeginDatum) desc OFFSET 0 ROWS", starting_year = starting_year, taxongroup = taxongroup, .con = connection) glue_statement <- iconv(glue_statement, from = "UTF-8", to = "latin1") query_result <- tbl(connection, sql(glue_statement)) - query_result <- query_result %>% - group_by(.data$ifbl_4by4, .data$Jaar, .data$TaxonIDParent, - .data$Taxoncode) %>% - #paste with collapse does not translate to sql - #str_flatten() is not available for Microsoft SQL Server - #sql(STRING_AGG("hok", ",")) also does not work - #fix this later + group_by(.data$ifbl_4by4, .data$Jaar, .data$ParentTaxonID, + .data$ParentTaxoncode, .data$ParentNaamWetenschappelijk, + .data$ParentNaamNederlands) %>% summarize( ifbl_number_squares = n()) %>% ungroup() @@ -149,31 +158,49 @@ get_florabank_taxon_ifbl_year <- function(connection, } glue_statement <- glue_sql( - "SELECT DISTINCT - tblIFBLHok.Code AS ifbl_1by1 - , SUBSTRING(tblIFBLHok.Code, 1, 5) AS ifbl_4by4 - , Year(tblWaarneming.BeginDatum) AS Jaar - , relTaxonTaxon.TaxonIDParent - , tblTaxon.Code AS Taxoncode - FROM - (((tblMeting INNER JOIN - (tblIFBLHok INNER JOIN tblWaarneming - ON tblIFBLHok.ID = tblWaarneming.IFBLHokID) - ON tblMeting.WaarnemingID = tblWaarneming.ID) - INNER JOIN relTaxonTaxon ON tblMeting.TaxonID = relTaxonTaxon.TaxonIDChild) - INNER JOIN tblTaxon ON relTaxonTaxon.TaxonIDParent = tblTaxon.ID) - INNER JOIN relTaxonTaxonGroep ON tblTaxon.ID = relTaxonTaxonGroep.TaxonID - INNER JOIN tblTaxonGroep - ON relTaxonTaxonGroep.TaxonGroepID = tblTaxonGroep.ID - WHERE - tblIFBLHok.Code LIKE '%-%-%' AND - tblTaxon.Code NOT LIKE '%-sp' AND - Year([tblWaarneming].[BeginDatum]) >={starting_year} AND - (Year([tblWaarneming].[BeginDatum])=Year([tblWaarneming].[EindDatum])) AND - (tblTaxonGroep.Naam={taxongroup}) AND - (tblMeting.MetingStatusCode='GDGA' OR tblMeting.MetingStatusCode='GDGK') - ORDER BY - Year(tblWaarneming.BeginDatum) DESC OFFSET 0 ROWS", + "SELECT DISTINCT h.Code AS Hok + , tmp.code AS ifbl_4by4 + , DATEPART(year, e.BeginDatum) AS Jaar + , cte.ParentTaxonID + , cte.ParentTaxoncode + , cte.ParentNaamWetenschappelijk + , cte.ParentNaamNederlands +FROM [event] e + INNER JOIN Hok h ON h.ID = e.HokID + INNER JOIN Waarneming w ON w.EventID = e.ID + INNER JOIN waarnemingstatus ws ON ws.id = w.WaarnemingStatusID + INNER JOIN (SELECT HokIDChild + , h.Code + FROM Hok_Hok hh + INNER JOIN HokRelatieType hrt ON hrt.ID = hh.HokRelatieTypeID + INNER JOIN Hok h ON h.ID = hh.HokIDParent + WHERE hrt.Code = 'DV')tmp ON tmp.HokIDChild = e.hokid + INNER JOIN (SELECT t.id AS taxonid + , t.code AS taxoncode + , t.NaamNederlands + , t.NaamWetenschappelijk + , t.TaxonGroepID + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.id ELSE t.ParentTaxonID END AS ParentTaxonID + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.code ELSE tp.code END AS ParentTaxoncode + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.NaamNederlands ELSE tp.NaamNederlands + END AS ParentNaamNederlands + , CASE WHEN t.ParentTaxonID IS NULL OR t.TaxonRelatieTypeID = 1 + THEN t.NaamWetenschappelijk ELSE tp.NaamWetenschappelijk + END AS ParentNaamWetenschappelijk + FROM Taxon t + LEFT JOIN Taxon tp ON tp.id = t.ParentTaxonID)cte + ON cte.taxonid = w.TaxonID + INNER JOIN TaxonGroep tg ON tg.ID = cte.TaxonGroepID +WHERE 1=1 + AND cte.ParentTaxoncode NOT LIKE '%-sp' + AND DATEPART(year, e.BeginDatum) >= {starting_year} + AND DATEPART(year, e.BeginDatum) = DATEPART(year, e.EindDatum) + AND tg.Beschrijving = {taxongroup} + AND ws.code IN ('GDGA','GDGK') +ORDER BY DATEPART(year, e.BeginDatum) desc OFFSET 0 ROWS", starting_year = starting_year, taxongroup = taxongroup, .con = connection) diff --git a/R/get_florabank_traits.R b/R/get_florabank_traits.R index bb9bf0b..465b565 100644 --- a/R/get_florabank_traits.R +++ b/R/get_florabank_traits.R @@ -23,16 +23,16 @@ globalVariables("%LIKE%") #' (collect = TRUE) containing the trait values for each species and for all #' partially matched traits. The dataframe contains the variables #' `TaxonID`, -#' `TaxonAfkorting`, -#' `TaxonWetenschappelijk`, -#' `TaxonNederlands`, +#' `TaxonCode`, +#' `NaamWetenschappelijk`, +#' `NaamNederlands`, #' `Kenmerk`, -#' `Code`, +#' `KenmerkCode`, #' `Omschrijving`, #' `Rekenwaarde`, #' `Bron` and #' `ExtraOmschrijving`. -#' The first four variables identify the taxon, the latter five variables relate +#' The first four variables identify the taxon, the latter six variables relate #' to the taxon traits. #' #' @importFrom dplyr @@ -41,11 +41,7 @@ globalVariables("%LIKE%") #' distinct #' pull #' %>% -#' inner_join -#' left_join -#' filter -#' select -#' rename +#' @importFrom glue glue_sql #' @importFrom rlang .data #' @importFrom assertthat assert_that #' @@ -56,7 +52,7 @@ globalVariables("%LIKE%") #' library(inbodb) #' library(dplyr) #' # connect to florabank -#' db_connectie <- connect_inbo_dbase("D0021_00_userFlora") +#' db_connectie <- connect_inbo_dbase("D0152_00_Flora") #' #' # get all Ellenberg values via partial matching, return as lazy query #' fb_ellenberg <- get_florabank_traits(db_connectie, "llenberg") @@ -84,53 +80,48 @@ get_florabank_traits <- function(connection, trait_name, collect = FALSE) { assert_that(inherits(connection, what = "Microsoft SQL Server"), msg = "Not a connection object to database.") - assert_that(connection@info$dbname == "D0021_00_userFlora") + assert_that(connection@info$dbname == "D0152_00_Flora") if (missing(trait_name)) { - traitnames <- tbl(connection, "tblTaxonKenmerk") %>% - distinct(.data$Naam) %>% + traitnames <- tbl(connection, "Kenmerk") %>% + distinct(.data$Beschrijving) %>% collect() %>% - pull(.data$Naam) + pull(.data$Beschrijving) message <- paste0("Please provide (part of) a trait name from this list: ", paste(traitnames, collapse = ", ")) options(warning.length = nchar(message)) stop(message) } - trait_name <- tolower(trait_name) + sql_statement <- " + SELECT t.ID AS TaxonID + , t.Code AS TaxonCode + , t.NaamWetenschappelijk + , t.NaamNederlands + , k.Kenmerk + , kc.Code AS KenmerkCode + , kc.naam AS Omschrijving + , kc.Rekenwaarde + , k.KenmerkBron AS Bron + , tk.Beschrijving AS ExtraOmschrijving + FROM [dbo].[Taxon_KenmerkWaarde] tk + inner join Taxon t on t.ID = tk.TaxonID + inner join TaxonGroep tg on tg.ID = t.TaxonGroepID + left join vw.vw_kenmerk k on k.kenmerkid = tk.KenmerkID + left join vw.vw_kenmerkcategorie kc on + kc.kenmerkcategorieid = tk.KenmerkCategorieID + WHERE 1 = 1" - fb_taxon <- tbl(connection, "tblTaxon") - fb_taxon_kenmerk <- tbl(connection, "tblTaxonKenmerk") - fb_taxon_kenmerk_waarde <- tbl(connection, "tblTaxonKenmerkWaarde") - rel_taxon_taxon_kenmerk_waarde <- - tbl(connection, "relTaxonTaxonKenmerkWaarde") + like_string <- paste0(" AND k.kenmerk like '%", trait_name, "%'") - query_result <- rel_taxon_taxon_kenmerk_waarde %>% - inner_join(fb_taxon_kenmerk %>% - filter(tolower(.data$Naam) %LIKE% - paste0("%", trait_name, "%")) %>% - select(.data$ID, .data$Naam, .data$Bron), - by = c("TaxonKenmerkID" = "ID")) %>% - rename(ExtraOmschrijving = .data$Omschrijving) %>% - left_join(fb_taxon_kenmerk_waarde %>% - distinct(.data$ID, .data$Code, .data$TaxonKenmerkID, - .data$Omschrijving, .data$Rekenwaarde), - by = c("TaxonKenmerkID" = "TaxonKenmerkID", - "TaxonKenmerkWaardeID" = "ID")) %>% - left_join(fb_taxon %>% - rename(NaamAfkorting = .data$Code), - by = c("TaxonID" = "ID")) %>% - distinct(.data$TaxonID, - TaxonAfkorting = .data$NaamAfkorting, - TaxonWetenschappelijk = .data$NaamWetenschappelijk, - TaxonNederlands = .data$NaamNederlands, - Kenmerk = .data$Naam, - .data$Code, - .data$Omschrijving, - .data$Rekenwaarde, - .data$Bron, - .data$ExtraOmschrijving - ) + sql_statement <- glue_sql( + sql_statement, + like_string, + .con = connection) + + sql_statement <- iconv(sql_statement, from = "UTF-8", to = "latin1") + + query_result <- tbl(connection, sql(sql_statement)) if (!isTRUE(collect)) { return(query_result) } else { diff --git a/inst/CITATION b/inst/CITATION index 2944256..a515b91 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -2,12 +2,12 @@ citHeader("To cite `inbodb` in publications please use:") # begin checklist entry bibentry( bibtype = "Manual", - title = "inbodb: Connect to and Retrieve Data from Databases on the INBO Server. Version 0.0.6", + title = "inbodb: Connect to and Retrieve Data from Databases on the INBO Server. Version 0.0.7", author = c( author = c(person(given = "Els", family = "Lommelen"), person(given = "Hans", family = "Van Calster"), person(given = "Els", family = "De Bie"), person(given = "Floris", family = "Vanderhaeghe"), person(given = "Frederic", family = "Piesschaert"), person(given = "Toon", family = "Westra"))), - year = 2024, + year = 2025, url = "https://inbo.github.io/inbodb/", abstract = "A bundle of functions to connect to and retrieve data from databases on the INBO server, with dedicated functions to query some of these databases.", - textVersion = "Lommelen, Els; Van Calster, Hans; De Bie, Els; Vanderhaeghe, Floris; Piesschaert, Frederic; Westra, Toon (2024) inbodb: Connect to and Retrieve Data from Databases on the INBO Server. Version 0.0.6. https://inbo.github.io/inbodb/", + textVersion = "Lommelen, Els; Van Calster, Hans; De Bie, Els; Vanderhaeghe, Floris; Piesschaert, Frederic; Westra, Toon (2025) inbodb: Connect to and Retrieve Data from Databases on the INBO Server. Version 0.0.7. https://inbo.github.io/inbodb/", keywords = "sql; databases; queries", ) # end checklist entry diff --git a/inst/en_gb.dic b/inst/en_gb.dic index 8a36595..d6b84c0 100644 --- a/inst/en_gb.dic +++ b/inst/en_gb.dic @@ -170,6 +170,7 @@ telmethode tibble uit uitzondering +userFlora veldwerk vermeld voor diff --git a/man/connect_inbo_dbase.Rd b/man/connect_inbo_dbase.Rd index 79268ae..54f46b7 100644 --- a/man/connect_inbo_dbase.Rd +++ b/man/connect_inbo_dbase.Rd @@ -23,7 +23,7 @@ The function can only be used from within the INBO network. } \details{ For more information, refer to -\href{https://inbo.github.io/tutorials/tutorials/r_database_access/}{this +\href{https://tutorials.inbo.be/tutorials/r_database_access/}{this tutorial}. } \examples{ diff --git a/man/get_florabank_observations.Rd b/man/get_florabank_observations.Rd index ba84e1d..42d4373 100644 --- a/man/get_florabank_observations.Rd +++ b/man/get_florabank_observations.Rd @@ -30,18 +30,19 @@ environment.} A dataframe with the following variables: \code{NaamNederlands}, \code{NaamWetenschappelijk}, +\code{AcceptedNaamWetenschappelijk}, \code{Bron}, \code{BeginDatum}, \code{EindDatum}, -\code{hok}, +\code{Hok}, \code{Toponiem}, -\code{CommentaarTaxon}, -\code{CommentaarHabitat}, -\code{WaarnemingID}, +\code{CommentaarEvent}, +\code{CommentaarWaarneming}, +\code{EventID}, +\code{X_event}, +\code{Y_event}, \code{X_waarneming}, -\code{Y_waarneming}, -\code{X_meting}, -\code{Y_meting} +\code{Y_waarneming} } \description{ This function takes as input a character vector with one or more names of @@ -56,7 +57,7 @@ level information about the matching taxa. # code can only be run if a connection to the database is possible library(inbodb) # connect to florabank -db_connectie <- connect_inbo_dbase("D0021_00_userFlora") +db_connectie <- connect_inbo_dbase("D0152_00_Flora") # query and collect the data using scientific name succprat1 <- get_florabank_observations(db_connectie, diff --git a/man/get_florabank_taxon_ifbl_year.Rd b/man/get_florabank_taxon_ifbl_year.Rd index 625d694..6a69b89 100644 --- a/man/get_florabank_taxon_ifbl_year.Rd +++ b/man/get_florabank_taxon_ifbl_year.Rd @@ -25,7 +25,7 @@ Default is 2010.} \item{taxongroup}{Choose for which taxonomic group you want the unique combinations. One of \code{"Vaatplanten"} (the default), \code{"Mossen"}, -\code{"Korstmossen"} +\code{"Lichenen (korstmossen)"} or \code{"Kranswieren"}.} \item{collect}{If FALSE (the default), a remote \code{tbl} object is returned. @@ -40,16 +40,10 @@ A dataframe with one line for each combination of taxon, \code{IFBL}-square (either at 1 km x 1 km or 4 km x 4 km resolution) and year. In case the resolution is 1 km x 1 km, a variable \code{ifbl_4by4} gives the corresponding -\code{ifbl_4by4} identifier within which the \code{ifbl_1by1} square is located. -In case -the resolution is 4 km x 4 km, the variable \code{ifbl_squares} is a concatenation -of all nested squares with observations for the taxon in the corresponding -year. This can be nested 1 x 1 squares as well as the corresponding 4 x 4 -square (the latter is the case if the original resolution of the observation -is at 4 x 4 resolution). In addition, the variable \code{ifbl_number_squares} -gives -the number of unique nested squares where the taxon was observed for that -year and 4 x 4 square combination. +4 km x 4 km square within which the 1 km x 1 km square is located. +In case the resolution is 4 km x 4 km the variable \code{ifbl_number_squares} +gives the number of unique nested squares where the taxon was observed +for that year and 4 x 4 square combination. } \description{ This functions queries all validated observations of the florabank database @@ -64,13 +58,13 @@ group can be chosen. \dontrun{ library(inbodb) # connect to florabank -db_connectie <- connect_inbo_dbase("D0021_00_userFlora") +db_connectie <- connect_inbo_dbase("D0152_00_Flora") # get records at 1 km x 1 km resolution for vascular plants from 2010 # (default) without collecting all data into memory (default). fb_kwartier <- get_florabank_taxon_ifbl_year(db_connectie) # to collect the data in memory set collect to TRUE or do -fb_kwartier <- collect(fb_kwartier) +fb_kwartier <- dplyr::collect(fb_kwartier) # get records at 4 km x 4 km resolution starting from 2000 fb_uur <- get_florabank_taxon_ifbl_year(db_connectie, starting_year = 2000, diff --git a/man/get_florabank_traits.Rd b/man/get_florabank_traits.Rd index bca3062..75b67e1 100644 --- a/man/get_florabank_traits.Rd +++ b/man/get_florabank_traits.Rd @@ -26,16 +26,16 @@ A remote \code{tbl} object (collect = FALSE) or a \code{tibble} dataframe (collect = TRUE) containing the trait values for each species and for all partially matched traits. The dataframe contains the variables \code{TaxonID}, -\code{TaxonAfkorting}, -\code{TaxonWetenschappelijk}, -\code{TaxonNederlands}, +\code{TaxonCode}, +\code{NaamWetenschappelijk}, +\code{NaamNederlands}, \code{Kenmerk}, -\code{Code}, +\code{KenmerkCode}, \code{Omschrijving}, \code{Rekenwaarde}, \code{Bron} and \code{ExtraOmschrijving}. -The first four variables identify the taxon, the latter five variables relate +The first four variables identify the taxon, the latter six variables relate to the taxon traits. } \description{ @@ -47,7 +47,7 @@ florabank and returns the taxon trait values in a tidy data format library(inbodb) library(dplyr) # connect to florabank -db_connectie <- connect_inbo_dbase("D0021_00_userFlora") +db_connectie <- connect_inbo_dbase("D0152_00_Flora") # get all Ellenberg values via partial matching, return as lazy query fb_ellenberg <- get_florabank_traits(db_connectie, "llenberg") diff --git a/vignettes/get_data_inboveg.Rmd b/vignettes/get_data_inboveg.Rmd index 2bd4667..ee49256 100644 --- a/vignettes/get_data_inboveg.Rmd +++ b/vignettes/get_data_inboveg.Rmd @@ -99,7 +99,7 @@ survey_info ``` Get information of all surveys. -This time we will not use `collect = TRUE`, which will return a [lazy query](https://docs.lucee.org/guides/cookbooks/lazy_queries.html): +This time we will not use `collect = TRUE`, which will return a [lazy query](https://dbplyr.tidyverse.org/reference/tbl.src_dbi.html): ```{r} allsurveys <- get_inboveg_survey(con) diff --git a/vignettes/get_data_inboveg_results.Rda b/vignettes/get_data_inboveg_results.Rda index 6c2ec3d..ffd38c0 100644 Binary files a/vignettes/get_data_inboveg_results.Rda and b/vignettes/get_data_inboveg_results.Rda differ diff --git a/vignettes/get_data_meetnetten.Rmd b/vignettes/get_data_meetnetten.Rmd index 5913961..a60014a 100644 --- a/vignettes/get_data_meetnetten.Rmd +++ b/vignettes/get_data_meetnetten.Rmd @@ -200,7 +200,7 @@ one or more monitoring schemes. Below we select all visits for the location Arenbergpolder of the Argusvlinder monitoring scheme. We see that in most years the location is counted 6 times per year as demanded -by the [monitoring protocol](https://purews.inbo.be/ws/portalfiles/portal/17657883/MaesEtal_2019_MonitoringsprotocolDagvlindersVersie2.pdf). +by the [monitoring protocol](https://doi.org/10.21436/inbor.16744530). ```{r example_visits} diff --git a/vignettes/get_data_meetnetten_results.Rda b/vignettes/get_data_meetnetten_results.Rda index f5108a5..12f3211 100644 Binary files a/vignettes/get_data_meetnetten_results.Rda and b/vignettes/get_data_meetnetten_results.Rda differ diff --git a/vignettes/get_data_taxonlijsten.Rmd b/vignettes/get_data_taxonlijsten.Rmd index ac0f08e..6080839 100644 --- a/vignettes/get_data_taxonlijsten.Rmd +++ b/vignettes/get_data_taxonlijsten.Rmd @@ -88,7 +88,7 @@ rl_dagvlinders <- get_taxonlijsten_lists(con, rl_dagvlinders ``` -Get all recent versions of red lists. This time we will not use `collect = TRUE`, which will return a [lazy query](https://docs.lucee.org/guides/cookbooks/lazy_queries.html): +Get all recent versions of red lists. This time we will not use `collect = TRUE`, which will return a [lazy query](https://dbplyr.tidyverse.org/reference/tbl.src_dbi.html): ```{r, eval=FALSE, echo=TRUE} rodelijst_recent <- get_taxonlijsten_lists(con, diff --git a/vignettes/get_data_taxonlijsten_results.Rda b/vignettes/get_data_taxonlijsten_results.Rda index e822c46..03fba37 100644 Binary files a/vignettes/get_data_taxonlijsten_results.Rda and b/vignettes/get_data_taxonlijsten_results.Rda differ