Skip to content

Commit

Permalink
Merge pull request #334 from inbo/uat
Browse files Browse the repository at this point in the history
rebase
  • Loading branch information
SanderDevisscher authored Jan 29, 2025
2 parents c1a4162 + e22e110 commit 89eb62e
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 33 deletions.
11 changes: 3 additions & 8 deletions data/input/belgium_class_base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ SELECT
) AS eeaCellCode,
classKey,
class,
familyKey,
family,
COUNT(*) AS occurrences,
MIN(COALESCE(coordinateUncertaintyInMeters, 1000)) AS minCoordinateUncertaintyInMeters,
MIN(GBIF_TemporalUncertainty(eventDate)) AS minTemporalUncertainty
Expand All @@ -19,8 +17,7 @@ SELECT
occurrenceStatus = 'PRESENT'
AND countrycode = 'BE'
AND year >= 1900
AND hasCoordinate = TRUE
AND familyKey IS NOT NULL
AND hasCoordinate = TRUE
AND NOT ARRAY_CONTAINS(issue, 'ZERO_COORDINATE')
AND NOT ARRAY_CONTAINS(issue, 'COORDINATE_OUT_OF_RANGE')
AND NOT ARRAY_CONTAINS(issue, 'COORDINATE_INVALID')
Expand All @@ -43,10 +40,8 @@ SELECT
year,
eeaCellCode,
classKey,
class,
familyKey,
family
class
ORDER BY
year DESC,
eeaCellCode ASC,
familyKey ASC;
classKey ASC;
3 changes: 1 addition & 2 deletions data/interim/alien_taxa_without_occs.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ key nubKey canonicalName first_observed last_observed class kingdom
182134454 1195013 Otiorhynchus aurifer 1999 2018 Insecta Animalia
152544520 1319260 Tetraponera allaborans 1980 2018 Insecta Animalia
152544508 1321805 Pheidole bilimeki 1911 2018 Insecta Animalia
152544494 1325561 Paratrechina longicornis 1980 2018 Insecta Animalia
213213874 1349836 Hylaeus absolutus 2021 2021 Insecta Animalia
152544459 1419894 Euborellia annulipes 2016 2018 Insecta Animalia
152544422 1420197 Aleurodothrips fasciapennis 2018 2018 Insecta Animalia
Expand Down Expand Up @@ -86,8 +87,6 @@ key nubKey canonicalName first_observed last_observed class kingdom
152543699 2308541 Limnodrilus cervix 2009 2016 Clitellata Animalia
182133944 2308549 Limnodrilus maumeensis 2014 2014 Clitellata Animalia
152543695 2308605 Potamothrix vejdovskyi 2009 2016 Clitellata Animalia
159747758 2320810 Boccardia proboscidea 2000 2000 Polychaeta Animalia
159747756 2320998 Boccardiella hamata 2011 2011 Polychaeta Animalia
182133960 2321001 Boccardiella ligerica NA NA Polychaeta Animalia
152543727 2321012 Marenzelleria viridis 1995 2016 Polychaeta Animalia
152543510 2351064 Coregonus nasus NA NA NA Animalia
Expand Down
1 change: 0 additions & 1 deletion data/interim/taxa_last_observed_in_BE_before_1950.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ taxonKey canonicalName first_observed last_observed class kingdom kingdomKey cla
10773250 Melilotus siculus 1948 1948 Magnoliopsida Plantae 6 220
5349991 Coronilla securidaca 1909 1909 Magnoliopsida Plantae 6 220
2965250 Medicago tornata 1949 1949 Magnoliopsida Plantae 6 220
3053399 Rorippa pyrenaica 1854 1939 Magnoliopsida Plantae 6 220
7711892 Isatis quadrialata 1909 1932 Magnoliopsida Plantae 6 220
3089549 Centaurea depressa 1909 1922 Magnoliopsida Plantae 6 220
3026426 Spiraea brachybotrys 1947 1947 Magnoliopsida Plantae 6 220
93 changes: 71 additions & 22 deletions src/cube_preprocessing.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,12 @@ if(Sys.getenv("S3_BUCKET") != ""){
```

## connect to bucket
```{r connect to bucket, eval=FALSE}
source("./src/connect_to_bucket.R")
connect_to_bucket(bucket_name = UAT_bucket)
```{r connect to bucket}
if(Sys.getenv("amiabot") != "yes"){
print("Executor of the script is a human >> connecting to bucket")
source("./src/connect_to_bucket.R")
connect_to_bucket(bucket_name = UAT_bucket)
}
```

# Get data
Expand Down Expand Up @@ -130,7 +133,7 @@ which is updated by the following code:
be_alientaxa_cube <- df %>%
left_join(utm1_gemeentes_provincies, by = c("eea_cell_code" = "CELLCODE")) %>%
st_drop_geometry() %>%
filter(!is.na(isFlanders)|!is.na(isWallonia)|!is.na(isBrussels))
filter(!is.na(isFlanders)|!is.na(isWallonia)|!is.na(isBrussels))
be_alientaxa_cube %>%
select(c(year,eea_cell_code,taxonKey,n = obs,min_coord_uncertainty,isFlanders,isWallonia,isBrussels,gemeente,provincie,gewest)) %>%
Expand Down Expand Up @@ -417,10 +420,10 @@ df_bl_xy <-
bind_cols(
tibble(
x = unlist(str_extract_all(unique(df_bl$eea_cell_code),
pattern = "(?<=E)[0-9\\-]+"
pattern = "(?<=E)[0-9\\-]+"
)),
y = unlist(str_extract_all(unique(df_bl$eea_cell_code),
pattern = "(?<=N)[0-9\\-]+"
pattern = "(?<=N)[0-9\\-]+"
))
) %>%
mutate_all(as.integer)
Expand Down Expand Up @@ -627,41 +630,60 @@ df_ts <- pmap_dfr(df_cc,
)
```

```{r cleanup df_cc}
remove(df_cc)
gc()
```{r cleanup df_cc, message=NA}
if(Sys.getenv("amiabot") == "yes"){
cat("Removing df_cc to free up some memory")
remove(df_cc)
gc()
}
```

## Add data

# add occurrence data

```{r add_occurrence_data}
df <- df %>%
select(taxonKey, year, eea_cell_code, obs)
gc()
df_ts <-
df_ts %>%
left_join(df %>% select(taxonKey, year, eea_cell_code, obs),
left_join(df,
by = c("taxonKey", "year", "eea_cell_code"))
```

# remove df to free up some memory

```{r remove_df}
remove(df)
gc()
if(Sys.getenv("amiabot") == "yes"){
cat("Removing df to free up some memory")
remove(df)
gc()
}
```

# add membership to protected areas

```{r add_protected_areas}
df_prot_areas <- df_prot_areas %>%
select(CELLCODE, natura2000)
gc()
df_ts <-
df_ts %>%
left_join(df_prot_areas %>% select(CELLCODE, natura2000),
left_join(df_prot_areas,
by = c("eea_cell_code" = "CELLCODE"))
```

```{r remove_prot_areas}
remove(df_prot_areas)
gc()
if(Sys.getenv("amiabot") == "yes"){
cat("Removing df_prot_areas to free up some memory")
remove(df_prot_areas)
gc()
}
```

# add classKey
Expand All @@ -675,26 +697,43 @@ df_ts <-
```

```{r remove_spec_names}
remove(spec_names)
gc()
if(Sys.getenv("amiabot") == "yes"){
cat("Removing spec_names to free up some memory")
remove(spec_names)
gc()
}
```

## Research effort correction

```{r Add baseline data}
# at class level diminished by obs of specific alien taxon
df_bl <- df_bl %>%
select(year, eea_cell_code, classKey, cobs) %>%
distinct()
gc()
nrow_prior <- nrow(df_ts)
df_ts <-
df_ts %>%
left_join(
df_bl %>%
select(year, eea_cell_code, classKey, cobs),
df_bl,
by = c("year", "eea_cell_code", "classKey")
)
if(nrow_prior != nrow(df_ts)){
stop("Number of rows changed after join")
}
```

```{r remove_df_bl}
remove(df_bl)
gc()
if(Sys.getenv("amiabot") == "yes"){
cat("Removing df_bl to free up some memory")
remove(df_bl)
gc()
}
```

To correct for the effect of research effort on an alien species, we calculate the number of observations at class level excluding the observations of the alien species itself:
Expand All @@ -705,6 +744,17 @@ df_ts <-
df_ts %>%
mutate(cobs = cobs - obs)
df_ts_below_zero <- df_ts %>% filter(cobs < 0)
if(nrow(df_ts_below_zero) > 0){
warning("Negative values in cobs column")
# replace negative values with 0
df_ts <-
df_ts %>%
mutate(cobs = if_else(cobs < 0, 0, cobs))
}
# replace NAs with 0
df_ts <-
df_ts %>%
Expand Down Expand Up @@ -778,7 +828,6 @@ write_tsv(spec_names,
"timeseries_taxonomic_info.tsv"),
na = ""
)
```

reload previously loaded inbotheme version
Expand Down

0 comments on commit 89eb62e

Please sign in to comment.