-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1a_update_higherrank.R
59 lines (45 loc) · 2.22 KB
/
1a_update_higherrank.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# --------------------------------
# The script is recommended if the user updates the data after a long time since
# the last update. After any major GBIF Backbone Taxonomy updates, it might happen
# that the set of names which were matched to 'higherrank' changes. Most of
# them, we can expect, remain the same, but to be sure and to fix possible issues
# we need to check that again.
# To avoid tedious manual work, we use data from the previous update to fill in
# the possible spelling variants of names in the new 'higherrank' dataframe. For
# that reason, we should always keep the previous version of the files.
# REQUIRES MANUAL OPERATIONS! DON'T RUN IT IN THE BACKGROUND!
# Remove all (non-hidden) objects from the current environment before starting
rm(list = ls())

# Packages ----
library(tidyr)
library(dplyr)
library(DataEditR) # provides data_edit(), the GUI data frame editor used below

# Input data ----
# Taxon names matched as 'higherrank' during the previous dump update.
# The `X` and `ID` columns are dropped right after reading.
previous_higherrank <- select(
  read.csv("./data/higherrank_nameVariants.csv"),
  -X, -ID
)

# Taxon names matched as 'higherrank' during the current session.
# The `X` column is dropped right after reading.
new_higherrank <- select(read.csv("./temp/higherrank.csv"), -X)
# Attach the spelling variants recorded during the previous update to the
# names matched in this session. Every row of `new_higherrank` is kept; names
# without a counterpart in `previous_higherrank` get NA in the joined columns.
updated_higherrank <- left_join(
  new_higherrank,
  previous_higherrank,
  by = "verbatimScientificName"
)

# Split the joined table on whether `X.1` is filled in.
# NOTE(review): `X.1` is assumed to be a column contributed by
# `previous_higherrank`, so that NA means "no record from the previous
# update" - confirm against the actual CSV headers.

# Names already known from the previous update
not_changed_since_last_time <- filter(updated_higherrank, !is.na(X.1))

# Names appearing only this time (e.g. after recent Backbone updates)
to_be_fulfilled_manually <- filter(updated_higherrank, is.na(X.1))
# Manual step ----
# Launch the GUI and add the necessary name variants next to the new names.
# When the editing is done, press the 'synchronise' button, then 'Done' to
# finish editing; the edited table is returned by data_edit().
# If there is nothing to fill in, the GUI is skipped so the user is not asked
# to edit an empty table, and the (empty) table is passed through unchanged.
if (nrow(to_be_fulfilled_manually) > 0) {
  manually_fulfilled <- data_edit(to_be_fulfilled_manually)
} else {
  message("No new 'higherrank' names to fill in; skipping manual editing.")
  manually_fulfilled <- to_be_fulfilled_manually
}

# Combine the previously known names with the newly completed ones
higherrank_nameVariants <- bind_rows(
  not_changed_since_last_time,
  manually_fulfilled
)
# Output ----
# write.csv() is called with its defaults, so row names are written as the
# first column of each file.
# NOTE(review): presumably this is the origin of the `X` column dropped when
# these files are read back - confirm.

# Back-up of the previous version, written from the in-memory
# `previous_higherrank` table
write.csv(
  previous_higherrank,
  file = "./data/higherrank_nameVariants_prev.csv"
)

# Current version; replaces the file the previous version was read from
write.csv(
  higherrank_nameVariants,
  file = "./data/higherrank_nameVariants.csv"
)

# Remove all (non-hidden) objects from the current environment
rm(list = ls())
# End of script ####