Skip to content

Commit

Permalink
moved NP seaweed gapfill exploration to new archive doc, rendered upd…
Browse files Browse the repository at this point in the history
…ateed step1b in NP
  • Loading branch information
annaramji committed Aug 28, 2024
1 parent 955e3de commit da95c1b
Show file tree
Hide file tree
Showing 3 changed files with 283 additions and 162 deletions.
93 changes: 1 addition & 92 deletions globalprep/np/v2024/STEP1b_np_seaweeds_prep.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ source(here("globalprep", "np", v_scen_year, "R", "np_fxn.R"))
source(here("globalprep", "mar", v_scen_year, "mar_fxs.R")) # functions specific to mariculture dealing with compound countries
```


# Import Raw Data: FAO Mariculture data

Mariculture production in tonnes.
Expand Down Expand Up @@ -404,98 +405,6 @@ write_csv(maric, here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.csv")
```


## test FAO gapfill

```{r}
# Exploratory check: can the processed FAO commodities data be used as a second
# gapfilling source for the seaweed tonnes weighting table?
#
# NOTE(review): this chunk relies on `previous_np_dir`, `current_np_dir`, and
# `years` being defined earlier in the document. `sw_fill_df` (used in the
# "step 2 usage" section below) is not created in this chunk -- confirm it
# exists in the session before running that section.

# check last year's data to see if they have singapore values
v2023_seaweed_tonnes_weighting <- readr::read_csv(
  here(previous_np_dir, "int", "np_seaweeds_tonnes_weighting.csv")
)
v2023_singapore_seaweed <- v2023_seaweed_tonnes_weighting %>%
  filter(rgn_id == 208)
nrow(v2023_singapore_seaweed)
# 0

# Gapfilling pt. 2: FAO commodities data used to fill data gaps
np_seaweeds_tonnes_weighting <- readr::read_csv(
  here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.csv")
)

# read in processed FAO commodities data
commodities_data <- readr::read_csv(
  here(current_np_dir, "int", "np_harvest_tonnes_usd.csv")
)
fao_comm_seaweed <- commodities_data %>%
  filter(product == "seaweeds")

seaweed_sust <- read_csv(here(current_np_dir, "output", "np_seaweed_sust.csv"))
test_seaweed <- readr::read_csv(
  here(current_np_dir, "output", "np_seaweed_harvest_tonnes.csv")
)

# here's what np_seaweed_tonnes_weighting is used for in step 2:
# (this is the same file already loaded into `np_seaweeds_tonnes_weighting`,
# so reuse it instead of reading it from disk a second time)
sw_tonnes_raw <- np_seaweeds_tonnes_weighting

# compare which regions appear in each data source
aquaculture_seaweed_rgns <- unique(sw_tonnes_raw$rgn_id)
commodities_seaweed_rgns <- unique(fao_comm_seaweed$rgn_id)

setdiff(aquaculture_seaweed_rgns, commodities_seaweed_rgns)
setdiff(commodities_seaweed_rgns, aquaculture_seaweed_rgns)
length(setdiff(commodities_seaweed_rgns, aquaculture_seaweed_rgns))
# 58

fao_comm_sw_zero_check <- fao_comm_seaweed %>%
  filter(tonnes == 0)
# noted issue in rgn 209 (China) from 1990-1992 -- 0 for tonnes, non-zero for
# value. could indicate that the upstream gapfilling regression coefficient is
# 0 for these years and the years before, or that this is the tail end of the
# data so it's zero-filled.
# View() is only meaningful in an interactive session, so skip it when the
# document is knitted or otherwise run non-interactively.
if (interactive()) {
  View(fao_comm_sw_zero_check)
}

# filter to relevant years (last 5)
fao_comm_sw_zero <- fao_comm_seaweed %>%
  filter(year %in% years) %>%
  group_by(rgn_id) %>%
  # filter to keep only regions where tonnes == 0 for ALL years
  filter(all(tonnes == 0)) %>%
  # check that tonnes == 0 for all years (length of year range, or 5)
  filter(n() == length(years)) %>%
  # select unique region IDs
  dplyr::distinct(rgn_id) %>%
  ungroup()

if (interactive()) {
  View(fao_comm_sw_zero)
}
length(unique(fao_comm_sw_zero$rgn_id))
# 7

# filter out these regions from the fao commodities seaweed subset
fao_comm_seaweed_filter <- fao_comm_seaweed %>%
  dplyr::filter(!rgn_id %in% fao_comm_sw_zero$rgn_id)

# check
nrow(fao_comm_seaweed_filter %>% distinct(rgn_id)) ==
  (nrow(fao_comm_seaweed %>% distinct(rgn_id)) - nrow(fao_comm_sw_zero))
# alternatively,
length(unique(fao_comm_seaweed_filter$rgn_id)) ==
  (length(unique(fao_comm_seaweed$rgn_id)) -
    length(unique(fao_comm_sw_zero$rgn_id)))

# step 2 usage
sw_tonnes <- sw_tonnes_raw %>%
  mutate(product = "seaweeds") %>%
  group_by(rgn_id, year, product) %>% # per region, year, and product,
  # sum across all species of seaweed; drop grouping explicitly so later steps
  # do not depend on (or message about) leftover groups
  summarise(tonnes = sum(tonnes, na.rm = TRUE), .groups = "drop") %>%
  dplyr::filter(year %in% years) %>% # filter to 5 year range
  # NOTE(review): `sw_fill_df` must already exist in the session (see header)
  full_join(sw_fill_df, by = c("rgn_id", "year", "product")) %>%
  mutate(tonnes = ifelse(is.na(tonnes), 0, tonnes)) %>% ## gapfill the NAs to be 0
  dplyr::select(rgn_id, year, product, tonnes) %>%
  group_by(rgn_id, product) %>%
  summarise(tonnes = mean(tonnes), .groups = "drop") ## calculate 5 year average
```






Expand Down
Loading

0 comments on commit da95c1b

Please sign in to comment.