moved NP seaweed gapfill exploration to new archive doc, rendered updated step1b in NP
OHI-Science · Aug 28, 2024 · da95c1b · da95c1b
1 parent 955e3de
commit da95c1b
Show file tree

Hide file tree

Showing 3 changed files with 283 additions and 162 deletions.
diff --git a/globalprep/np/v2024/STEP1b_np_seaweeds_prep.Rmd b/globalprep/np/v2024/STEP1b_np_seaweeds_prep.Rmd
@@ -103,6 +103,7 @@ source(here("globalprep", "np", v_scen_year, "R", "np_fxn.R"))
 source(here("globalprep", "mar", v_scen_year, "mar_fxs.R")) # functions specific to mariculture dealing with compound countries
 ```
 
+
 # Import Raw Data: FAO Mariculture data
 
 Mariculture production in tonnes.
@@ -404,98 +405,6 @@ write_csv(maric, here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.csv")
 ```
 
 
-## test FAO gapfill
-
-```{r}
-
-# check whether last year's data contains any values for Singapore (rgn_id 208)
-
-v2023_seaweed_tonnes_weighting <- readr::read_csv(here(previous_np_dir, "int", "np_seaweeds_tonnes_weighting.csv"))
-
-v2023_singapore_seaweed <- v2023_seaweed_tonnes_weighting %>% filter(rgn_id == 208)
-nrow(v2023_singapore_seaweed)
-# 0
-
-# Gapfilling pt. 2: FAO commodities data used to fill data gaps 
-
-np_seaweeds_tonnes_weighting <- readr::read_csv(here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.csv"))
-
-# read in processed FAO commodities data
-commodities_data <- readr::read_csv(here(current_np_dir, "int", "np_harvest_tonnes_usd.csv"))
-
-fao_comm_seaweed <- commodities_data %>% 
-  filter(product == "seaweeds")
-
-seaweed_sust <- read_csv(here(current_np_dir, "output", "np_seaweed_sust.csv"))
-
-test_seaweed <- readr::read_csv(here(current_np_dir, "output", "np_seaweed_harvest_tonnes.csv"))
-
-
-# here's how np_seaweeds_tonnes_weighting.csv is used in step 2:
-
-sw_tonnes_raw <- read_csv(here(current_np_dir, "int", "np_seaweeds_tonnes_weighting.csv")) 
-
-
-aquaculture_seaweed_rgns <- unique(sw_tonnes_raw$rgn_id)
-commodities_seaweed_rgns <- unique(fao_comm_seaweed$rgn_id)
-
-setdiff(aquaculture_seaweed_rgns, commodities_seaweed_rgns)
-setdiff(commodities_seaweed_rgns, aquaculture_seaweed_rgns)
-
-length(setdiff(commodities_seaweed_rgns, aquaculture_seaweed_rgns))
-# 58
-
-
-fao_comm_sw_zero_check <- fao_comm_seaweed %>% filter(tonnes == 0)
-View(fao_comm_sw_zero_check) # Noted issue in rgn 209 (China) for 1990-1992: tonnes is 0 but value is non-zero. This could mean either that the upstream gapfilling regression coefficient is 0 for these and earlier years, or that these years are the tail end of the data and were zero-filled.
-
-# filter to relevant years (last 5)
-fao_comm_sw_zero <- fao_comm_seaweed %>% 
-  filter(year %in% years) %>%
-  group_by(rgn_id) %>% 
-  # filter to keep only regions where tonnes == 0 for ALL years
-  filter(all(tonnes == 0)) %>% 
-  # keep only regions that have a row for every year in the range (length(years), i.e. 5), so the all-zero check above covers the full window
-  filter(n() == length(years)) %>%
-  # select unique region IDs
-  dplyr::distinct(rgn_id) %>% 
-  ungroup()
-
-View(fao_comm_sw_zero)
-length(unique(fao_comm_sw_zero$rgn_id))
-# 7
-
-
-
-# filter out these regions from the fao commodities seaweed subset
-fao_comm_seaweed_filter <- fao_comm_seaweed %>% 
-  dplyr::filter(!rgn_id %in% fao_comm_sw_zero$rgn_id)
-
-
-# check
-nrow(fao_comm_seaweed_filter %>% distinct(rgn_id)) == (nrow(fao_comm_seaweed %>% distinct(rgn_id)) - nrow(fao_comm_sw_zero))
-# alternatively, 
-length(unique(fao_comm_seaweed_filter$rgn_id)) == (length(unique(fao_comm_seaweed$rgn_id)) - length(unique(fao_comm_sw_zero$rgn_id)))
-
-
-# step 2 usage
-
-sw_tonnes <- sw_tonnes_raw %>%
-  mutate(product = "seaweeds") %>%
-  group_by(rgn_id, year, product) %>% # per region, year, and product,
-  summarise(tonnes = sum(tonnes, na.rm = TRUE)) %>% # sum across all species of seaweed
-  dplyr::filter(year %in% years) %>% # filter to 5 year range 
-  full_join(sw_fill_df, by = c("rgn_id", "year", "product")) %>%
-    mutate(tonnes = ifelse(is.na(tonnes), 0, tonnes)) %>% ## gapfill the NAs to be 0
-  dplyr::select(rgn_id, year, product, tonnes) %>%
-  ungroup() %>%
-  group_by(rgn_id, product) %>%
-  summarise(tonnes = mean(tonnes)) %>% ## calculate 5 year average
-  ungroup()
-```
-
-
-