-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathaggregate.R
117 lines (81 loc) · 3.14 KB
/
aggregate.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Purpose: Create Longitudinal Data from CDC Scrapes
library(stringr)
library(dplyr)
library(purrr)
library(data.table)
h <- here::here
# Read every scrape of one file type from the `data` directory and stack them
# into a single long data.table.
#
# Each file name (minus its extension) is parsed as the time the scrape was
# pulled. When several scrapes share a calendar date, only the one with the
# latest pull time is kept.
#
# @param type File extension to load. "json" files are read with
#   jsonlite::read_json() and their `data` element is stacked; any other value
#   is read with data.table::fread().
# @return A data.table with the pull date in `DateDT`, the state in `StateDSC`
#   and the numeric case count in `CasesCumulativeCNT`, plus whatever other
#   columns the scrapes carry. `Location`, `Range` and `Case Range` are
#   dropped when present.
get_data <- function(type = "json") {
  county_details <- fs::dir_ls(h("data"), glob = sprintf("*.%s", type))
  if (length(county_details) == 0) {
    stop("No '.", type, "' files found in ", h("data"), call. = FALSE)
  }

  pull_time <- lubridate::as_datetime(
    tools::file_path_sans_ext(basename(county_details))
  )
  pull_date <- lubridate::date(pull_time)

  # One row per file; keep only the latest pull within each calendar date.
  # Files whose name does not parse get an NA pull time and fall out here.
  dat_information <- data.frame(
    county_details = unname(county_details),
    pull_time,
    pull_date
  ) %>%
    group_by(pull_date) %>%
    filter(pull_time == max(pull_time)) %>%
    ungroup()

  if (nrow(dat_information) == 0) {
    stop(
      "No '.", type, "' file in ", h("data"),
      " has a name that parses as a date-time",
      call. = FALSE
    )
  }

  # List names become the values of the `date` id column in rbindlist().
  pull_labels <- as.character(dat_information[["pull_date"]])

  if (type == "json") {
    dat_raw <- map(
      dat_information$county_details,
      jsonlite::read_json,
      simplifyVector = TRUE
    )
    names(dat_raw) <- pull_labels
    # Only the `data` element of each parsed JSON document is stacked.
    dat_dat <- rbindlist(map(dat_raw, "data"), idcol = "date", fill = TRUE)
  } else {
    dat_raw <- map(dat_information$county_details, data.table::fread)
    names(dat_raw) <- pull_labels
    dat_dat <- rbindlist(dat_raw, idcol = "date", fill = TRUE)
    # In this branch the state is always taken from `Location`.
    dat_dat[, State := Location]
  }

  dat_dat[, Cases := as.numeric(Cases)]
  # Fall back to `Location` on rows where `State` is missing.
  dat_dat[, State := ifelse(is.na(State), Location, State)]

  setnames(
    x = dat_dat,
    old = c("date", "State", "Cases"),
    new = c("DateDT", "StateDSC", "CasesCumulativeCNT")
  )

  # Drop helper columns by reference, touching only those that exist.
  drop_cols <- intersect(c("Range", "Location", "Case Range"), names(dat_dat))
  if (length(drop_cols) > 0) {
    dat_dat[, (drop_cols) := NULL]
  }

  dat_dat
}
# Load both scrape formats.
json_dat <- get_data(type = "json")
csv_dat <- get_data(type = "csv")

# Stack the CSV rows first, then the JSON rows, filling columns that only one
# of the two sources has, and sort by date and state.
dat_dat <- rbindlist(list(csv_dat, json_dat), use.names = TRUE, fill = TRUE)
setorder(dat_dat, DateDT, StateDSC)

# Keep only the last row of each date/state pair.
dat_dat <- dat_dat[, .SD[.N], by = .(DateDT, StateDSC)]

# Change in the cumulative count relative to the previous row of the same
# state, walking the rows in date order; the first row of a state is NA.
dat_dat[
  order(DateDT),
  CasesDailyNBR := CasesCumulativeCNT - shift(CasesCumulativeCNT, n = 1L),
  by = StateDSC
]

fwrite(dat_dat, file = h("output", "mpx.csv"))
# pull jynneos --------------------------------------------------------------------------------
# jynneous_data <- lapply(list.files(h("data", "jynneos"), full.names = TRUE), function(x) {
# pull_date <- lubridate::as_datetime(str_remove(basename(x), "\\.csv"))
# pull_date <- lubridate::date(pull_date)
#
# jynneous_data <- data.table::fread(x)
#
# names(jynneous_data)[1] <- "Jurisdiction"
#
# jynneous_data <- data.table::melt(jynneous_data, id.vars = "Jurisdiction")
#
# jynneous_data[,value := gsub(pattern = "-", replacement = "0", value)]
#
# jynneous_data[ ,value := as.numeric(gsub(pattern = ",", replacement = "", value))]
#
# jyn_out <- jynneous_data[grepl("(T|t)otal",variable)][,Description := fcase(
# stringr::str_detect(string = variable, "Allocat"), "AllocatedCNT",
# stringr::str_detect(string = variable, "Shipped"), "ShippedCNT",
# stringr::str_detect(string = variable, "Requested"), "RequestedCNT"
# )][]
#
# jyn_out <- data.table::dcast(jyn_out, Jurisdiction ~ Description, value.var = "value")
#
# jyn_out <- jyn_out[,FilledPCT := ShippedCNT/RequestedCNT]
# jyn_out$DateDT <- pull_date
# jyn_out
# })
#
# jynneous_data <- rbindlist(jynneous_data, fill = TRUE)
#
# jynneous_data <- jynneous_data[,tail(.SD, 1), by = c("Jurisdiction","DateDT")]
#
# data.table::fwrite(jynneous_data, here::here("output", "jynneos.csv"))