-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.R
37 lines (25 loc) · 953 Bytes
/
scrape.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
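# Purpose: retrieve CDC bird-flu human case data and the H5N1 wastewater (WW)
# table from the web, appending each snapshot to CSV files under data-raw/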
library(rvest)
library(data.table)
library(here)
# Human cases ----
# Scrape the case table from the CDC bird-flu situation summary page, reshape
# it to long format and append the snapshot to data-raw/human_cases.csv.
uri <- "https://www.cdc.gov/bird-flu/situation-summary/index.html"

# read the html
page <- read_html(uri)

# extract every <table> on the page (named to avoid shadowing base::table)
page_tables <- html_table(page)

# Fail with a readable message instead of "subscript out of bounds" when the
# page layout changes and no static table is present.
if (length(page_tables) == 0L) {
  stop("No HTML tables found at ", uri, call. = FALSE)
}

# The case counts are read from the first table on the page.
table_dt <- setDT(page_tables[[1]])
if (!"State" %in% names(table_dt)) {
  stop(
    "First table at ", uri, " has no 'State' column; found: ",
    paste(names(table_dt), collapse = ", "),
    call. = FALSE
  )
}

# One row per State/Source pair; every non-State column becomes a Source.
table_dt_long <- melt(
  table_dt,
  id.vars = "State",
  variable.name = "Source",
  value.name = "Cases"
)

# html_table() leaves values such as "1,234" as character, and as.numeric()
# would turn those into NA, so drop thousands separators before coercing.
table_dt_long[
  ,
  Cases := as.numeric(gsub(",", "", as.character(Cases), fixed = TRUE))
]

# Stamp the snapshot so appended runs can be told apart.
table_dt_long[, UpdateDTS := Sys.time()]

# write to disk
fwrite(table_dt_long, here("data-raw", "human_cases.csv"), append = TRUE)
# Get WW data ----
# Download the H5N1 table JSON, reshape it to long format and append the
# snapshot to data-raw/ww_cases.csv.
url <- "https://www.cdc.gov/wcms/vizdata/NCEZID_DIDRI/FluA/H5N1Table.json"
library(httr)

# Stream the response body to a temporary file.
tmp <- tempfile()
resp <- httr::GET(url, httr::write_disk(tmp))

# Abort on 4xx/5xx instead of handing an error page to the JSON parser.
httr::stop_for_status(resp, task = "download the H5N1 WW table")

dat <- jsonlite::fromJSON(tmp)
unlink(tmp) # the parsed object is all that is needed from here on

# Identifier columns that are kept as-is; every other column is stacked.
id_cols <- c("Sewershed", "State/Territory", "County")
if (!is.data.frame(dat)) {
  stop("Expected a tabular JSON payload from ", url, call. = FALSE)
}
missing_cols <- setdiff(id_cols, names(dat))
if (length(missing_cols) > 0L) {
  stop(
    "WW table is missing expected column(s): ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Stamp the snapshot so appended runs can be told apart.
dat$UpdateDTS <- Sys.time()

# Reshape with data.table::melt, matching the human-case block above.
# intersect() keeps the id columns in their original order, and
# variable.factor = FALSE keeps DateDT as character.
setDT(dat)
dat_long <- melt(
  dat,
  id.vars = intersect(names(dat), c(id_cols, "UpdateDTS")),
  variable.name = "DateDT",
  value.name = "StatusDT",
  variable.factor = FALSE
)

fwrite(dat_long, here("data-raw", "ww_cases.csv"), append = TRUE)