-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathzr.Rmd
197 lines (166 loc) · 7.22 KB
/
zr.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
---
title: "L'ORA - zoonoses report"
author: "Alfredo Acosta SVA"
date: "`r Sys.Date()`"
output:
  html_document:
    toc: yes
    toc_float: true
    theme: cosmo
    fig_caption: yes
    number_sections: yes
    code_folding: show
  html_notebook:
    toc: yes
---
# Working example zoonoses
# Libraries
```{r}
# Spill over analysis
# Update: 17/01/2024
library(tidyverse)
library(lubridate)
library(stringr)
library(readr)
library(data.table)
library(plotly)
library(DT)
library(arsenal)
```
# Working directory
## Loading the zoonoses report from Zenodo (locally acquired cases)
```{r}
# Folder that holds the zoonoses report data file.
# The file is read through its full path instead of calling setwd(): knitr
# restores the working directory at the end of each chunk, so a setwd() here
# would not carry over to later chunks and only triggers a warning.
data_dir <- "C:/Users/alfredo.acosta/SVA/LiRA_consortium - Documents/WG1 Disease occurrence/datasources/Second-disease-group/Zoonoses_report"

# read.csv2() expects ";" as the field separator and "," as the decimal mark.
zr <- read.csv2(file.path(data_dir, "PREVALENCE_2022.csv"))
```
# Checking column names
```{r}
# Show the column names of the raw report.
colnames(zr)

# One-row summary: for each key column, a list-column with its distinct values.
unique_count_values <- summarise(
  select(zr, REPYEAR, REPCOUNTRY, ZOONOSIS, SPECIESTYPE, MATRIX),
  across(everything(), function(column) list(unique(column)))
)
# Inspect the list-column version:
# unique_count_values

# Human-readable version: each list of distinct values is collapsed into a
# single comma-separated string.
unique_values <- mutate(
  unique_count_values,
  across(everything(), function(column) paste(unlist(column), collapse = ", "))
)
# Inspect the collapsed values, overall or column by column:
# unique_values
# unique_values$REPYEAR
# unique(unique_values$REPCOUNTRY)
# unique(unique_values$ZOONOSIS)
# unique(unique_values$SPECIESTYPE)
# unique(unique_values$MATRIX)
```
# Filtering zoonoses name
```{r}
library(dplyr)
library(stringr)

# Keep the animal records for the agents of interest and derive a harmonised
# DISEASE label from the reported ZOONOSIS name.
zr2 <- zr %>%
  filter(
    SPECIESTYPE == "animal",
    # A single alternation is enough: the former separate "Echinococcus spp."
    # test was already covered by "Echinococcus".
    str_detect(ZOONOSIS, "Coxiella|Echinococcus|Leishmania"),
    # Drop every record whose ZOONOSIS name contains "multiloc".
    !str_detect(ZOONOSIS, "multiloc")
  ) %>%
  mutate(DISEASE = case_when(
    str_detect(ZOONOSIS, "granulosus|Echinococcus") ~ "Echinococcosis",
    str_detect(ZOONOSIS, "Coxiella|Q fever") ~ "Q fever",
    # No rule matched (e.g. Leishmania records): keep the original ZOONOSIS
    # value.
    # NOTE(review): the functional-group chunk tests DISEASE == "Leishmania";
    # confirm the raw ZOONOSIS value is exactly "Leishmania", otherwise those
    # rows never receive a functional group.
    TRUE ~ ZOONOSIS
  )) %>%
  select(
    REPYEAR, ZOONOSIS, ZOONOSIS_C, REPCOUNTRY, MATRIX, MATRIX_C, DISEASE,
    TOTUNITSPOSITIVE, TOTUNITSTESTED, SAMPUNIT
  )

# Quick checks of the filtered data:
# unique(zr2$REPYEAR)
# unique(zr2$REPCOUNTRY)
# unique(zr2$ZOONOSIS_C)
# unique(zr2$MATRIX)
# unique(zr2$DISEASE)
```
# Disease codes (DISEASE vs. ZOONOSIS_C)
```{r}
# Cross-tabulate the zoonosis codes (rows) against the derived disease
# labels (columns).
table(zr2[["ZOONOSIS_C"]], zr2[["DISEASE"]])
```
# Number of reports available
```{r}
# Interactive table: sum of TOTUNITSPOSITIVE per year, disease and country,
# ordered by disease and then by descending total.
zr2 %>%
  group_by(REPYEAR, DISEASE, REPCOUNTRY) %>%
  summarise(
    number_reports = sum(TOTUNITSPOSITIVE, na.rm = TRUE),
    # Return an ungrouped result; this also silences the regrouping message.
    .groups = "drop"
  ) %>%
  arrange(DISEASE, desc(number_reports)) %>%
  datatable()
```
# Number of reports by type of study
```{r}
# Interactive table: sum of TOTUNITSPOSITIVE per year, disease and country,
# with one column per sampling unit (SAMPUNIT).
zr2 %>%
  group_by(REPYEAR, DISEASE, REPCOUNTRY, SAMPUNIT) %>%
  summarise(
    number_reports = sum(TOTUNITSPOSITIVE, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  # pivot_wider() replaces the superseded spread(); names_sort = TRUE keeps
  # the SAMPUNIT columns in sorted order, as spread() produced them.
  pivot_wider(
    names_from = SAMPUNIT,
    values_from = number_reports,
    names_sort = TRUE
  ) %>%
  datatable()
```
# Species
## The 78 reported species are regrouped into 13 categories
```{r}
# Number of records per original MATRIX value.
zr2 %>%
  group_by(MATRIX) %>%
  summarise(reports = n())

# Collapse the MATRIX values into broader species groups.
# case_when() uses the first rule that matches, so the rule order matters.
zr2 <- zr2 %>%
  mutate(species_fg = case_when(
    str_detect(MATRIX, "Solipeds") ~ "Domesticated Equids",
    str_detect(MATRIX, "Pigs") ~ "Pigs",
    str_detect(MATRIX, "Goat") ~ "Goats",
    str_detect(MATRIX, "Sheep") ~ "Sheep",
    str_detect(MATRIX, "Cattle") ~ "Cattle",
    # Case-sensitive match, so "Reindeer" is not caught by this rule.
    str_detect(MATRIX, "Deer") ~ "Wildlife",
    str_detect(MATRIX, "Dogs") ~ "Dogs",
    str_detect(MATRIX, "Reindeer") ~ "Reindeer",
    str_detect(MATRIX, "Cats") ~ "Cats",
    # Any MATRIX containing "wild" (any case) is Wildlife; this includes wild
    # boars. The former separate "Wild boar" rule sat after this one and could
    # never match, so it was removed without changing the result.
    # NOTE(review): if wild boars should be their own group, add a "Wild boar"
    # rule above this line and matching functional-group assignments.
    str_detect(MATRIX, regex("wild", ignore_case = TRUE)) ~ "Wildlife",
    str_detect(MATRIX, regex("Fox", ignore_case = TRUE)) ~ "Foxes",
    # Exact names treated as wildlife ("Deer" is already handled above).
    MATRIX %in% c(
      "Badgers", "Coypu", "Hares", "Mouflons", "Raccoons", "Squirrels"
    ) ~ "Wildlife",
    str_detect(MATRIX, regex("Zoo", ignore_case = TRUE)) ~ "Zoo_animal",
    # Anything else keeps its original MATRIX value.
    TRUE ~ MATRIX
  ))

# 13 species groups found
zr2 %>%
  group_by(species_fg) %>%
  summarise(reports = n())

# Records per disease and species group, in both nesting orders.
zr2 %>%
  group_by(DISEASE, species_fg) %>%
  summarise(reports = n(), .groups = "drop")

zr2 %>%
  group_by(species_fg, DISEASE) %>%
  summarise(reports = n(), .groups = "drop")
```
# Assigning functional groups
## According to how they appear in the previous code chunk and a comparison with the WG3 list
```{r}
# Functional group for each (species group, disease) pair.
# Pairs that are not listed here receive the placeholder "0".
fg_lookup <- tribble(
  ~species_fg,           ~DISEASE,         ~fg,
  "Alpacas - farmed",    "Q fever",        "Domestic amplification",
  "Cats",                "Echinococcosis", "Domestic amplification",
  "Cattle",              "Echinococcosis", "Domestic spillover",
  "Cattle",              "Q fever",        "Domestic amplification",
  "Dogs",                "Echinococcosis", "Domestic amplification",
  "Dogs",                "Leishmania",     "Domestic amplification",
  "Dogs",                "Q fever",        "Domestic spillover",
  "Domesticated Equids", "Q fever",        "Domestic spillover",
  "Domesticated Equids", "Echinococcosis", "Domestic spillover",
  "Foxes",               "Echinococcosis", "Wildlife amplification",
  "Goats",               "Echinococcosis", "Domestic spillover",
  # Goats / Q fever was previously assigned twice ("Domestic spillover", then
  # "Domestic amplification"); only the second assignment took effect, so
  # that value is kept here.
  "Goats",               "Q fever",        "Domestic amplification",
  "Pigs",                "Q fever",        "Domestic spillover",
  "Pigs",                "Echinococcosis", "Domestic spillover",
  # Not sure about this one.
  "Reindeer",            "Echinococcosis", "Wildlife amplification",
  "Sheep",               "Echinococcosis", "Domestic amplification",
  "Sheep",               "Q fever",        "Domestic amplification",
  "Water buffalos",      "Echinococcosis", "Domestic spillover",
  "Water buffalos",      "Q fever",        "Domestic spillover",
  # Wildlife groups: chosen only because they are the most common.
  "Wildlife",            "Q fever",        "Wildlife spillover",
  "Wildlife",            "Echinococcosis", "Wildlife amplification",
  # Zoo animals: because they are confined they could be a dead end
  # ("spillover"); they also receive veterinary attention and diagnosis, so
  # reported cases would have led to a decision that reduced their chance to
  # transmit ("amplification"). The same consideration applies to Q fever.
  "Zoo_animal",          "Echinococcosis", "Domestic spillover",
  "Zoo_animal",          "Q fever",        "Domestic spillover"
)

zr2 <- zr2 %>%
  # Drop any fg column left from a previous run so the join cannot create
  # fg.x / fg.y duplicates.
  select(-any_of("fg")) %>%
  left_join(fg_lookup, by = c("species_fg", "DISEASE")) %>%
  # Unlisted pairs keep the "0" placeholder.
  mutate(fg = coalesce(fg, "0"))

# Records per species group, disease and assigned functional group.
zr2 %>%
  group_by(species_fg, DISEASE, fg) %>%
  summarise(reports = n(), .groups = "drop")
```
```{r}
# Number of records per matrix code, matrix name, disease and functional
# group, most frequent combinations first.
species_fg <- zr2 %>%
  group_by(MATRIX_C, MATRIX, DISEASE, fg) %>%
  summarise(
    number = n(),
    # Return an ungrouped table; this also silences the regrouping message.
    .groups = "drop"
  ) %>%
  arrange(desc(number))
# Optional export of the table:
# write.csv(species_fg, file = "species_fg_zoonoses.csv")
```
# Credits Acosta, A.; Ernholm, L. <sup>1</sup>.
**SVA<sup>1</sup>**: SVA <http://www.sva.se/>.