forked from GMRI-SEL/ISEL_CodeforGitGitHubTutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathISEL_CodeforGitGitHubTutorial_pt3.R
126 lines (95 loc) · 4.35 KB
/
ISEL_CodeforGitGitHubTutorial_pt3.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# GMRI Integrated Systems Ecology Lab: Git, GitHub and You example code
#####
## You, working by yourself
#####
# Data prep and exploration -----------------------------------------------
## Data prep using preloaded "trees" dataset in R. As a clean dataset, there really isn't much prep other than loading in the dataset and checking out its structure.
# Load it in
data(trees)
# Preliminary inspection: format, structure, dimensions
head(trees)
str(trees)
dim(trees)
colnames(trees)
# Little tree ecology for us marine folks: tree height and girth are commonly measured, while measuring tree volume is more difficult and less appealing as it requires either cutting down the tree, or climing all over it and taking a lot of precise measurements. Ideally, we'd like to be able to build a model that relates height and girth to tree volume.
## Exploration
# For this we are going to go outside base R functions and use a lot of functions from libraries packaged in the "tidyverse" library. So, let's install it (if we need to) and then load it.
library_check<- function(libraries) {
## Details
# This function will check for and then either load or install any libraries needed to run subsequent functions
# Args:
# libraries = Vector of required library names
# Returns: NA, just downloads/loads libraries into current work space.
## Start function
lapply(libraries, FUN = function(x) {
if (!require(x, character.only = TRUE)) {
install.packages(x, dependencies = TRUE)
library(x, character.only = TRUE)
}
})
## End function
}
library_check(c("tidyverse", "cowplot", "GGally", "plotly", "broom"))
# Hopefully that all worked okay. A billion different ways to visualize these data, so just a few here...
# Variable histogram plots
girth.hist<- ggplot(data = trees) +
geom_histogram(mapping = aes(x = Girth))
height.hist<- ggplot(data = trees) +
geom_histogram(mapping = aes(x = Height))
vol.hist<- ggplot(data = trees) +
geom_histogram(mapping = aes(x = Volume))
plot_grid(girth.hist, height.hist, vol.hist, labels = c("Girth", "Height", "Volume"), nrow = 1, align = "hv")
# Covariation plot
ggpairs(trees)
# Data modeling: linear regression ----------------------------------------
## Simple linear regression model for volume as a function of girth
# Fit the model
girth.mod<- lm(Volume ~ Girth, data = trees)
# Diagnostics
plot(girth.mod)
# Inferences and evaluation
summary(girth.mod)
# Visualize the model
ggplot(data = trees, aes(x = Girth, y = Volume)) +
geom_point() +
stat_smooth(method = "lm", col = "dodgerblue3") +
theme(panel.background = element_rect(fill = "white"),
axis.line.x=element_line(),
axis.line.y=element_line()) +
ggtitle("Linear Model Fitted to Data")
#####
## You, now using some code that was written by someone else
#####
# Data modeling: linear regression using both girth and height
# Fit the model
girth.height.mod<- lm(Volume ~ Girth + Height, data = trees)
# Diagnostics
plot(girth.height.mod)
# Inferences and evaluation
summary(girth.height.mod)
# Visualize the model using a surface
Girth<- seq(9,21, by=0.5)
Height<- seq(60,90, by=0.5)
pred.grid<- expand.grid(Girth = Girth, Height = Height)
# Predict from these value vectors using the model
pred.grid$Pred.Volume<-predict(girth.height.mod, new = pred.grid)
# Visualize it
plot_ly(data = pred.grid, z = ~Pred.Volume, x = ~Girth, y = ~Height, opacity = 0.5) %>%
add_markers(marker = list(size = 2), name = "predicted") %>%
add_markers(data = trees, z = ~Volume, x = ~Girth, y = ~Height, marker = list(color = "black", size = 4), name = "Observed")
#####
## You, now coding collaboratively with someone else
#####
# AJA Notes 4/24: Looked like there might be an interaction going on between height and girth. Giving that a shot and then looking at the model comparisons.
# Data modeling: linear regression using both girth and height and interaction
# Fit the model
girth.height.int.mod<- lm(Volume ~ Girth + Height + Girth*Height, data = trees)
# Diagnostics
plot(girth.height.int.mod)
# Inferences and evaluation
summary(girth.height.int.mod)
# Comparison -- definitely better way to do this, but for now:
mod.comp<- data.frame("Model" = c("Girth", "Girth+Height", "Girth+Height+Int"), bind_rows(glance(girth.mod), glance(girth.height.mod), glance(girth.height.int.mod)))
mod.comp
# Someone else's notes -- looks great
print(paste("Looks great"))