-
Notifications
You must be signed in to change notification settings - Fork 58
/
Copy pathIntroR_worksheet.R
173 lines (119 loc) · 4.2 KB
/
IntroR_worksheet.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#### 1. Basic R Use (Demonstration) ####
## use R as a calculator
## typing an expression and running it prints the result in the console
3 + 4 # addition
2 * 6 # multiplication
4 / 2 # division
2^3 # powers
## create a vector: c() combines several values into one object
c(1, 5, 4)
## assign results to an object using the assignment operator, "<-"
## nothing is printed when assigning, but once it is assigned,
## the object shows up in RStudio in the "Environment" pane
x <- c(1, 3, 5)
## view an existing object by running its name
x
## round data (helpful for reporting)
## the first argument is the number to round
## the second argument, digits, is how many decimal places to keep
## here we keep 2 decimal places, giving 1.21
round(1.214294254, digits = 2)
#### 2. Descriptive Statistics (Activity) ####
## calculate the mean
## note that we reuse the previously assigned object, x,
## so the line that creates x (x <- c(1, 3, 5)) must be run first
mean(x)
## calculate the median
median(x)
## calculate the standard deviation
sd(x)
## minimum
min(x)
## maximum
max(x)
## if there are missing values, R will not calculate a result
## to see this, let's first create an object with some missing data
## we will call it "y"
## NA stands for Not Available, i.e., missing
y <- c(1, 3, NA, 7)
## calculate mean on y
## because y contains a missing value, the result is NA
mean(y)
## for all the descriptive statistics, we need to tell R
## to remove missing values first
## (na for not available; rm for remove)
## by adding an argument, na.rm = TRUE
mean(y, na.rm = TRUE)
#### 2b. You Try It ####
## find the mean of these numbers: 5, 3, 2, 9, 1
## find the standard deviation of the variable "y"
## (hint: "y" contains a missing value)
#### 3. Using a Dataset (Demonstration) ####
## R has a built in dataset called "mtcars"
## this dataset has variables about 32 different cars
## view the dataset in a spreadsheet-style viewer
## View() opens a viewer window, so it needs an interactive session
## (e.g., RStudio); the check below skips it when the whole script is
## run non-interactively (e.g., with Rscript), where opening the viewer
## could otherwise stop the script with an error
if (interactive()) {
  View(mtcars)
}
## one of the variables in the dataset is how many
## miles per gallon of petrol each car gets
## this variable is called "mpg"
## to access the variable from within the mtcars dataset
## we use the "$" operator
## the code below accesses and prints all the observations
## from the mpg variable
mtcars$mpg
## note what happens if the case is wrong
## R is case sensitive, and because a variable called "Mpg"
## does not exist, R returns NULL indicating no data
mtcars$Mpg
## calculate descriptive statistics
mean(mtcars$mpg)
sd(mtcars$mpg)
## it is also possible to round an entire set of numbers
## digits = 0 rounds every value to a whole number
round(mtcars$mpg, digits = 0)
## summary of a whole dataset
## (descriptive statistics for every variable at once)
summary(mtcars)
#### 4. Loading Data (Demonstration) ####
## packages add extra tools to R; here we load one for data management
## loading a package is like opening an app:
## it has to be done again every time you start up R
## if library() reports that the package cannot be found,
## uncomment and run the install.packages() line once, then try again
# install.packages("data.table", dependencies = TRUE)
library(data.table)
## web address of a sample data set stored on the internet
sample_url <- "https://raw.githubusercontent.com/JWiley/MonashHonoursStatistics/master/IntroR_sample.csv"
## read the sample data set into an object called "d"
d <- fread(sample_url)
## get a summary of the data
summary(d)
## load package to read Excel files (.xls or .xlsx)
## if not installed please uncomment and run code below
# install.packages("readxl", dependencies = TRUE)
library(readxl)
## name of the Excel file to read
## make sure you saved this Excel data to your R project folder
acti_file <- "actigraph_scored_31.xlsx"
## names of all the Excel sheets in the file
excel_sheets(acti_file)
## read in the "Sleep" sheet and store it as "d.acti"
d.acti <- read_excel(acti_file, sheet = "Sleep")
## view the variable names in the dataset
names(d.acti)
## calculate the mean sleep duration (total sleep time; TST)
mean(d.acti$TST)
#### 4b. You Try It ####
## view the Sleep Efficiency (se) variable in d.acti
## what is the second value?
## what are the variable names in the "Summary" sheet?
## the first variable is "ID"
#### 5. Logical Operators (activity) ####
## this section uses the dataset "d" that was read in with fread()
## in section 4, so that code must be run first
## each test below is applied to every value of the variable
## and gives one logical result per value
## "==" : logical test if Depressed is equal to 1
d$Depressed == 1
## ">" : logical test whether zStress is greater than 0
d$zStress > 0
## "|" : logical "or"; test whether either condition is TRUE
## depressed or high stress (zStress greater than 1)
d$Depressed == 1 | d$zStress > 1
## "&" : logical "and"; test whether both conditions are TRUE
## depressed and zStress less than 1
d$Depressed == 1 & d$zStress < 1
## we can use square brackets, [], to subset a variable or dataset
## we can subset by number or by logical value.
## here are all the values for zStress
d$zStress
## here are just the first and third values
d$zStress[c(1, 3)]
## here are just the values of zStress where Depressed == 1
d$zStress[d$Depressed == 1]
## here are just the values of zStress where zStress > 1
d$zStress[d$zStress > 1]