-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathassignment1
112 lines (87 loc) · 2.6 KB
/
assignment1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Part 1
# Mean of one pollutant across a set of monitor files.
#
# directory: path of the folder holding the monitor CSV files.
# pollutant: name (or index) of the column to average, e.g. "sulfate".
# id:        positions of the files to use within the sorted listing of
#            `directory` (defaults to 1:332).
#
# Returns the mean of the selected column over all chosen files, with NA
# values removed.
pollutantmean <- function(directory, pollutant, id = 1:332) {
  # List the files in the directory that was actually passed in, rather than
  # a hard-coded folder name.
  files_full <- list.files(directory, full.names = TRUE)
  paths <- files_full[id]
  if (anyNA(paths)) {
    stop("`id` refers to files that do not exist in '", directory, "'",
         call. = FALSE)
  }

  # Pull only the requested column from each file and concatenate once,
  # instead of growing a data frame with rbind() on every iteration.
  values <- unlist(
    lapply(paths, function(path) read.csv(path)[[pollutant]]),
    use.names = FALSE
  )

  mean(values, na.rm = TRUE)
}
# Part 2
# Count the complete (no-NA) rows in each selected monitor file.
#
# directory: path of the folder holding the monitor CSV files.
# id:        positions of the files to use within the sorted listing of
#            `directory` (defaults to 1:332).
#
# Returns a data frame with columns `id` (as supplied, in the same order)
# and `nobs` (number of complete rows in the corresponding file).
complete <- function(directory, id = 1:332) {
  # List the files in the directory that was actually passed in, rather than
  # a hard-coded folder name.
  files_full <- list.files(directory, full.names = TRUE)
  paths <- files_full[id]
  if (anyNA(paths)) {
    stop("`id` refers to files that do not exist in '", directory, "'",
         call. = FALSE)
  }

  # One count per file; numeric(1) keeps `nobs` a double vector as before.
  nobs <- vapply(
    paths,
    function(path) sum(complete.cases(read.csv(path))),
    numeric(1),
    USE.NAMES = FALSE
  )

  data.frame(id = id, nobs = nobs)
}
# Part 3
# Correlation between sulfate and nitrate for each monitor that has enough
# complete observations.
#
# directory: path of the folder holding the monitor CSV files.
# threshold: a monitor is used only when its number of complete rows is
#            strictly greater than this value (default 0).
#
# Returns a numeric vector with one entry per file found in `directory`.
# Entry i is the sulfate/nitrate correlation over the complete rows whose
# `ID` column equals i, or NA when that monitor does not exceed `threshold`.
# Callers that want only the qualifying monitors must drop the NA entries
# (sort() does so by default).
corr <- function(directory, threshold = 0) {
  # Build the file list locally; the function must not depend on a global
  # `files_full` left over from another call.
  files_full <- list.files(directory, full.names = TRUE)
  n_monitors <- length(files_full)
  if (n_monitors == 0) {
    return(numeric(0))
  }

  # Read every file once and bind in a single step, then keep only rows
  # with no missing values.
  dat <- do.call(rbind, lapply(files_full, read.csv))
  dat <- dat[complete.cases(dat), ]

  vapply(
    seq_len(n_monitors),
    function(i) {
      rows <- dat$ID == i
      nobs <- sum(rows)
      # cor() needs at least two observations to produce a value, so fewer
      # than two is reported as NA even when the threshold would allow it.
      if (nobs > threshold && nobs >= 2) {
        cor(dat$sulfate[rows], dat$nitrate[rows])
      } else {
        NA_real_
      }
    },
    numeric(1)
  )
}
# practice code
# NOTE(review): `dat2` and `top10` are not defined anywhere in this file;
# this scratch section presumably relies on objects left in an interactive
# workspace - confirm before sourcing the script.
cr <- corr("specdata", 2000)
cr <- sort(cr)
n <- length(cr)
print(n)
print(cr)

# Preallocate the result so the indexed assignment in the loop does not
# depend on a `crt` object already existing at top level.
crt <- rep(NA_real_, 332)
for (i in 1:332) {
  crt[i] <- cor(dat2$sulfate[dat2$ID == i], dat2$nitrate[dat2$ID == i])
}
crt <- cor(dat2$sulfate[dat2$ID == "2"], dat2$nitrate[dat2$ID == "2"])
cor(top10$Age[top10$Gender == "F"], top10$Count[top10$Gender == "F"])
# A column extracted with `$` is indexed with a single subscript; the
# trailing comma used previously raised "incorrect number of dimensions".
sum(dat2$nitrate[dat2$ID == "2"])
cor(dat2$sulfate, dat2$nitrate)
# question 7
# Complete-case counts for ten randomly chosen monitors.
set.seed(42)
cc <- complete("specdata", 332:1)
use <- sample(332, 10)
print(cc[use, "nobs"])

# question 8
# Five random correlations with the default threshold; sort() drops the NA
# placeholders corr() returns for monitors below the threshold.
cr <- corr("specdata")
cr <- sort(cr)
set.seed(868)
out <- round(cr[sample(length(cr), 5)], 4)
print(out)

# question 9
# Number of monitors above a threshold of 129, plus five random correlations.
cr <- corr("specdata", 129)
cr <- sort(cr)
n <- length(cr)
set.seed(197)
out <- c(n, round(cr[sample(n, 5)], 4))
print(out)

# question 10
cr <- corr("specdata", 2000)
# corr() pads monitors below the threshold with NA, so length(cr) would
# count every monitor; count only the entries that hold a correlation.
n <- sum(!is.na(cr))
cr <- corr("specdata", 1000)
cr <- sort(cr)
print(c(n, round(cr, 4)))