-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
144 lines (99 loc) · 3.64 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Use function runAnalysis() to start creation of the
# tidy data set. All parameters are optional.
#
# Args:
#   wdir:       directory that is made the working directory while the
#               analysis runs; the data set directory returned by rootDir()
#               is resolved relative to it.
#   targetFile: file the aggregated data set is written to with
#               write.table(); a relative path is resolved against wdir.
#
# Returns:
#   The aggregated data set that was written to targetFile.
runAnalysis <- function(wdir = ".", targetFile = "tidy.txt") {
  ## Set working directory for the duration of the analysis only;
  ## the caller's working directory is restored on exit (also on error)
  oldDir <- setwd(wdir)
  on.exit(setwd(oldDir), add = TRUE)

  ## Read the mean/std measurements of the test and training sets
  allData <- readData()
  ## Add activity information
  allData <- addActivity(allData)
  ## Add subject to data set
  allData <- addSubject(allData)
  ## Calculate mean per Subject/Activity
  aggData <- calcMean(allData)
  ## Modify column names for usability
  aggData <- makeColNames(aggData)
  ## Write to new file (tidy data set)
  write.table(aggData, targetFile)
  return(aggData)
}
# Read the test and training measurements, label the columns with the
# feature names and keep only the mean() and std() features.
#
# Returns:
#   The combined measurements (test rows first, training rows second)
#   restricted to the mean() columns followed by the std() columns.
readData <- function() {
  ## Locations of the input files below the data set root
  base <- rootDir()
  testPath <- file.path(base, "test", "X_test.txt")
  trainPath <- file.path(base, "train", "X_train.txt")
  featPath <- file.path(base, "features.txt")

  ## Stack the test measurements on top of the training measurements
  measurements <- rbind(read.table(testPath), read.table(trainPath))

  ## The second column of the features file supplies the column headers
  headers <- as.vector(read.table(featPath)[, 2])
  colnames(measurements) <- headers

  ## Positions of the mean() features, then those of the std() features
  meanIdx <- grep("mean[(][)]", headers)
  stdIdx <- grep("std[(][)]", headers)

  ## Keep only those columns
  measurements[, c(meanIdx, stdIdx)]
}
# Clean the column names of a data set for easier handling within R.
#
# Args:
#   sensorData: object whose column names should be cleaned
#               (e.g. a data frame).
#
# Returns:
#   sensorData with every "()" removed from its column names and every
#   hyphen or underscore replaced by a dot. An object without column
#   names is returned unchanged.
makeColNames <- function(sensorData) {
  cleaned <- colnames(sensorData)
  ## Nothing to clean when the object carries no column names at all
  if (is.null(cleaned)) {
    return(sensorData)
  }
  ## gsub() is vectorized, so no index loop is needed; this also keeps
  ## a zero-column input working (1:length(x) would iterate over c(1, 0))
  ## Remove parentheses from names
  cleaned <- gsub("[(][)]", "", cleaned)
  ## Convert to lower case (disabled)
  # cleaned <- tolower(cleaned)
  ## Replace hyphens and underscores with dots
  cleaned <- gsub("[-_]", ".", cleaned)
  colnames(sensorData) <- cleaned
  return(sensorData)
}
# Aggregate the data by Subject and Activity using the mean() function.
#
# Args:
#   sensorData: data frame holding the measurement columns plus the
#               columns "activity" and "subject".
#
# Returns:
#   A data frame with the grouping columns "Activity" and "Subject"
#   followed by the mean of every measurement column per group.
calcMean <- function(sensorData) {
  keyCols <- c("activity", "subject")
  missingCols <- setdiff(keyCols, colnames(sensorData))
  if (length(missingCols) > 0) {
    stop("sensorData lacks required column(s): ",
         paste(missingCols, collapse = ", "), call. = FALSE)
  }
  ## Average the measurement columns only. Selecting them by name avoids
  ## calling mean() on the activity/subject columns and does not depend
  ## on those two columns being the last ones in the data set.
  isMeasure <- !(colnames(sensorData) %in% keyCols)
  measures <- sensorData[, isMeasure, drop = FALSE]
  aggregate(measures,
            list(Activity = sensorData$activity,
                 Subject = sensorData$subject),
            FUN = mean)
}
# Append a column "activity" holding a readable activity name per row.
#
# Args:
#   sensorData: data frame with one row per record of the test set
#               followed by the training set.
#
# Returns:
#   sensorData with an additional last column named "activity".
addActivity <- function(sensorData) {
  ## File names
  testActFile <- file.path(rootDir(), "test", "y_test.txt")
  trainActFile <- file.path(rootDir(), "train", "y_train.txt")
  labelFile <- file.path(rootDir(), "activity_labels.txt")

  ## Activity ids, test rows first and training rows second
  testAct <- read.table(testActFile)
  trainAct <- read.table(trainActFile)
  actIds <- rbind(testAct, trainAct)[[1]]

  ## Fail loudly instead of letting the assignment below recycle values
  if (length(actIds) != nrow(sensorData)) {
    stop("Number of activity records (", length(actIds),
         ") does not match number of sensor rows (", nrow(sensorData), ")",
         call. = FALSE)
  }

  ## For the different activities, use readable names instead of an
  ## integer. The lookup is vectorized and matches on the id column of
  ## the label file rather than on the row position of the label.
  labels <- read.table(labelFile)
  colIdx <- ncol(sensorData) + 1
  sensorData[, colIdx] <- labels[match(actIds, labels[[1]]), 2]

  ## Set column name for the activity column
  colnames(sensorData)[colIdx] <- "activity"
  return(sensorData)
}
# Append a column "subject" holding the subject id of every row.
#
# Args:
#   sensorData: data set the subject column is appended to.
#
# Returns:
#   sensorData with an additional last column named "subject".
addSubject <- function(sensorData) {
  ## Index of the column that is about to be added
  newCol <- ncol(sensorData) + 1

  ## Files containing subject data
  trainPath <- file.path(rootDir(), "train", "subject_train.txt")
  testPath <- file.path(rootDir(), "test", "subject_test.txt")

  ## Read both files and stack them, test rows first
  trainIds <- read.table(trainPath)
  testIds <- read.table(testPath)
  subjects <- rbind(testIds, trainIds)

  ## Append to the data set and name the new column
  sensorData[, newCol] <- subjects
  colnames(sensorData)[newCol] <- "subject"
  sensorData
}
# Directory name used as the root of all input file paths.
rootDir <- function() {
  "UCI HAR Dataset"
}
# Run the analysis with the default arguments (wdir = ".",
# targetFile = "tidy.txt") as soon as this file is evaluated.
runAnalysis()