From 5c7bbe744d13497757ecaca16d4d283d0e0efca2 Mon Sep 17 00:00:00 2001
From: Dan Halligan <dan.halligan@gmail.com>
Date: Fri, 3 Jan 2025 09:51:40 +0000
Subject: [PATCH] Formatting updates

---
 02-statistical-learning.Rmd                |  28 +++--
 03-linear-regression.Rmd                   |  36 +++---
 04-classification.Rmd                      |  45 ++++---
 07-moving-beyond-linearity.Rmd             |  22 ++--
 08-tree-based-methods.Rmd                  | 115 +++++++++--------
 09-support-vector-mechines.Rmd             | 140 +++++++++++----------
 10-deep-learning.Rmd                       |   2 +-
 11-survival-analysis-and-censored-data.Rmd |   4 +-
 12-unsupervised-learning.Rmd               |  22 ++--
 9 files changed, 226 insertions(+), 188 deletions(-)

diff --git a/02-statistical-learning.Rmd b/02-statistical-learning.Rmd
index 7857664..55ac52b 100644
--- a/02-statistical-learning.Rmd
+++ b/02-statistical-learning.Rmd
@@ -302,7 +302,7 @@ college$Elite <- factor(ifelse(college$Top10perc > 50, "Yes", "No"))
 summary(college$Elite)
 plot(college$Outstate ~ college$Elite, xlab = "Elite", ylab = "Outstate")
 
-par(mfrow = c(2,2))
+par(mfrow = c(2, 2))
 for (n in c(5, 10, 20, 50)) {
   hist(college$Enroll, breaks = n, main = paste("n =", n), xlab = "Enroll")
 }
@@ -374,7 +374,7 @@ x[-(10:85), numeric] |>
 >    the relationships among the predictors. Comment on your findings.
 
 ```{r}
-pairs(x[, numeric], cex = 0.2) 
+pairs(x[, numeric], cex = 0.2)
 cor(x[, numeric]) |>
   kable()
 
@@ -425,8 +425,10 @@ library(tidyverse)
 ```
 
 ```{r}
-ggplot(Boston, aes(nox, rm)) + geom_point()
-ggplot(Boston, aes(ptratio, rm)) + geom_point()
+ggplot(Boston, aes(nox, rm)) +
+  geom_point()
+ggplot(Boston, aes(ptratio, rm)) +
+  geom_point()
 heatmap(cor(Boston, method = "spearman"), cexRow = 1.1, cexCol = 1.1)
 ```
 
@@ -440,12 +442,12 @@ Yes
 >    predictor.
 
 ```{r}
-Boston |> 
-  pivot_longer(cols = 1:13) |> 
-  filter(name %in% c("crim", "tax", "ptratio")) |> 
-  ggplot(aes(value)) + 
-    geom_histogram(bins = 20) + 
-    facet_wrap(~name, scales="free", ncol= 1)
+Boston |>
+  pivot_longer(cols = 1:13) |>
+  filter(name %in% c("crim", "tax", "ptratio")) |>
+  ggplot(aes(value)) +
+  geom_histogram(bins = 20) +
+  facet_wrap(~name, scales = "free", ncol = 1)
 ```
 
 Yes, particularly crime and tax rates.
@@ -496,9 +498,9 @@ Boston |>
   select(-c(crim, zn)) |>
   pivot_longer(!rm) |>
   mutate(">8 rooms" = rm > 8) |>
-  ggplot(aes(`>8 rooms`, value)) + 
-    geom_boxplot() + 
-    facet_wrap(~name, scales = "free")
+  ggplot(aes(`>8 rooms`, value)) +
+  geom_boxplot() +
+  facet_wrap(~name, scales = "free")
 ```
 
 Census tracts with big average properties (more than eight rooms per dwelling)
diff --git a/03-linear-regression.Rmd b/03-linear-regression.Rmd
index 98a1b5a..5a95967 100644
--- a/03-linear-regression.Rmd
+++ b/03-linear-regression.Rmd
@@ -67,11 +67,11 @@ library(plotly)
 ```{r}
 model <- function(gpa, iq, level) {
   50 +
-  gpa * 20 +
-  iq * 0.07 +
-  level * 35 +
-  gpa * iq * 0.01 +
-  gpa * level * -10
+    gpa * 20 +
+    iq * 0.07 +
+    level * 35 +
+    gpa * iq * 0.01 +
+    gpa * level * -10
 }
 x <- seq(1, 5, length = 10)
 y <- seq(1, 200, length = 20)
@@ -82,15 +82,18 @@ plot_ly(x = x, y = y) |>
   add_surface(
     z = ~college,
     colorscale = list(c(0, 1), c("rgb(107,184,214)", "rgb(0,90,124)")),
-    colorbar = list(title = "College")) |>
+    colorbar = list(title = "College")
+  ) |>
   add_surface(
     z = ~high_school,
     colorscale = list(c(0, 1), c("rgb(255,112,184)", "rgb(128,0,64)")),
-    colorbar = list(title = "High school")) |>
+    colorbar = list(title = "High school")
+  ) |>
   layout(scene = list(
     xaxis = list(title = "GPA"),
     yaxis = list(title = "IQ"),
-    zaxis = list(title = "Salary")))
+    zaxis = list(title = "Salary")
+  ))
 ```
 
 Option iii correct.
@@ -366,7 +369,7 @@ par(mfrow = c(2, 2))
 plot(Auto$horsepower, Auto$mpg, cex = 0.2)
 plot(log(Auto$horsepower), Auto$mpg, cex = 0.2)
 plot(sqrt(Auto$horsepower), Auto$mpg, cex = 0.2)
-plot(Auto$horsepower ^ 2, Auto$mpg, cex = 0.2)
+plot(Auto$horsepower^2, Auto$mpg, cex = 0.2)
 
 x <- subset(Auto, select = -name)
 x$horsepower <- log(x$horsepower)
@@ -553,7 +556,7 @@ We can show this numerically in R by computing $t$ using the above equation.
 
 ```{r}
 n <- length(x)
-sqrt(n - 1) * sum(x * y) / sqrt(sum(x ^ 2) * sum(y ^ 2) - sum(x * y) ^ 2)
+sqrt(n - 1) * sum(x * y) / sqrt(sum(x^2) * sum(y^2) - sum(x * y)^2)
 ```
 
 > e. Using the results from (d), argue that the _t_-statistic for the
@@ -846,9 +849,9 @@ contributions.
 >    answers.
 
 ```{r}
-x1 <- c(x1 , 0.1)
-x2 <- c(x2 , 0.8)
-y <- c(y ,6)
+x1 <- c(x1, 0.1)
+x2 <- c(x2, 0.8)
+y <- c(y, 6)
 summary(lm(y ~ x1 + x2))
 summary(lm(y ~ x1))
 summary(lm(y ~ x2))
@@ -929,9 +932,10 @@ The results from (b) show reduced significance compared to the models fit in
 (a). 
 
 ```{r}
-plot(sapply(fits, function(x) coef(x)[2]), coef(mfit)[-1], 
-  xlab = "Univariate regression", 
-  ylab = "multiple regression")
+plot(sapply(fits, function(x) coef(x)[2]), coef(mfit)[-1],
+  xlab = "Univariate regression",
+  ylab = "multiple regression"
+)
 ```
 
 The estimated coefficients differ (in particular the estimated coefficient for
diff --git a/04-classification.Rmd b/04-classification.Rmd
index 84775e5..ca88a0e 100644
--- a/04-classification.Rmd
+++ b/04-classification.Rmd
@@ -23,11 +23,10 @@ $$
 Letting $x = e^{\beta_0 + \beta_1X}$
 
 \begin{align}
-\frac{P(X)}{1-p(X)} &= \frac{\frac{x}{1 + x}}
-                            {1 - \frac{x}{1 + x}} \\
-              &= \frac{\frac{x}{1 + x}}
-                      {\frac{1}{1 + x}} \\
-              &= x
+\frac{P(X)}{1-p(X)} 
+  &= \frac{\frac{x}{1 + x}} {1 - \frac{x}{1 + x}} \\
+  &= \frac{\frac{x}{1 + x}} {\frac{1}{1 + x}} \\
+  &= x
 \end{align}
 
 ### Question 2
@@ -65,7 +64,7 @@ therefore, we can consider maximizing $\log(p_K(X))$
 
 $$
 \log(p_k(x)) = \log(\pi_k) - \frac{1}{2\sigma^2}(x - \mu_k)^2 -
-              \log\left(\sum_{l=1}^k \pi_l \exp\left(-\frac{1}{2\sigma^2}(x - \mu_l)^2\right)\right)
+               \log\left(\sum_{l=1}^k \pi_l \exp\left(-\frac{1}{2\sigma^2}(x - \mu_l)^2\right)\right)
 $$
 
 Remember that we are maximizing over $k$, and since the last term does not
@@ -265,7 +264,7 @@ when $X_1 = 40$ and $X_2 = 3.5$, $p(X) = 0.38$
 >    chance of getting an A in the class?
 
 We would like to solve for $X_1$ where $p(X) = 0.5$. Taking the first equation
-above, we need to solve $0 = −6 + 0.05X_1 + 3.5$, so $X_1 = 50$ hours.
+above, we need to solve $0 = -6 + 0.05X_1 + 3.5$, so $X_1 = 50$ hours.
 
 ### Question 7
 
@@ -305,7 +304,7 @@ p(D|v) &= \frac{p(v|D) p(D)}{p(v|D)p(D) + p(v|N)p(N)} \\
 \end{align}
 
 ```{r}
-exp(-0.5) * 0.8 / (exp(-0.5) * 0.8 + exp(-2/9) * 0.2)
+exp(-0.5) * 0.8 / (exp(-0.5) * 0.8 + exp(-2 / 9) * 0.2)
 ```
 
 ### Question 8
@@ -412,7 +411,7 @@ $$
 (\hat\alpha_{orange0} - \hat\alpha_{apple0}) + (\hat\alpha_{orange1} - \hat\alpha_{apple1})x
 $$
 
-> c. Suppose that in your model, $\hat\beta_0 = 2$ and $\hat\beta = −1$. What
+> c. Suppose that in your model, $\hat\beta_0 = 2$ and $\hat\beta = -1$. What
 >    are the coefficient estimates in your friend's model? Be as specific as
 >    possible.
 
@@ -423,7 +422,7 @@ We are unable to know the specific value of each parameter however.
 
 > d. Now suppose that you and your friend fit the same two models on a different
 >    data set. This time, your friend gets the coefficient estimates 
->    $\hat\alpha_{orange0} = 1.2$, $\hat\alpha_{orange1} = −2$, 
+>    $\hat\alpha_{orange0} = 1.2$, $\hat\alpha_{orange1} = -2$, 
 >    $\hat\alpha_{apple0} = 3$, $\hat\alpha_{apple1} = 0.6$. What are the 
 >    coefficient estimates in your model?
 
@@ -571,7 +570,7 @@ fit <- glm(Direction ~ Lag3, data = Weekly[train, ], family = binomial)
 pred <- predict(fit, Weekly[!train, ], type = "response") > 0.5
 mean(ifelse(pred, "Up", "Down") == Weekly[!train, ]$Direction)
 
-fit <- glm(Direction ~Lag4, data = Weekly[train, ], family = binomial)
+fit <- glm(Direction ~ Lag4, data = Weekly[train, ], family = binomial)
 pred <- predict(fit, Weekly[!train, ], type = "response") > 0.5
 mean(ifelse(pred, "Up", "Down") == Weekly[!train, ]$Direction)
 
@@ -583,7 +582,7 @@ fit <- glm(Direction ~ Lag1 * Lag2 * Lag3 * Lag4, data = Weekly[train, ], family
 pred <- predict(fit, Weekly[!train, ], type = "response") > 0.5
 mean(ifelse(pred, "Up", "Down") == Weekly[!train, ]$Direction)
 
-fit <- lda(Direction ~ Lag1 + Lag2 + Lag3 + Lag4,data = Weekly[train, ])
+fit <- lda(Direction ~ Lag1 + Lag2 + Lag3 + Lag4, data = Weekly[train, ])
 pred <- predict(fit, Weekly[!train, ], type = "response")$class
 mean(pred == Weekly[!train, ]$Direction)
 
@@ -658,7 +657,7 @@ variables are colinear.
 
 ```{r}
 set.seed(1)
-train <- sample(seq_len(nrow(x)), nrow(x) * 2/3)
+train <- sample(seq_len(nrow(x)), nrow(x) * 2 / 3)
 ```
 
 > d. Perform LDA on the training data in order to predict `mpg01` using the
@@ -787,8 +786,8 @@ Power3 <- function(x, a) {
 >    `log = "y"`, or `log = "xy"` as arguments to the `plot()` function.
 
 ```{r}
-plot(1:10, Power3(1:10, 2), 
-  xlab = "x", 
+plot(1:10, Power3(1:10, 2),
+  xlab = "x",
   ylab = expression(paste("x"^"2")),
   log = "y"
 )
@@ -806,7 +805,7 @@ plot(1:10, Power3(1:10, 2),
 ```{r}
 PlotPower <- function(x, a, log = "y") {
   plot(x, Power3(x, a),
-    xlab = "x", 
+    xlab = "x",
     ylab = substitute("x"^a, list(a = a)),
     log = log
   )
@@ -827,11 +826,11 @@ PlotPower(1:10, 3)
 
 ```{r}
 x <- cbind(
-  ISLR2::Boston[, -1], 
+  ISLR2::Boston[, -1],
   data.frame("highcrim" = Boston$crim > median(Boston$crim))
 )
 set.seed(1)
-train <- sample(seq_len(nrow(x)), nrow(x) * 2/3)
+train <- sample(seq_len(nrow(x)), nrow(x) * 2 / 3)
 ```
 
 We can find the most associated variables by performing wilcox tests.
@@ -861,8 +860,8 @@ Let's look at univariate associations with `highcrim` (in the training data)
 x[train, ] |>
   pivot_longer(!highcrim) |>
   mutate(name = factor(name, levels = ord)) |>
-  ggplot(aes(highcrim, value)) + 
-  geom_boxplot() + 
+  ggplot(aes(highcrim, value)) +
+  geom_boxplot() +
   facet_wrap(~name, scale = "free")
 ```
 
@@ -902,9 +901,9 @@ res <- sapply(1:12, function(max) fit_models(1:max))
 res <- as_tibble(t(res))
 res$n_var <- 1:12
 pivot_longer(res, cols = !n_var) |>
-  ggplot(aes(n_var, value, col = name)) + 
-  geom_line() + 
-  xlab("Number of predictors") + 
+  ggplot(aes(n_var, value, col = name)) +
+  geom_line() +
+  xlab("Number of predictors") +
   ylab("Error rate")
 ```
 
diff --git a/07-moving-beyond-linearity.Rmd b/07-moving-beyond-linearity.Rmd
index a7140dd..b5e0e29 100644
--- a/07-moving-beyond-linearity.Rmd
+++ b/07-moving-beyond-linearity.Rmd
@@ -175,9 +175,9 @@ grid()
 
 ```{r}
 x <- seq(-2, 6, length.out = 1000)
-b1 <- function(x) I(0 <= x & x <= 2) - (x - 1) * I(1 <= x &  x <= 2)
-b2 <- function(x) (x - 3) * I(3 <= x  & x <= 4) + I(4 < x & x <= 5)
-f <- function(x) 1 + 1*b1(x) + 3*b2(x)
+b1 <- function(x) I(0 <= x & x <= 2) - (x - 1) * I(1 <= x & x <= 2)
+b2 <- function(x) (x - 3) * I(3 <= x & x <= 4) + I(4 < x & x <= 5)
+f <- function(x) 1 + 1 * b1(x) + 3 * b2(x)
 plot(x, f(x), type = "l")
 grid()
 ```
@@ -364,7 +364,7 @@ err5 <- mean(replicate(10, {
 c(err, err1, err2, err3, err4, err5)
 anova(fit, fit1, fit2, fit3, fit4, fit5)
 
-x <- seq(min(Auto$horsepower), max(Auto$horsepower), length.out=1000)
+x <- seq(min(Auto$horsepower), max(Auto$horsepower), length.out = 1000)
 pred <- data.frame(
   x = x,
   "Linear" = predict(fit, data.frame(horsepower = x)),
@@ -407,7 +407,7 @@ lines(x, predict(fit, data.frame(dis = x)), col = "red", lty = 2)
 ```{r}
 fits <- lapply(1:10, function(i) glm(nox ~ poly(dis, i), data = Boston))
 
-x <- seq(min(Boston$dis), max(Boston$dis), length.out=1000)
+x <- seq(min(Boston$dis), max(Boston$dis), length.out = 1000)
 pred <- data.frame(lapply(fits, function(fit) predict(fit, data.frame(dis = x))))
 colnames(pred) <- 1:10
 pred$x <- x
@@ -604,7 +604,7 @@ beta1 <- 20
 >    ```
 
 ```{r}
-a <- y - beta1*x1
+a <- y - beta1 * x1
 beta2 <- lm(a ~ x2)$coef[2]
 ```
 
@@ -633,15 +633,15 @@ res <- matrix(NA, nrow = 1000, ncol = 3)
 colnames(res) <- c("beta0", "beta1", "beta2")
 beta1 <- 20
 for (i in 1:1000) {
-  beta2 <- lm(y - beta1*x1 ~ x2)$coef[2]
-  beta1 <- lm(y - beta2*x2 ~ x1)$coef[2]
-  beta0 <- lm(y - beta2*x2 ~ x1)$coef[1]
+  beta2 <- lm(y - beta1 * x1 ~ x2)$coef[2]
+  beta1 <- lm(y - beta2 * x2 ~ x1)$coef[2]
+  beta0 <- lm(y - beta2 * x2 ~ x1)$coef[1]
   res[i, ] <- c(beta0, beta1, beta2)
 }
 res <- as.data.frame(res)
 res$Iteration <- 1:1000
 res <- pivot_longer(res, !Iteration)
-p <- ggplot(res, aes(x=Iteration, y=value, color=name)) +
+p <- ggplot(res, aes(x = Iteration, y = value, color = name)) +
   geom_line() +
   geom_point() +
   scale_x_continuous(trans = "log10")
@@ -682,7 +682,7 @@ n <- 1000
 
 betas <- rnorm(p) * 5
 x <- matrix(rnorm(n * p), ncol = p, nrow = n)
-y <- (x %*% betas) + rnorm(n)  # ignore beta0 for simplicity
+y <- (x %*% betas) + rnorm(n) # ignore beta0 for simplicity
 
 # multiple regression
 fit <- lm(y ~ x - 1)
diff --git a/08-tree-based-methods.Rmd b/08-tree-based-methods.Rmd
index 3ea2813..87fe01c 100644
--- a/08-tree-based-methods.Rmd
+++ b/08-tree-based-methods.Rmd
@@ -25,13 +25,15 @@ tree <- ape::read.tree(text = "(((R1:1,R2:1)N1:2,R3:4)N2:2,(R4:2,(R5:1,R6:1)R3:2
 tree$node.label <- c("Age < 40", "Weight < 100", "Weight < 70", "Age < 60", "Weight < 80")
 
 ggtree(tree, ladderize = FALSE) + scale_x_reverse() + coord_flip() +
-  geom_tiplab(vjust = 2, hjust = 0.5) + 
-  geom_text2(aes(label=label, subset=!isTip), hjust = -0.1, vjust = -1)
+  geom_tiplab(vjust = 2, hjust = 0.5) +
+  geom_text2(aes(label = label, subset = !isTip), hjust = -0.1, vjust = -1)
 ```
 
 ```{r}
-plot(NULL, xlab="Age (years)", ylab="Weight (kg)", 
-  xlim = c(0, 100), ylim = c(40, 160), xaxs = "i", yaxs = "i")
+plot(NULL,
+  xlab = "Age (years)", ylab = "Weight (kg)",
+  xlim = c(0, 100), ylim = c(40, 160), xaxs = "i", yaxs = "i"
+)
 abline(v = 40, col = "red", lty = 2)
 lines(c(0, 40), c(100, 100), col = "blue", lty = 2)
 lines(c(0, 40), c(70, 70), col = "blue", lty = 2)
@@ -39,8 +41,8 @@ abline(v = 60, col = "red", lty = 2)
 lines(c(60, 100), c(80, 80), col = "blue", lty = 2)
 
 text(
-  c(20, 20, 20, 50, 80, 80), 
-  c(55, 85, 130, 100, 60, 120), 
+  c(20, 20, 20, 50, 80, 80),
+  c(55, 85, 130, 100, 60, 120),
   labels = c("R1", "R2", "R3", "R4", "R5", "R6")
 )
 ```
@@ -92,15 +94,15 @@ $$E = 1 - \max_k(\hat{p}_{mk})$$
 # Function definitions are for when there's two classes only
 p <- seq(0, 1, length.out = 100)
 data.frame(
-    x = p,
-    "Gini index" = p * (1 - p) * 2,
-    "Entropy" = -(p * log(p) + (1 - p) * log(1 - p)),
-    "Classification error" = 1 - pmax(p, 1 - p),
-    check.names = FALSE
-  ) |>
+  x = p,
+  "Gini index" = p * (1 - p) * 2,
+  "Entropy" = -(p * log(p) + (1 - p) * log(1 - p)),
+  "Classification error" = 1 - pmax(p, 1 - p),
+  check.names = FALSE
+) |>
   pivot_longer(!x) |>
-  ggplot(aes(x = x, y = value, color = name)) + 
-    geom_line(na.rm = TRUE)
+  ggplot(aes(x = x, y = value, color = name)) +
+  geom_line(na.rm = TRUE)
 ```
 
 ### Question 4
@@ -116,8 +118,8 @@ tree <- ape::read.tree(text = "(((3:1.5,(10:1,0:1)A:1)B:1,15:2)C:1,5:2)D;")
 tree$node.label <- c("X1 < 1", "X2 < 1", "X1 < 0", "X2 < 0")
 
 ggtree(tree, ladderize = FALSE) + scale_x_reverse() + coord_flip() +
-  geom_tiplab(vjust = 2, hjust = 0.5) + 
-  geom_text2(aes(label=label, subset=!isTip), hjust = -0.1, vjust = -1)
+  geom_tiplab(vjust = 2, hjust = 0.5) +
+  geom_text2(aes(label = label, subset = !isTip), hjust = -0.1, vjust = -1)
 ```
 
 > b. Create a diagram similar to the left-hand panel of Figure 8.12, using the
@@ -126,14 +128,14 @@ ggtree(tree, ladderize = FALSE) + scale_x_reverse() + coord_flip() +
 >    mean for each region.
 
 ```{r}
-plot(NULL, xlab="X1", ylab="X2", xlim = c(-1, 2), ylim = c(0, 3), xaxs = "i", yaxs = "i")
+plot(NULL, xlab = "X1", ylab = "X2", xlim = c(-1, 2), ylim = c(0, 3), xaxs = "i", yaxs = "i")
 abline(h = 1, col = "red", lty = 2)
 lines(c(1, 1), c(0, 1), col = "blue", lty = 2)
 lines(c(-1, 2), c(2, 2), col = "red", lty = 2)
 lines(c(0, 0), c(1, 2), col = "blue", lty = 2)
 text(
-  c(0, 1.5, -0.5, 1, 0.5), 
-  c(0.5, 0.5, 1.5, 1.5, 2.5), 
+  c(0, 1.5, -0.5, 1, 0.5),
+  c(0.5, 0.5, 1.5, 1.5, 2.5),
   labels = c("-1.80", "0.63", "-1.06", "0.21", "2.49")
 )
 ```
@@ -187,11 +189,11 @@ train <- sample(c(TRUE, FALSE), nrow(Boston), replace = TRUE)
 
 rf_err <- function(mtry) {
   randomForest(
-    Boston[train, -13], 
-    y = Boston[train, 13], 
-    xtest = Boston[!train, -13], 
-    ytest = Boston[!train, 13], 
-    mtry = mtry, 
+    Boston[train, -13],
+    y = Boston[train, 13],
+    xtest = Boston[!train, -13],
+    ytest = Boston[!train, 13],
+    mtry = mtry,
     ntree = 500
   )$test$mse
 }
@@ -200,12 +202,12 @@ names(res) <- c(1, 2, 3, 5, 7, 10, 12)
 data.frame(res, check.names = FALSE) |>
   mutate(n = 1:500) |>
   pivot_longer(!n) |>
-  ggplot(aes(x = n, y = value, color = name)) + 
-    geom_line(na.rm = TRUE) + 
-    xlab("Number of trees") + 
-    ylab("Error") +
-    scale_y_log10() +
-    scale_color_discrete(name = "No. variables at\neach split")
+  ggplot(aes(x = n, y = value, color = name)) +
+  geom_line(na.rm = TRUE) +
+  xlab("Number of trees") +
+  ylab("Error") +
+  scale_y_log10() +
+  scale_color_discrete(name = "No. variables at\neach split")
 ```
 
 ### Question 8
@@ -267,8 +269,10 @@ carseats_mse(ptr)
 ```{r}
 # Here we can use random Forest with mtry = 10 = p (the number of predictor
 # variables) to perform bagging
-bagged <- randomForest(Sales ~ ., data = Carseats[train, ], mtry = 10, 
-  ntree = 200, importance = TRUE)
+bagged <- randomForest(Sales ~ .,
+  data = Carseats[train, ], mtry = 10,
+  ntree = 200, importance = TRUE
+)
 carseats_mse(bagged)
 importance(bagged)
 ```
@@ -282,8 +286,10 @@ regression tree above.
 >    considered at each split, on the error rate obtained.
 
 ```{r}
-rf <- randomForest(Sales ~ ., data = Carseats[train, ], mtry = 3, 
-  ntree = 500, importance = TRUE)
+rf <- randomForest(Sales ~ .,
+  data = Carseats[train, ], mtry = 3,
+  ntree = 500, importance = TRUE
+)
 carseats_mse(rf)
 importance(rf)
 ```
@@ -296,12 +302,13 @@ regression tree above, although not quite as good as the bagging approach.
 ```{r}
 library(BART)
 
-# For ease, we'll create a fake "predict" method that just returns 
+# For ease, we'll create a fake "predict" method that just returns
 # yhat.test.mean regardless of provided "newdata"
 predict.wbart <- function(model, ...) model$yhat.test.mean
 
-bartfit <- gbart(Carseats[train, 2:11], Carseats[train, 1], 
-  x.test = Carseats[!train, 2:11])
+bartfit <- gbart(Carseats[train, 2:11], Carseats[train, 1],
+  x.test = Carseats[!train, 2:11]
+)
 carseats_mse(bartfit)
 ```
 
@@ -444,17 +451,21 @@ test <- setdiff(1:nrow(dat), train)
 ```{r}
 library(gbm)
 set.seed(42)
-lambdas <- 10 ^ seq(-3, 0, by = 0.1)
+lambdas <- 10^seq(-3, 0, by = 0.1)
 fits <- lapply(lambdas, function(lam) {
-  gbm(Salary ~ ., data = dat[train, ], distribution = "gaussian", 
-    n.trees = 1000, shrinkage = lam)
+  gbm(Salary ~ .,
+    data = dat[train, ], distribution = "gaussian",
+    n.trees = 1000, shrinkage = lam
+  )
 })
 errs <- sapply(fits, function(fit) {
   p <- predict(fit, dat[train, ], n.trees = 1000)
   mean((p - dat[train, ]$Salary)^2)
 })
-plot(lambdas, errs, type = "b", xlab = "Shrinkage values", 
-  ylab = "Training MSE", log = "xy")
+plot(lambdas, errs,
+  type = "b", xlab = "Shrinkage values",
+  ylab = "Training MSE", log = "xy"
+)
 ```
 
 > d. Produce a plot with different shrinkage values on the $x$-axis and the
@@ -465,8 +476,10 @@ errs <- sapply(fits, function(fit) {
   p <- predict(fit, dat[test, ], n.trees = 1000)
   mean((p - dat[test, ]$Salary)^2)
 })
-plot(lambdas, errs, type = "b", xlab = "Shrinkage values", 
-  ylab = "Training MSE", log = "xy")
+plot(lambdas, errs,
+  type = "b", xlab = "Shrinkage values",
+  ylab = "Training MSE", log = "xy"
+)
 min(errs)
 abline(v = lambdas[which.min(errs)], lty = 2, col = "red")
 ```
@@ -558,7 +571,7 @@ For logistic regression we correctly predict 14% of those predicted to purchase.
 ```{r}
 library(class)
 # KNN
-fit <- knn(Caravan[train, -86], Caravan[test, -86],  Caravan$Purchase[train])
+fit <- knn(Caravan[train, -86], Caravan[test, -86], Caravan$Purchase[train])
 table(fit, Caravan[test, "Purchase"] == "Yes")
 sum(fit == "Yes" & Caravan[test, "Purchase"] == "Yes") / sum(fit == "Yes")
 ```
@@ -587,8 +600,8 @@ test <- setdiff(1:nrow(College), train)
 lr <- gam(Outstate ~ ., data = College[train, ])
 
 # GAM from chapter 7
-gam <- gam(Outstate ~ Private + s(Room.Board, 2) + s(PhD, 2) + 
-    s(perc.alumni, 2) + s(Expend, 2) + s(Grad.Rate, 2), data = College[train, ])
+gam <- gam(Outstate ~ Private + s(Room.Board, 2) + s(PhD, 2) +
+  s(perc.alumni, 2) + s(Expend, 2) + s(Grad.Rate, 2), data = College[train, ])
 
 # Boosting
 boosted <- gbm(Outstate ~ ., data = College[train, ], n.trees = 1000, shrinkage = 0.01)
@@ -601,8 +614,9 @@ rf <- randomForest(Outstate ~ ., data = College[train, ], mtry = 4, ntree = 1000
 
 # BART
 pred <- setdiff(colnames(College), "Outstate")
-bart <- gbart(College[train, pred], College[train, "Outstate"], 
-  x.test = College[test, pred])
+bart <- gbart(College[train, pred], College[train, "Outstate"],
+  x.test = College[test, pred]
+)
 
 mse <- function(model, ...) {
   pred <- predict(model, College[test, ], ...)
@@ -619,7 +633,8 @@ res <- c(
 )
 res <- data.frame("MSE" = res)
 res$Model <- factor(row.names(res), levels = rev(row.names(res)))
-ggplot(res, aes(Model, MSE)) + coord_flip() + 
+ggplot(res, aes(Model, MSE)) +
+  coord_flip() +
   geom_bar(stat = "identity", fill = "steelblue")
 ```
 
diff --git a/09-support-vector-mechines.Rmd b/09-support-vector-mechines.Rmd
index 4cae116..efbd505 100644
--- a/09-support-vector-mechines.Rmd
+++ b/09-support-vector-mechines.Rmd
@@ -6,36 +6,36 @@
 
 > This problem involves hyperplanes in two dimensions.
 >
-> a. Sketch the hyperplane $1 + 3X_1 − X_2 = 0$. Indicate the set of points for
->    which $1 + 3X_1 − X_2 > 0$, as well as the set of points for which
->    $1 + 3X_1 − X_2 < 0$.
+> a. Sketch the hyperplane $1 + 3X_1 - X_2 = 0$. Indicate the set of points for
+>    which $1 + 3X_1 - X_2 > 0$, as well as the set of points for which
+>    $1 + 3X_1 - X_2 < 0$.
 
 ```{r}
 library(ggplot2)
 xlim <- c(-10, 10)
 ylim <- c(-30, 30)
 points <- expand.grid(
-  X1 = seq(xlim[1], xlim[2], length.out = 50), 
+  X1 = seq(xlim[1], xlim[2], length.out = 50),
   X2 = seq(ylim[1], ylim[2], length.out = 50)
 )
-p <- ggplot(points, aes(x = X1, y = X2)) + 
-  geom_abline(intercept = 1, slope = 3) +  # X2 = 1 + 3X1 
+p <- ggplot(points, aes(x = X1, y = X2)) +
+  geom_abline(intercept = 1, slope = 3) + # X2 = 1 + 3X1
   theme_bw()
-p + geom_point(aes(color = 1 + 3*X1 - X2 > 0), size = 0.1) + 
-  scale_color_discrete(name = "1 + 3X1 − X2 > 0")
+p + geom_point(aes(color = 1 + 3 * X1 - X2 > 0), size = 0.1) +
+  scale_color_discrete(name = "1 + 3X1 - X2 > 0")
 ```
 
-> b. On the same plot, sketch the hyperplane $−2 + X_1 + 2X_2 = 0$. Indicate the
->    set of points for which $−2 + X_1 + 2X_2 > 0$, as well as the set of points
->    for which $−2 + X_1 + 2X_2 < 0$.
+> b. On the same plot, sketch the hyperplane $-2 + X_1 + 2X_2 = 0$. Indicate the
+>    set of points for which $-2 + X_1 + 2X_2 > 0$, as well as the set of points
+>    for which $-2 + X_1 + 2X_2 < 0$.
 
 ```{r}
-p + geom_abline(intercept = 1, slope = -1/2) +  # X2 = 1 - X1/2
+p + geom_abline(intercept = 1, slope = -1 / 2) + # X2 = 1 - X1/2
   geom_point(
-    aes(color = interaction(1 + 3*X1 - X2 > 0, -2 + X1 + 2*X2 > 0)), 
+    aes(color = interaction(1 + 3 * X1 - X2 > 0, -2 + X1 + 2 * X2 > 0)),
     size = 0.5
-  ) + 
-  scale_color_discrete(name = "(1 + 3X1 − X2 > 0).(−2 + X1 + 2X2 > 0)")
+  ) +
+  scale_color_discrete(name = "(1 + 3X1 - X2 > 0).(-2 + X1 + 2X2 > 0)")
 ```
 
 ### Question 2
@@ -44,30 +44,30 @@ p + geom_abline(intercept = 1, slope = -1/2) +  # X2 = 1 - X1/2
 > form $\beta_0 + \beta_1X_1 + \beta_2X_2 = 0$. We now investigate a non-linear
 > decision boundary.
 >
-> a. Sketch the curve $$(1+X_1)^2 +(2−X_2)^2 = 4$$.
+> a. Sketch the curve $$(1+X_1)^2 +(2-X_2)^2 = 4$$.
 
 ```{r}
 points <- expand.grid(
-  X1 = seq(-4, 2, length.out = 100), 
+  X1 = seq(-4, 2, length.out = 100),
   X2 = seq(-1, 5, length.out = 100)
 )
-p <- ggplot(points, aes(x = X1, y = X2, z = (1 + X1)^2 + (2 - X2)^2 - 4)) + 
-  geom_contour(breaks = 0, colour = "black") + 
+p <- ggplot(points, aes(x = X1, y = X2, z = (1 + X1)^2 + (2 - X2)^2 - 4)) +
+  geom_contour(breaks = 0, colour = "black") +
   theme_bw()
 p
 ```
 
 > b. On your sketch, indicate the set of points for which
->    $$(1 + X_1)^2 + (2 − X_2)^2 > 4,$$ as well as the set of points for which
->    $$(1 + X_1)^2 + (2 − X_2)^2 \leq 4.$$
+>    $$(1 + X_1)^2 + (2 - X_2)^2 > 4,$$ as well as the set of points for which
+>    $$(1 + X_1)^2 + (2 - X_2)^2 \leq 4.$$
 
 ```{r}
 p + geom_point(aes(color = (1 + X1)^2 + (2 - X2)^2 - 4 > 0), size = 0.1)
 ```
 
 > c. Suppose that a classifier assigns an observation to the blue class if $$(1
->    + X_1)^2 + (2 − X_2)^2 > 4,$$ and to the red class otherwise. To what class
->    is the observation $(0, 0)$ classified? $(−1, 1)$? $(2, 2)$? $(3, 8)$?
+>    + X_1)^2 + (2 - X_2)^2 > 4,$$ and to the red class otherwise. To what class
+>    is the observation $(0, 0)$ classified? $(-1, 1)$? $(2, 2)$? $(3, 8)$?
 
 ```{r}
 points <- data.frame(
@@ -81,9 +81,9 @@ ifelse((1 + points$X1)^2 + (2 - points$X2)^2 > 4, "blue", "red")
 >    $X_1$ and $X_2$, it is linear in terms of $X_1$, $X_1^2$, $X_2$, and
 >    $X_2^2$.
 
-The decision boundary is $$(1 + X_1)^2 + (2 − X_2)^2 -4 = 0$$ which we can expand 
+The decision boundary is $$(1 + X_1)^2 + (2 - X_2)^2 -4 = 0$$ which we can expand 
 to:
-$$1 + 2X_1 + X_1^2 + 4 − 4X_2 + X_2^2 - 4 = 0$$
+$$1 + 2X_1 + X_1^2 + 4 - 4X_2 + X_2^2 - 4 = 0$$
 which is linear in terms of $X_1$, $X_1^2$, $X_2$, $X_2^2$.
 
 ### Question 3
@@ -111,8 +111,8 @@ data <- data.frame(
   X2 = c(4, 2, 4, 4, 1, 3, 1),
   Y  = c(rep("Red", 4), rep("Blue", 3))
 )
-p <- ggplot(data, aes(x = X1, y = X2, color = Y)) + 
-  geom_point(size = 2) + 
+p <- ggplot(data, aes(x = X1, y = X2, color = Y)) +
+  geom_point(size = 2) +
   scale_colour_identity() +
   coord_cartesian(xlim = c(0.5, 4.5), ylim = c(0.5, 4.5))
 p
@@ -194,7 +194,7 @@ p + geom_abline(intercept = 1, slope = 0.5, lty = 2, col = "red")
 >    longer separable by a hyperplane.
 
 ```{r}
-p + geom_point(data = data.frame(X1 = 1, X2 = 3, Y  = "Blue"), shape = 15, size = 4)
+p + geom_point(data = data.frame(X1 = 1, X2 = 3, Y = "Blue"), shape = 15, size = 4)
 ```
 
 ## Applied
@@ -215,11 +215,12 @@ data <- data.frame(
   x = runif(100),
   y = runif(100)
 )
-score <- (2*data$x-0.5)^2 + (data$y)^2 - 0.5
+score <- (2 * data$x - 0.5)^2 + (data$y)^2 - 0.5
 data$class <- factor(ifelse(score > 0, "red", "blue"))
 
-p <- ggplot(data, aes(x = x, y = y, color = class)) + 
-  geom_point(size = 2) + scale_colour_identity()
+p <- ggplot(data, aes(x = x, y = y, color = class)) +
+  geom_point(size = 2) +
+  scale_colour_identity()
 p
 
 train <- 1:50
@@ -276,7 +277,7 @@ train$y <- factor(as.numeric((train$x1^2 - train$x2^2 > 0)))
 >    should display $X_1$ on the $x$-axis, and $X_2$ on the $y$-axis.
 
 ```{r}
-p <- ggplot(train, aes(x = x1, y = x2, color = y)) + 
+p <- ggplot(train, aes(x = x1, y = x2, color = y)) +
   geom_point(size = 2)
 p
 ```
@@ -300,7 +301,7 @@ plot_model <- function(fit) {
   } else {
     train$p <- factor(as.numeric(predict(fit) > 0))
   }
-  ggplot(train, aes(x = x1, y = x2, color = p)) + 
+  ggplot(train, aes(x = x1, y = x2, color = p)) +
     geom_point(size = 2)
 }
 
@@ -373,19 +374,20 @@ data$y <- (data$class == "red") * 5 + rnorm(200)
 
 # Add barley separable points (these are simulated "noise" values)
 newdata <- data.frame(x = rnorm(30))
-newdata$y <- 1.5*newdata$x + 3 + rnorm(30, 0, 1)
-newdata$class = ifelse((1.5*newdata$x + 3) - newdata$y > 0, "blue", "red")
+newdata$y <- 1.5 * newdata$x + 3 + rnorm(30, 0, 1)
+newdata$class <- ifelse((1.5 * newdata$x + 3) - newdata$y > 0, "blue", "red")
 
 data <- rbind(data, newdata)
 
 # remove any that cause misclassification leaving data that is barley linearly
 # separable, but along an axis that is not y = 2.5 (which would be correct
 # for the "true" data.
-data <- data[!(data$class == "red") == ((1.5*data$x + 3 - data$y) > 0), ]
+data <- data[!(data$class == "red") == ((1.5 * data$x + 3 - data$y) > 0), ]
 data <- data[sample(seq_len(nrow(data)), 200), ]
 
-p <- ggplot(data, aes(x = x, y = y, color = class)) + 
-  geom_point(size = 2) + scale_colour_identity() + 
+p <- ggplot(data, aes(x = x, y = y, color = class)) +
+  geom_point(size = 2) +
+  scale_colour_identity() +
   geom_abline(intercept = 3, slope = 1.5, lty = 2)
 p
 ```
@@ -401,9 +403,9 @@ How many training errors are misclassified for each value of cost?
 costs <- 10^seq(-3, 5)
 
 sapply(costs, function(cost) {
-    fit <- svm(as.factor(class) ~ ., data = data, kernel = "linear", cost = cost)
-    pred <- predict(fit, data)
-    sum(pred != data$class)
+  fit <- svm(as.factor(class) ~ ., data = data, kernel = "linear", cost = cost)
+  pred <- predict(fit, data)
+  sum(pred != data$class)
 })
 ```
 
@@ -413,7 +415,7 @@ Cross-validation errors
 out <- tune(svm, as.factor(class) ~ ., data = data, kernel = "linear", ranges = list(cost = costs))
 summary(out)
 data.frame(
-  cost = out$performances$cost, 
+  cost = out$performances$cost,
   misclass = out$performances$error * nrow(data)
 )
 ```
@@ -434,18 +436,18 @@ test$y <- (test$class == "red") * 5 + rnorm(200)
 p + geom_point(data = test, pch = 21)
 
 (errs <- sapply(costs, function(cost) {
-    fit <- svm(as.factor(class) ~ ., data = data, kernel = "linear", cost = cost)
-    pred <- predict(fit, test)
-    sum(pred != test$class)
+  fit <- svm(as.factor(class) ~ ., data = data, kernel = "linear", cost = cost)
+  pred <- predict(fit, test)
+  sum(pred != test$class)
 }))
 (cost <- costs[which.min(errs)])
 
 (fit <- svm(as.factor(class) ~ ., data = data, kernel = "linear", cost = cost))
 
 test$prediction <- predict(fit, test)
-p <- ggplot(test, aes(x = x, y = y, color = class, shape = prediction == class)) + 
-  geom_point(size = 2) + 
-  scale_colour_identity() 
+p <- ggplot(test, aes(x = x, y = y, color = class, shape = prediction == class)) +
+  geom_point(size = 2) +
+  scale_colour_identity()
 p
 ```
 
@@ -480,8 +482,10 @@ set.seed(42)
 costs <- 10^seq(-4, 3, by = 0.5)
 results <- list()
 f <- high_mpg ~ displacement + horsepower + weight
-results$linear <- tune(svm, f, data = data, kernel = "linear", 
-  ranges = list(cost = costs))
+results$linear <- tune(svm, f,
+  data = data, kernel = "linear",
+  ranges = list(cost = costs)
+)
 summary(results$linear)
 ```
 
@@ -490,12 +494,16 @@ summary(results$linear)
 >    on your results.
 
 ```{r}
-results$polynomial <- tune(svm, f, data = data, kernel = "polynomial", 
-  ranges = list(cost = costs, degree = 1:3))
+results$polynomial <- tune(svm, f,
+  data = data, kernel = "polynomial",
+  ranges = list(cost = costs, degree = 1:3)
+)
 summary(results$polynomial)
 
-results$radial <- tune(svm, f, data = data, kernel = "radial", 
-  ranges = list(cost = costs, gamma = 10^(-2:1)))
+results$radial <- tune(svm, f,
+  data = data, kernel = "radial",
+  ranges = list(cost = costs, gamma = 10^(-2:1))
+)
 summary(results$radial)
 
 sapply(results, function(x) x$best.performance)
@@ -527,9 +535,9 @@ sapply(results, function(x) x$best.parameters)
 ```{r}
 table(predict(results$radial$best.model, data), data$high_mpg)
 
-plot(results$radial$best.model, data, horsepower~displacement)
-plot(results$radial$best.model, data, horsepower~weight)
-plot(results$radial$best.model, data, displacement~weight)
+plot(results$radial$best.model, data, horsepower ~ displacement)
+plot(results$radial$best.model, data, horsepower ~ weight)
+plot(results$radial$best.model, data, displacement ~ weight)
 ```
 
 ### Question 8
@@ -572,8 +580,10 @@ errs(fit)
 >    range 0.01 to 10.
 
 ```{r}
-tuned <- tune(svm, Purchase ~ ., data = OJ[train, ], kernel = "linear", 
-  ranges = list(cost = 10^seq(-2, 1, length.out = 10)))
+tuned <- tune(svm, Purchase ~ .,
+  data = OJ[train, ], kernel = "linear",
+  ranges = list(cost = 10^seq(-2, 1, length.out = 10))
+)
 tuned$best.parameters
 summary(tuned)
 ```
@@ -588,8 +598,10 @@ errs(tuned$best.model)
 >    kernel. Use the default value for `gamma`.
 
 ```{r}
-tuned2 <- tune(svm, Purchase ~ ., data = OJ[train, ], kernel = "radial", 
-  ranges = list(cost = 10^seq(-2, 1, length.out = 10)))
+tuned2 <- tune(svm, Purchase ~ .,
+  data = OJ[train, ], kernel = "radial",
+  ranges = list(cost = 10^seq(-2, 1, length.out = 10))
+)
 tuned2$best.parameters
 errs(tuned2$best.model)
 ```
@@ -598,8 +610,10 @@ errs(tuned2$best.model)
 >    polynomial kernel. Set `degree = 2`.
 
 ```{r}
-tuned3 <- tune(svm, Purchase ~ ., data = OJ[train, ], kernel = "polynomial", 
-  ranges = list(cost = 10^seq(-2, 1, length.out = 10)), degree = 2)
+tuned3 <- tune(svm, Purchase ~ .,
+  data = OJ[train, ], kernel = "polynomial",
+  ranges = list(cost = 10^seq(-2, 1, length.out = 10)), degree = 2
+)
 tuned3$best.parameters
 errs(tuned3$best.model)
 ```
diff --git a/10-deep-learning.Rmd b/10-deep-learning.Rmd
index 3108399..0a0c973 100644
--- a/10-deep-learning.Rmd
+++ b/10-deep-learning.Rmd
@@ -248,7 +248,7 @@ absolute error.
 
 > Consider the simple function $R(\beta) = sin(\beta) + \beta/10$.
 >
-> a. Draw a graph of this function over the range $\beta \in [−6, 6]$.
+> a. Draw a graph of this function over the range $\beta \in [-6, 6]$.
 
 ```{r}
 r <- function(x) sin(x) + x / 10
diff --git a/11-survival-analysis-and-censored-data.Rmd b/11-survival-analysis-and-censored-data.Rmd
index 221c7a1..cce8e6c 100644
--- a/11-survival-analysis-and-censored-data.Rmd
+++ b/11-survival-analysis-and-censored-data.Rmd
@@ -310,7 +310,7 @@ These are equivalent to 11.5 and 11.6.
 
 > Recall that the survival function $S(t)$, the hazard function $h(t)$, and the
 > density function $f(t)$ are defined in (11.2), (11.9), and (11.11),
-> respectively. Furthermore, define $F(t) = 1 − S(t)$. Show that the following
+> respectively. Furthermore, define $F(t) = 1 - S(t)$. Show that the following
 > relationships hold:
 >
 > $$
@@ -337,7 +337,7 @@ Integrating and then exponentiating we get the second identity.
 > survival times follow an exponential distribution.
 >
 > a. Suppose that a survival time follows an $Exp(\lambda)$ distribution, so
-> that its density function is $f(t) = \lambda\exp(−\lambda t)$. Using the
+> that its density function is $f(t) = \lambda\exp(-\lambda t)$. Using the
 > relationships provided in Exercise 8, show that $S(t) = \exp(-\lambda t)$.
 
 The cdf of an exponential distribution is $1 - \exp(-\lambda x)$ and 
diff --git a/12-unsupervised-learning.Rmd b/12-unsupervised-learning.Rmd
index 20ce5dc..198aa47 100644
--- a/12-unsupervised-learning.Rmd
+++ b/12-unsupervised-learning.Rmd
@@ -142,7 +142,8 @@ d <- data.frame(
   x1 = c(1, 1, 0, 5, 6, 4),
   x2 = c(4, 3, 4, 1, 2, 0)
 )
-ggplot(d, aes(x = x1, y = x2)) + geom_point()
+ggplot(d, aes(x = x1, y = x2)) +
+  geom_point()
 ```
 
 > b. Randomly assign a cluster label to each observation. You can use the
@@ -157,7 +158,7 @@ d$cluster <- sample(c(1, 2), size = nrow(d), replace = TRUE)
 > c. Compute the centroid for each cluster.
 
 ```{r}
-centroids <- sapply(c(1,2), function(i) colMeans(d[d$cluster == i, 1:2]))
+centroids <- sapply(c(1, 2), function(i) colMeans(d[d$cluster == i, 1:2]))
 ```
 
 > d. Assign each observation to the centroid to which it is closest, in terms of
@@ -165,7 +166,7 @@ centroids <- sapply(c(1,2), function(i) colMeans(d[d$cluster == i, 1:2]))
 
 ```{r}
 dist <- sapply(1:2, function(i) {
-    sqrt((d$x1 - centroids[1, i])^2 + (d$x2 - centroids[2, i])^2)
+  sqrt((d$x1 - centroids[1, i])^2 + (d$x2 - centroids[2, i])^2)
 })
 d$cluster <- apply(dist, 1, which.min)
 ```
@@ -173,9 +174,9 @@ d$cluster <- apply(dist, 1, which.min)
 > e. Repeat (c) and (d) until the answers obtained stop changing.
 
 ```{r}
-centroids <- sapply(c(1,2), function(i) colMeans(d[d$cluster == i, 1:2]))
+centroids <- sapply(c(1, 2), function(i) colMeans(d[d$cluster == i, 1:2]))
 dist <- sapply(1:2, function(i) {
-    sqrt((d$x1 - centroids[1, i])^2 + (d$x2 - centroids[2, i])^2)
+  sqrt((d$x1 - centroids[1, i])^2 + (d$x2 - centroids[2, i])^2)
 })
 d$cluster <- apply(dist, 1, which.min)
 ```
@@ -186,7 +187,8 @@ In this case, we get stable labels after the first iteration.
 >    labels obtained.
 
 ```{r}
-ggplot(d, aes(x = x1, y = x2, color = factor(cluster))) + geom_point()
+ggplot(d, aes(x = x1, y = x2, color = factor(cluster))) +
+  geom_point()
 ```
 
 ### Question 4
@@ -263,7 +265,7 @@ kmeans(dat, 2)$cluster
 > It turns out that these two measures are almost equivalent: if each
 > observation has been centered to have mean zero and standard deviation one,
 > and if we let $r_{ij}$ denote the correlation between the $i$th and $j$th
-> observations, then the quantity $1 − r_{ij}$ is proportional to the squared
+> observations, then the quantity $1 - r_{ij}$ is proportional to the squared
 > Euclidean distance between the ith and jth observations.
 >
 > On the `USArrests` data, show that this proportionality holds.
@@ -393,8 +395,10 @@ data[classes == "C", 5:30] <- data[classes == "C", 5:30] + 1
 
 ```{r}
 pca <- prcomp(data)
-ggplot(data.frame(Class = classes, PC1 = pca$x[, 1], PC2 = pca$x[, 2]),
-    aes(x = PC1, y = PC2, col = Class)) + 
+ggplot(
+  data.frame(Class = classes, PC1 = pca$x[, 1], PC2 = pca$x[, 2]),
+  aes(x = PC1, y = PC2, col = Class)
+) +
   geom_point()
 ```