diff --git a/lessons/git/slides.md b/lessons/git/slides.md index d138f6e..fd0f06c 100644 --- a/lessons/git/slides.md +++ b/lessons/git/slides.md @@ -67,6 +67,6 @@ git config --list ## Generate these slides using (using R): ## -{% highlight r %} +```r rmarkdown::render('slides.Rmd') -{% endhighlight %} +``` diff --git a/lessons/r-wrangling/cheatsheet.md b/lessons/r-wrangling/cheatsheet.md index ccad661..b7a197d 100644 --- a/lessons/r-wrangling/cheatsheet.md +++ b/lessons/r-wrangling/cheatsheet.md @@ -39,13 +39,13 @@ file formats. > Example code: -{% highlight r %} +```r ## Export write.csv(swiss, file = 'swiss.csv') ## Which is the same as: write.table(swiss, file = 'swiss.csv', sep = ',') -{% endhighlight %} +``` ## `read.csv` or `read.table` ## @@ -54,10 +54,10 @@ write.table(swiss, file = 'swiss.csv', sep = ',') > Example code: -{% highlight r %} +```r write.csv(swiss, file = 'swiss.csv') read.csv('swiss.csv') -{% endhighlight %} +``` ## `head`, `names`, `str`, `summary` ## @@ -71,13 +71,13 @@ median, frequency, and other basic statistics of each variable in the dataframe. > Example code: -{% highlight r %} +```r head(swiss) names(swiss) str(swiss) summary(swiss) class(swiss) -{% endhighlight %} +``` ## `%>%` ## @@ -90,7 +90,7 @@ do for some functions/commands (like `lm()`). > Example code: -{% highlight r %} +```r library(dplyr) ## This is the package that the pipe comes from library(magrittr) @@ -104,7 +104,7 @@ head(swiss) swiss %>% head swiss %>% head() swiss %>% head(.) -{% endhighlight %} +``` ## `tbl_df` ## @@ -114,14 +114,14 @@ dataframe prettier. > Example code: -{% highlight r %} +```r library(dplyr) ## These are the same tbl_df(ds) ds %>% tbl_df ds %>% tbl_df() ds %>% tbl_df(.) -{% endhighlight %} +``` ## `select` ## @@ -131,7 +131,7 @@ variables based on pattern or if it contains some letter. > Example code: -{% highlight r %} +```r library(dplyr) ## These are the same select(swiss, Education, Catholic, Fertility) @@ -143,7 +143,7 @@ swiss %>% select(-Education, -Catholic) ## Select variables based on name or pattern swiss %>% select(starts_with('E'), contains('Fert'), matches('mort')) -{% endhighlight %} +``` ## `rename` ## @@ -155,7 +155,7 @@ is, as it only renames. > Example code: -{% highlight r %} +```r library(dplyr) ## These are the same rename(swiss, edu = Education) @@ -167,7 +167,7 @@ swiss %>% rename(edu = Education, fert = Fertility) ## If you want to use select, but get the same functionality as rename, use the ## everything() function to select all other variables in the dataframe swiss %>% select(edu = Education, everything()) -{% endhighlight %} +``` ## `filter` ## @@ -178,7 +178,7 @@ greater than, `==` equals, `>=` or `<=` greater/less than or equal to. > Example code: -{% highlight r %} +```r library(dplyr) ## These are the same filter(swiss, Catholic < 20, Examination == 15) @@ -187,7 +187,7 @@ swiss %>% filter(., Catholic < 20, Examination == 15) ## For string/factor variables swiss %>% filter(X == 'Aigle') -{% endhighlight %} +``` ## `mutate` ## @@ -197,7 +197,7 @@ that assigns a value based on the condition. > Example code: -{% highlight r %} +```r library(dplyr) ## These are the same mutate(swiss, Infertile = ifelse(Fertility < 50, 'yes', 'no')) @@ -206,7 +206,7 @@ swiss %>% mutate(., Infertile = ifelse(Fertility < 50, 'yes', 'no')) ## Or.. swiss %>% mutate(Test = 'yes', Number = 10) -{% endhighlight %} +``` ## `arrange` ## @@ -216,7 +216,7 @@ given (eg. `arrange(var1, var2)` sorts first by `var1` than by `var2`). > Example code: -{% highlight r %} +```r library(dplyr) ## These are the same arrange(swiss, Education, Examination) @@ -225,7 +225,7 @@ swiss %>% arrange(., Education, Examination) ## Or to do it descending swiss %>% arrange(desc(Education)) -{% endhighlight %} +``` ## `group_by` ## @@ -236,12 +236,12 @@ following commands based on the grouping. > Example code: -{% highlight r %} +```r library(dplyr) swiss %>% mutate(EarlyDeath = ifelse(Infant.Mortality >= 50, 'yes', 'no')) %>% group_by(EarlyDeath) -{% endhighlight %} +``` ## `summarise` ## @@ -252,14 +252,14 @@ for sample size. This function is best used with `group_by()`. > Example code: -{% highlight r %} +```r library(dplyr) swiss %>% mutate(Educated = ifelse(Education >= 50, 'yes', 'no')) %>% group_by(Educated) %>% str() summarise(mean = mean(Agriculture)) -{% endhighlight %} +``` ## `gather` ## @@ -273,7 +273,7 @@ exclude (with a `-`) after the name of the two new variables. > Example code: -{% highlight r %} +```r library(dplyr) library(tidyr) ## These are the same @@ -286,7 +286,7 @@ swiss %>% add_rownames() %>% gather(Measure, Value, -rowname) ## Or include only some variables swiss %>% gather(Measure, Value, Education, Fertility, Infant.Mortality) -{% endhighlight %} +``` ## `spread` ## @@ -296,19 +296,19 @@ dataframes. > Example code: -{% highlight r %} +```r library(dplyr) library(tidyr) swiss %>% add_rownames() %>% gather(Measure, Value, -rowname) %>% spread(Measure, Value) -{% endhighlight %} +``` # Combined example using (almost) all functions: -{% highlight r %} +```r swiss %>% add_rownames() %>% tbl_df() %>% @@ -319,5 +319,5 @@ swiss %>% group_by(Measure, Religious) %>% summarise(mean = mean(Value)) %>% spread(Measure, mean) -{% endhighlight %} +``` diff --git a/lessons/r-wrangling/intro.md b/lessons/r-wrangling/intro.md index 6d88165..98f7af0 100644 --- a/lessons/r-wrangling/intro.md +++ b/lessons/r-wrangling/intro.md @@ -56,12 +56,12 @@ command, use the `?` command along with the command you need help with (eg. The one I'm going to use is the `swiss` dataset. -{% highlight r %} +```r ## Export/save to file write.csv(swiss, file = 'swiss.csv') ## Import/read from file ds <- read.csv('swiss.csv') -{% endhighlight %} +``` ## Viewing your data @@ -73,13 +73,8 @@ column names and types). `summary()` shows a quick description of the summary statistics (means, median, frequency) for each of your columns. -{% highlight r %} +```r head(ds) -{% endhighlight %} - - - -{% highlight text %} ## X Fertility Agriculture Examination Education Catholic ## 1 Courtelary 80.2 17.0 15 12 9.96 ## 2 Delemont 83.1 45.1 6 9 84.84 @@ -94,31 +89,11 @@ head(ds) ## 4 20.3 ## 5 20.6 ## 6 26.6 -{% endhighlight %} - - - -{% highlight r %} names(ds) -{% endhighlight %} - - - -{% highlight text %} ## [1] "X" "Fertility" "Agriculture" ## [4] "Examination" "Education" "Catholic" ## [7] "Infant.Mortality" -{% endhighlight %} - - - -{% highlight r %} str(ds) -{% endhighlight %} - - - -{% highlight text %} ## 'data.frame': 47 obs. of 7 variables: ## $ X : Factor w/ 47 levels "Aigle","Aubonne",..: 8 9 12 26 28 34 5 13 15 38 ... ## $ Fertility : num 80.2 83.1 92.5 85.8 76.9 76.1 83.8 92.4 82.4 82.9 ... @@ -127,17 +102,7 @@ str(ds) ## $ Education : int 12 9 5 7 15 7 7 8 7 13 ... ## $ Catholic : num 9.96 84.84 93.4 33.77 5.16 ... ## $ Infant.Mortality: num 22.2 22.2 20.2 20.3 20.6 26.6 23.6 24.9 21 24.4 ... -{% endhighlight %} - - - -{% highlight r %} summary(ds) -{% endhighlight %} - - - -{% highlight text %} ## X Fertility Agriculture Examination ## Aigle : 1 Min. :35.00 Min. : 1.20 Min. : 3.00 ## Aubonne : 1 1st Qu.:64.70 1st Qu.:35.90 1st Qu.:12.00 @@ -154,7 +119,7 @@ summary(ds) ## 3rd Qu.:12.00 3rd Qu.: 93.125 3rd Qu.:21.70 ## Max. :53.00 Max. :100.000 Max. :26.60 ## -{% endhighlight %} +``` # Wrangling your data, `dplyr` style @@ -168,13 +133,8 @@ left-hand side, just like how a plumbing pipe works for water. `tbl_df` makes the object into a `tbl` class, making printing of the output nicer. -{% highlight r %} +```r library(dplyr) -{% endhighlight %} - - - -{% highlight text %} ## ## Attaching package: 'dplyr' ## @@ -185,20 +145,10 @@ library(dplyr) ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union -{% endhighlight %} - - - -{% highlight r %} library(tidyr) ## Compare head(ds) -{% endhighlight %} - - - -{% highlight text %} ## X Fertility Agriculture Examination Education Catholic ## 1 Courtelary 80.2 17.0 15 12 9.96 ## 2 Delemont 83.1 45.1 6 9 84.84 @@ -213,18 +163,8 @@ head(ds) ## 4 20.3 ## 5 20.6 ## 6 26.6 -{% endhighlight %} - - - -{% highlight r %} ## With: tbl_df(ds) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 7] ## ## X Fertility Agriculture Examination Education Catholic @@ -241,19 +181,10 @@ tbl_df(ds) ## 10 Sarine 82.9 45.2 16 13 91.38 ## .. ... ... ... ... ... ... ## Variables not shown: Infant.Mortality (dbl) -{% endhighlight %} - - -{% highlight r %} ## Now put the tbl dataset into a new object ds2 <- tbl_df(ds) ds2 -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 7] ## ## X Fertility Agriculture Examination Education Catholic @@ -270,7 +201,7 @@ ds2 ## 10 Sarine 82.9 45.2 16 13 91.38 ## .. ... ... ... ... ... ... ## Variables not shown: Infant.Mortality (dbl) -{% endhighlight %} +``` ## Select columns @@ -280,14 +211,9 @@ use of the `%>%` operator. This allows you to chain commands together, letting you do more with only a few commands. -{% highlight r %} +```r ds2 %>% select(Education, Catholic, Fertility) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 3] ## ## Education Catholic Fertility @@ -303,7 +229,7 @@ ds2 %>% ## 9 7 97.67 82.4 ## 10 13 91.38 82.9 ## .. ... ... ... -{% endhighlight %} +``` The real power with using the `select()` function comes when you combine it with [regular expressions (regexp)](http://www.regular-expressions.info/), or rather pattern @@ -319,14 +245,9 @@ regexp syntax are nearly a language to themselves, so use Ok, so lets say you want to search for variables that have certain patterns: -{% highlight r %} +```r ds2 %>% select(contains('Edu'), starts_with('Cath')) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 2] ## ## Education Catholic @@ -342,19 +263,10 @@ ds2 %>% ## 9 7 97.67 ## 10 13 91.38 ## .. ... ... -{% endhighlight %} - - -{% highlight r %} ## Or more simplified ds2 %>% select(matches('Edu|Cath')) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 2] ## ## Education Catholic @@ -370,7 +282,7 @@ ds2 %>% ## 9 7 97.67 ## 10 13 91.38 ## .. ... ... -{% endhighlight %} +``` You can see that if you have many variables that have a common structure to their name, you can quickly select all those variables by using functions such @@ -382,14 +294,9 @@ You can rename columns using the `rename` command (the new name is on the left hand side, so `newname = oldname`). -{% highlight r %} +```r ds2 %>% rename(County = X) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 7] ## ## County Fertility Agriculture Examination Education Catholic @@ -406,7 +313,7 @@ ds2 %>% ## 10 Sarine 82.9 45.2 16 13 91.38 ## .. ... ... ... ... ... ... ## Variables not shown: Infant.Mortality (dbl) -{% endhighlight %} +``` ## Filter rows @@ -416,15 +323,10 @@ if 'Examination' is equal to 15. A single `=` is used for something else (assigning things to objects or using them in functions/commands). -{% highlight r %} +```r ## For continuous/number data ds2 %>% filter(Catholic < 20, Examination == 15) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [3 x 7] ## ## X Fertility Agriculture Examination Education Catholic @@ -433,26 +335,17 @@ ds2 %>% ## 2 Yverdon 65.4 49.5 15 8 6.10 ## 3 Val de Ruz 77.6 37.6 15 7 4.97 ## Variables not shown: Infant.Mortality (dbl) -{% endhighlight %} - - -{% highlight r %} ## Or for 'string' (words or letters) data ds2 %>% filter(X == 'Aigle') -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [1 x 7] ## ## X Fertility Agriculture Examination Education Catholic ## (fctr) (dbl) (dbl) (int) (int) (dbl) ## 1 Aigle 64.1 62 21 12 8.52 ## Variables not shown: Infant.Mortality (dbl) -{% endhighlight %} +``` ## Create new columns or clean up existing ones @@ -461,15 +354,10 @@ If you want to create a new column, you use the `mutate` command. The on the condition. -{% highlight r %} +```r ds2 %>% mutate(Testing = 'yes', Infertile = ifelse(Fertility < 50, 'yes', 'no')) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 9] ## ## X Fertility Agriculture Examination Education Catholic @@ -487,7 +375,7 @@ ds2 %>% ## .. ... ... ... ... ... ... ## Variables not shown: Infant.Mortality (dbl), Testing (chr), Infertile ## (chr) -{% endhighlight %} +``` However, it's fairly common that you need to do some data janitorial work by cleaning up an existing column. For example, in a dataset with a 'Sex' variable, @@ -499,17 +387,12 @@ some values had data entry errors in spelling, such as 'fmale' when it should be '*g*lobablly *sub*stitute a pattern with the replacement. -{% highlight r %} +```r ds2 %>% mutate( X = gsub(pattern = '^G', replacement = 'J', X), X = gsub(pattern = 'e$', replacement = '', X) ) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 7] ## ## X Fertility Agriculture Examination Education Catholic @@ -526,7 +409,7 @@ ds2 %>% ## 10 Sarin 82.9 45.2 16 13 91.38 ## .. ... ... ... ... ... ... ## Variables not shown: Infant.Mortality (dbl) -{% endhighlight %} +``` Notice the `^` and `$` characters. Those are special syntax symbols used in regexp commands. We introduced them above, but we'll quickly go over @@ -537,14 +420,9 @@ means for all "e" that are at the end of a string. Or let"s say that all "mont", here. -{% highlight r %} +```r ds2 %>% mutate(X = gsub('mont|mnt|mout', 'ment', X, ignore.case = TRUE)) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 7] ## ## X Fertility Agriculture Examination Education Catholic @@ -561,7 +439,7 @@ ds2 %>% ## 10 Sarine 82.9 45.2 16 13 91.38 ## .. ... ... ... ... ... ... ## Variables not shown: Infant.Mortality (dbl) -{% endhighlight %} +``` Regular expressions are incredibly powerful, but also can be confusing. Make sure to check out [our resources page](/lessons/resources/) for links to sites @@ -577,18 +455,13 @@ actually conducting it. `arrange` sorts/orders/re-arranges the column Education in ascending order. -{% highlight r %} +```r ds2 %>% filter(Catholic > 20) %>% select(County = X, ## This renames the variable, just like the rename() command Education, Fertility, Agriculture) %>% arrange(Education) %>% mutate(infertile = ifelse(Fertility < 50, 'yes', 'no')) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [21 x 5] ## ## County Education Fertility Agriculture infertile @@ -604,7 +477,7 @@ ds2 %>% ## 9 Martigwy 6 70.5 78.2 no ## 10 Moutier 7 85.8 36.5 no ## .. ... ... ... ... ... -{% endhighlight %} +``` ## Re-organize your data (using `tidyr`) @@ -614,16 +487,11 @@ does. Note that you can remove a column by having a minus `-` sign in front of a variable when you use `select`. -{% highlight r %} +```r ## Compare this: ds2 %>% select(-Infant.Mortality) %>% rename(County = X) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 6] ## ## County Fertility Agriculture Examination Education Catholic @@ -639,21 +507,12 @@ ds2 %>% ## 9 Gruyere 82.4 53.3 12 7 97.67 ## 10 Sarine 82.9 45.2 16 13 91.38 ## .. ... ... ... ... ... ... -{% endhighlight %} - - -{% highlight r %} ## With this: ds2 %>% select(-Infant.Mortality) %>% rename(County = X) %>% gather(Measure, Value, -County) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [235 x 3] ## ## County Measure Value @@ -669,22 +528,13 @@ ds2 %>% ## 9 Gruyere Fertility 82.4 ## 10 Sarine Fertility 82.9 ## .. ... ... ... -{% endhighlight %} - - -{% highlight r %} ## And back again: ds2 %>% select(-Infant.Mortality) %>% rename(County = X) %>% gather(Measure, Value, -County) %>% spread(Measure, Value) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [47 x 6] ## ## County Fertility Agriculture Examination Education Catholic @@ -700,7 +550,7 @@ ds2 %>% ## 9 Delemont 83.1 45.1 6 9 84.84 ## 10 Echallens 68.3 72.6 18 2 24.20 ## .. ... ... ... ... ... ... -{% endhighlight %} +``` ## Summarise variables @@ -711,18 +561,13 @@ only contain the new variable(s) created, in this case the mean, as well as the grouping variable. -{% highlight r %} +```r ds2 %>% select(-X) %>% gather(Measure, Value) %>% group_by(Measure) %>% summarise(mean = mean(Value), sampleSize = n()) -{% endhighlight %} - - - -{% highlight text %} ## Source: local data frame [6 x 3] ## ## Measure mean sampleSize @@ -733,7 +578,7 @@ ds2 %>% ## 4 Education 10.97872 47 ## 5 Catholic 41.14383 47 ## 6 Infant.Mortality 19.94255 47 -{% endhighlight %} +``` ## Other useful and powerful examples @@ -747,7 +592,7 @@ package). If you want more details on how to use this set up, [check out my blog post about it](http://www.lukewjohnston.com/blog/loops-forests-multiple-linear-regressions/). -{% highlight r %} +```r ds2 %>% select(-X) %>% gather(Indep, Xvalue, Fertility, Agriculture) %>% @@ -755,10 +600,5 @@ ds2 %>% group_by(Dep, Indep) %>% do(lm(Yvalue ~ Xvalue + Infant.Mortality + Examination, data = .) %>% broom::tidy()) -{% endhighlight %} - - - -{% highlight text %} ## Error in tidy.lm(.): could not find function "is" -{% endhighlight %} +``` diff --git a/lessons/rintro/assignment.md b/lessons/rintro/assignment.md index 93bdab0..92da0d0 100644 --- a/lessons/rintro/assignment.md +++ b/lessons/rintro/assignment.md @@ -42,16 +42,16 @@ object of the resulting matrix; choose your own object name! *alpha:* -{% highlight text %} +``` ## Error in eval(expr, envir, enclos): could not find function "kable" -{% endhighlight %} +``` *beta:* -{% highlight text %} +``` ## Error in eval(expr, envir, enclos): could not find function "kable" -{% endhighlight %} +``` 3. Load the `datasets` package and make a new dataframe with the `mtcars` dataset. What are the means of the columns? Hint: use a loop function. Subset diff --git a/lessons/rintro/assignmentAnswers.md b/lessons/rintro/assignmentAnswers.md index 0cef3b0..2880592 100644 --- a/lessons/rintro/assignmentAnswers.md +++ b/lessons/rintro/assignmentAnswers.md @@ -5,104 +5,49 @@ published: true 1. -{% highlight r %} +```r a <- c(1:10) b <- c(11:20) (c <- a*b) -{% endhighlight %} - - - -{% highlight text %} ## [1] 11 24 39 56 75 96 119 144 171 200 -{% endhighlight %} - - - -{% highlight r %} c[5] -{% endhighlight %} - - - -{% highlight text %} ## [1] 75 -{% endhighlight %} - - - -{% highlight r %} subset(c, c > 50) -{% endhighlight %} - - - -{% highlight text %} ## [1] 56 75 96 119 144 171 200 -{% endhighlight %} +``` 2. -{% highlight r %} +```r (d <- matrix(c(7,9,12,2,4,13), 2, 3)) -{% endhighlight %} - - - -{% highlight text %} ## [,1] [,2] [,3] ## [1,] 7 12 4 ## [2,] 9 2 13 -{% endhighlight %} - - - -{% highlight r %} (e <- matrix(c(1,7,12,19,2,8,13,20,3,9,14,21), 3, 4)) -{% endhighlight %} - - - -{% highlight text %} ## [,1] [,2] [,3] [,4] ## [1,] 1 19 13 9 ## [2,] 7 2 20 14 ## [3,] 12 8 3 21 -{% endhighlight %} - - - -{% highlight r %} (f <- d %*% e) -{% endhighlight %} - - - -{% highlight text %} ## [,1] [,2] [,3] [,4] ## [1,] 139 189 343 315 ## [2,] 179 279 196 382 -{% endhighlight %} +``` 3. -{% highlight r %} +```r library(datasets) (g <- mtcars) -{% endhighlight %} - - - -{% highlight text %} ## mpg cyl disp hp drat wt qsec vs am gear carb ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4 ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4 @@ -136,95 +81,35 @@ library(datasets) ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6 ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8 ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2 -{% endhighlight %} - - - -{% highlight r %} apply(g, 2, mean) -{% endhighlight %} - - - -{% highlight text %} ## mpg cyl disp hp drat wt ## 20.090625 6.187500 230.721875 146.687500 3.596563 3.217250 ## qsec vs am gear carb ## 17.848750 0.437500 0.406250 3.687500 2.812500 -{% endhighlight %} - - - -{% highlight r %} (h <- g[,"mpg"]) -{% endhighlight %} - - - -{% highlight text %} ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 ## [29] 15.8 19.7 15.0 21.4 -{% endhighlight %} - - - -{% highlight r %} #OR (h <- g$mpg) -{% endhighlight %} - - - -{% highlight text %} ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2 ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4 ## [29] 15.8 19.7 15.0 21.4 -{% endhighlight %} - - - -{% highlight r %} (i <- g[, 4]) -{% endhighlight %} - - - -{% highlight text %} ## [1] 110 110 93 110 175 105 245 62 95 123 123 180 180 180 205 215 230 ## [18] 66 52 65 97 150 150 245 175 66 91 113 264 175 335 109 -{% endhighlight %} - - - -{% highlight r %} (j <- g[c(3,5),]) -{% endhighlight %} - - - -{% highlight text %} ## mpg cyl disp hp drat wt qsec vs am gear carb ## Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1 ## Hornet Sportabout 18.7 8 360 175 3.15 3.44 17.02 0 0 3 2 -{% endhighlight %} - - - -{% highlight r %} (k <- subset(g, g$hp < 100)) -{% endhighlight %} - - - -{% highlight text %} ## mpg cyl disp hp drat wt qsec vs am gear carb ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1 ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2 @@ -235,12 +120,12 @@ apply(g, 2, mean) ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1 ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1 ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2 -{% endhighlight %} +``` 4. -{% highlight r %} +```r ani <- c("cat", "dog", "cow", "pig") for (i in ani) { @@ -249,18 +134,13 @@ for (i in ani) { else { } } -{% endhighlight %} - - - -{% highlight text %} ## [1] "dog" -{% endhighlight %} +``` 5. -{% highlight r %} +```r stats <- function(a,b) { print (mean(a)) print (sd(a)) @@ -268,25 +148,15 @@ stats <- function(a,b) { } stats(g$disp, g$drat) -{% endhighlight %} - - - -{% highlight text %} ## [1] 230.7219 ## [1] 123.9387 ## [1] 3.695 -{% endhighlight %} +``` 6. -{% highlight r %} +```r lapply(g, range, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## $mpg ## [1] 10.4 33.9 ## @@ -319,53 +189,28 @@ lapply(g, range, na.rm = TRUE) ## ## $carb ## [1] 1 8 -{% endhighlight %} - - - -{% highlight r %} sapply(g, range, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## mpg cyl disp hp drat wt qsec vs am gear carb ## [1,] 10.4 4 71.1 52 2.76 1.513 14.5 0 0 3 1 ## [2,] 33.9 8 472.0 335 4.93 5.424 22.9 1 1 5 8 -{% endhighlight %} - - - -{% highlight r %} ## lapply always returns a list while sapply simiplied the result and returned a matrix -{% endhighlight %} +``` 7. -{% highlight r %} +```r mapply(range, mtcars[,c(1,6,7)], na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## mpg wt qsec ## [1,] 10.4 1.513 14.5 ## [2,] 33.9 5.424 22.9 -{% endhighlight %} +``` 8. -{% highlight r %} +```r tapply(mtcars$hp, mtcars$gear, max, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## 3 4 5 ## 245 123 335 -{% endhighlight %} +``` diff --git a/lessons/rintro/cheatsheet.md b/lessons/rintro/cheatsheet.md index fd8546d..8cc4007 100644 --- a/lessons/rintro/cheatsheet.md +++ b/lessons/rintro/cheatsheet.md @@ -30,14 +30,14 @@ types: character, numeric, integer, complex, and logical > Example code: -{% highlight r %} +```r a <- c(0.4, 1.2) # numeric b <- c(TRUE, FALSE) # logical c <- c("a","b","c") # character d <- c(1L, 2L) # integer e <- 1:10 # integer f <- c(1+0i, 2+4i) # complex -{% endhighlight %} +``` # Basic R object types (classes) # @@ -56,69 +56,24 @@ from the vector. > Example code: -{% highlight r %} +```r vector("numeric", length=10) -{% endhighlight %} - - - -{% highlight text %} ## [1] 0 0 0 0 0 0 0 0 0 0 -{% endhighlight %} - - - -{% highlight r %} x <- c(1:10) ## Select the first element x[1] -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 -{% endhighlight %} - - - -{% highlight r %} ## Select the first and second element x[1:2] -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 2 -{% endhighlight %} - - - -{% highlight r %} ## select the first and third element x[c(1,3)] -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 3 -{% endhighlight %} - - - -{% highlight r %} ## Check out long the vector is length(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 10 -{% endhighlight %} +``` ## `list()`, `[[`, `$` ## @@ -131,14 +86,9 @@ command. > Example code: -{% highlight r %} +```r x <- list(num = 1, char = "a", logic = TRUE, complex = 1+4i) x -{% endhighlight %} - - - -{% highlight text %} ## $num ## [1] 1 ## @@ -150,58 +100,18 @@ x ## ## $complex ## [1] 1+4i -{% endhighlight %} - - - -{% highlight r %} ## Use [[ to select the contents inside the list x[[1]] -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 -{% endhighlight %} - - - -{% highlight r %} x[[2]] -{% endhighlight %} - - - -{% highlight text %} ## [1] "a" -{% endhighlight %} - - - -{% highlight r %} x[[3]] -{% endhighlight %} - - - -{% highlight text %} ## [1] TRUE -{% endhighlight %} - - - -{% highlight r %} x2 <- list( vector = c(1:10), dataframe = data.frame(1:10, 1:10), char.list = list('a', 'b') ) x2 -{% endhighlight %} - - - -{% highlight text %} ## $vector ## [1] 1 2 3 4 5 6 7 8 9 10 ## @@ -224,45 +134,15 @@ x2 ## ## $char.list[[2]] ## [1] "b" -{% endhighlight %} - - - -{% highlight r %} ## Use the [ to select the vector number within the list item. x2[[1]] -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 2 3 4 5 6 7 8 9 10 -{% endhighlight %} - - - -{% highlight r %} x2[[1]][1] -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 -{% endhighlight %} - - - -{% highlight r %} ## Select a list item specifically (if it is named) x2$vector -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 2 3 4 5 6 7 8 9 10 -{% endhighlight %} +``` ## `matrix()`, `dim()`, `nrow()`, `ncol()`, `rbind()`, `cbind()` ## @@ -277,172 +157,57 @@ the `dim()` attribute or by using `cbind()` and `rbind()`. > Example code: -{% highlight r %} +```r x <- matrix(1:6, nrow=2, ncol=3) x -{% endhighlight %} - - - -{% highlight text %} ## [,1] [,2] [,3] ## [1,] 1 3 5 ## [2,] 2 4 6 -{% endhighlight %} - - - -{% highlight r %} # The first row, third column x[1,3] -{% endhighlight %} - - - -{% highlight text %} ## [1] 5 -{% endhighlight %} - - - -{% highlight r %} # The first two rows and second and third column x[1:2, 2:3] -{% endhighlight %} - - - -{% highlight text %} ## [,1] [,2] ## [1,] 3 5 ## [2,] 4 6 -{% endhighlight %} - - - -{% highlight r %} ## Check the attributes of the matrix dim(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 2 3 -{% endhighlight %} - - - -{% highlight r %} ncol(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 3 -{% endhighlight %} - - - -{% highlight r %} nrow(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 2 -{% endhighlight %} - - - -{% highlight r %} attributes(x) -{% endhighlight %} - - - -{% highlight text %} ## $dim ## [1] 2 3 -{% endhighlight %} - - - -{% highlight r %} ## Creating a matrix from a vector x <- 1:10 x -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 2 3 4 5 6 7 8 9 10 -{% endhighlight %} - - - -{% highlight r %} dim(x) <- c(2,5) x -{% endhighlight %} - - - -{% highlight text %} ## [,1] [,2] [,3] [,4] [,5] ## [1,] 1 3 5 7 9 ## [2,] 2 4 6 8 10 -{% endhighlight %} - - - -{% highlight r %} dim(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 2 5 -{% endhighlight %} - - - -{% highlight r %} ## Using rbind ('row bind') or cbind ('column bind') x <- 1:3 y <- 10:12 cbind(x, y) -{% endhighlight %} - - - -{% highlight text %} ## x y ## [1,] 1 10 ## [2,] 2 11 ## [3,] 3 12 -{% endhighlight %} - - - -{% highlight r %} rbind(x, y) -{% endhighlight %} - - - -{% highlight text %} ## [,1] [,2] [,3] ## x 1 2 3 ## y 10 11 12 -{% endhighlight %} +``` ## `factor()`, `table()` ## @@ -455,59 +220,24 @@ Factors are also treated differently by modelling functions like `lm()` and > Example code: -{% highlight r %} +```r x <- factor(c("yes", "yes", "no", "no", "yes")) x -{% endhighlight %} - - - -{% highlight text %} ## [1] yes yes no no yes ## Levels: no yes -{% endhighlight %} - - - -{% highlight r %} table(x) -{% endhighlight %} - - - -{% highlight text %} ## x ## no yes ## 2 3 -{% endhighlight %} - - - -{% highlight r %} unclass(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 2 2 1 1 2 ## attr(,"levels") ## [1] "no" "yes" -{% endhighlight %} - - - -{% highlight r %} ## Converting the factor into a numeric as.numeric(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 2 2 1 1 2 -{% endhighlight %} +``` ## `data.frame()`, `read.table()`, `read.csv()`, `[[`, `[`, `$` ## @@ -522,157 +252,52 @@ of the data, dataframes can be imported from an external files using the `read.table()` or `read.csv`, or can be created using `data.frame()`. -{% highlight r %} +```r x <- data.frame( foo = 1:4, bar = c(T,T,F,F), char = c('a', 'b', 'b', 'a') ) x -{% endhighlight %} - - - -{% highlight text %} ## foo bar char ## 1 1 TRUE a ## 2 2 TRUE b ## 3 3 FALSE b ## 4 4 FALSE a -{% endhighlight %} - - - -{% highlight r %} ## Select certain rows and columns: ## - 1:2 rows with 2:3 columns x[c(1:2), c(2:3)] -{% endhighlight %} - - - -{% highlight text %} ## bar char ## 1 TRUE a ## 2 TRUE b -{% endhighlight %} - - - -{% highlight r %} ## - Select columns by name x[c('foo', 'bar')] -{% endhighlight %} - - - -{% highlight text %} ## foo bar ## 1 1 TRUE ## 2 2 TRUE ## 3 3 FALSE ## 4 4 FALSE -{% endhighlight %} - - - -{% highlight r %} ## - Select rows by number and columns by name x[c(2:3), 'foo'] -{% endhighlight %} - - - -{% highlight text %} ## [1] 2 3 -{% endhighlight %} - - - -{% highlight r %} ## - Select column directly x$bar -{% endhighlight %} - - - -{% highlight text %} ## [1] TRUE TRUE FALSE FALSE -{% endhighlight %} - - - -{% highlight r %} ## - Select a single column using the list [[ select command x[[2]] -{% endhighlight %} - - - -{% highlight text %} ## [1] TRUE TRUE FALSE FALSE -{% endhighlight %} - - - -{% highlight r %} ## Check the attributes dim(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 4 3 -{% endhighlight %} - - - -{% highlight r %} nrow(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 4 -{% endhighlight %} - - - -{% highlight r %} ncol(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 3 -{% endhighlight %} - - - -{% highlight r %} length(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] 3 -{% endhighlight %} - - - -{% highlight r %} class(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] "data.frame" -{% endhighlight %} +``` # Attributes of objects in R # @@ -684,78 +309,33 @@ be modified with the `attributes()` and other functions. > Example code -{% highlight r %} +```r x <- 1:3 names(x) -{% endhighlight %} - - - -{% highlight text %} ## NULL -{% endhighlight %} - - - -{% highlight r %} ## Modify names attribute names(x) <- c("alpha", "beta", "gamma") x -{% endhighlight %} - - - -{% highlight text %} ## alpha beta gamma ## 1 2 3 -{% endhighlight %} - - - -{% highlight r %} names(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] "alpha" "beta" "gamma" -{% endhighlight %} - - - -{% highlight r %} ## Add to the class attribute class(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] "integer" -{% endhighlight %} - - - -{% highlight r %} class(x) <- c('my-own-class', class(x)) class(x) -{% endhighlight %} - - - -{% highlight text %} ## [1] "my-own-class" "integer" -{% endhighlight %} +``` # Looking for help in R # > Example code: -{% highlight r %} +```r ## Help for functions help('c') help('class') @@ -765,7 +345,7 @@ help('class') ## Help for specific packages help(package = 'ggplot2') help(package = 'rmarkdown') -{% endhighlight %} +``` # Using R packages # @@ -775,7 +355,7 @@ R packages that extend R's capabilities (such as `ggplot2`, our next lesson). > Example code: -{% highlight r %} +```r ## Check what's loaded search() @@ -787,7 +367,7 @@ library(dplyr) ## Install a package install.packages('ggplot2') install.packages('rmarkdown') -{% endhighlight %} +``` # Working Directory # @@ -798,7 +378,7 @@ where it will place any files you output. > Example code: -{% highlight r %} +```r ## Check the current directory getwd() @@ -807,7 +387,7 @@ getwd('path/to/new/directory') ## Create a directory dir.create('path') -{% endhighlight %} +``` # Importing tabular data # @@ -816,14 +396,14 @@ spreadsheet). The two most commonly-used functions used to import tabular data into R are `read.table()` and `read.csv()`. -{% highlight r %} +```r # \t is the regular expression for the tab key. This means our table is # tab-delimited (tab-separated) data <- read.table("file_name.txt", header = FALSE, sep = "\t") #.csv is a comma-separated table data2 <- read.csv("file_name.csv", header = TRUE) -{% endhighlight %} +``` # Saving/outputting tabular data # @@ -833,13 +413,13 @@ This is where you use `write.table()` or `write.csv()`. > Example code: -{% highlight r %} +```r ## Comma separated file write.table( data, file = "myfile.csv", sep = ",", row.names = TRUE, col.names = FALSE ) write.csv(data, file = 'myfile.csv') -{% endhighlight %} +``` # R operators and special symbols # @@ -889,10 +469,10 @@ identical(x, y) | whether x and y are the same > Example code: -{% highlight r %} +```r # commenting in R is easy! Everything past a # means that R will ignore whatever # is written there. -{% endhighlight %} +``` # Functions # @@ -911,16 +491,16 @@ function and save yourself time and effort later on! > Example code: -{% highlight r %} +```r f <- function(argument) { ## do something here commands more commands } -{% endhighlight %} +``` -{% highlight r %} +```r ## make a function called above10 with the argument x that subsets all variables ## of x that are greater than 10 @@ -947,7 +527,7 @@ columnmean <- function (y, remove.NA = TRUE) { } means } -{% endhighlight %} +``` ## `if .. else`, `ifelse` ## @@ -958,7 +538,7 @@ used within functions. > Example code: -{% highlight r %} +```r x <- 1:20 @@ -978,10 +558,10 @@ if (length(x) < 10) { } ifelse(x < 10, 'yes', 'no') -{% endhighlight %} +``` -{% highlight r %} +```r y <- c() if (length(x) > 3) { y <- 10 @@ -989,17 +569,7 @@ if (length(x) > 3) { y <- 0 } y -{% endhighlight %} - - - -{% highlight text %} ## [1] 0 -{% endhighlight %} - - - -{% highlight r %} ## Or y <- if(length(x) > 3) { @@ -1008,13 +578,8 @@ y <- if(length(x) > 3) { 0 } y -{% endhighlight %} - - - -{% highlight text %} ## [1] 0 -{% endhighlight %} +``` ## `for (x in list)` ## @@ -1025,15 +590,10 @@ data-intensive tasks. > Example code: -{% highlight r %} +```r for (i in 1:10) { print (i) } -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 ## [1] 2 ## [1] 3 @@ -1044,11 +604,6 @@ for (i in 1:10) { ## [1] 8 ## [1] 9 ## [1] 10 -{% endhighlight %} - - - -{% highlight r %} ## Nested loops x <- matrix(1:6, 2, 3) @@ -1057,18 +612,13 @@ for (i in seq_len(nrow(x))) { print (x[i, j]) } } -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 ## [1] 3 ## [1] 5 ## [1] 2 ## [1] 4 ## [1] 6 -{% endhighlight %} +``` # Loop Functions # @@ -1093,38 +643,23 @@ a name; other arguments as necessary. It will always return a list object. > Example code: -{% highlight r %} +```r x <- list(a=1:5, b=rnorm(10), c=rnorm(20,1), d=rnorm(100,5)) lapply(x, mean) -{% endhighlight %} - - - -{% highlight text %} ## $a ## [1] 3 ## ## $b -## [1] -0.08705441 +## [1] 0.03196616 ## ## $c -## [1] 0.8056751 +## [1] 0.6059004 ## ## $d -## [1] 5.003501 -{% endhighlight %} - - - -{% highlight r %} +## [1] 4.991253 x <- list(a=matrix(1:4,2,2), b=matrix(1:6,3,2)) x -{% endhighlight %} - - - -{% highlight text %} ## $a ## [,1] [,2] ## [1,] 1 3 @@ -1135,25 +670,15 @@ x ## [1,] 1 4 ## [2,] 2 5 ## [3,] 3 6 -{% endhighlight %} - - - -{% highlight r %} ## Use an anonymous function with the argument `ele` and then define that ## argument. lapply(x, function(ele) ele[ ,1]) -{% endhighlight %} - - - -{% highlight text %} ## $a ## [1] 1 2 ## ## $b ## [1] 1 2 3 -{% endhighlight %} +``` ## `sapply()` ## @@ -1165,17 +690,12 @@ same length, a matrix is returned. Otherwise a list is returned. > Example code: -{% highlight r %} +```r x <- list(a=1:5, b=rnorm(10), c=rnorm(20,1), d=rnorm(100,5)) sapply(x, mean) -{% endhighlight %} - - - -{% highlight text %} ## a b c d -## 3.00000000 0.01466929 0.91543331 5.03388351 -{% endhighlight %} +## 3.00000000 0.04576579 0.97643692 4.87379248 +``` ## `apply()` ## @@ -1186,81 +706,46 @@ means do the function to the columns. > Example code: -{% highlight r %} +```r x <- matrix (rnorm(200), 20, 10) # keeping number of columns and collapsing rows. This gives a vector of the # means of columns. apply(x, 2, mean) -{% endhighlight %} - - - -{% highlight text %} -## [1] 0.12347715 0.04770594 -0.23962572 0.03181958 -0.28658551 -## [6] 0.56491660 -0.10992450 -0.06652284 0.13402639 0.13483184 -{% endhighlight %} - - - -{% highlight r %} +## [1] 0.17125350 0.25014973 0.05637670 -0.23796313 0.31392653 +## [6] 0.38410844 -0.11645465 0.23821264 -0.04970403 -0.29748247 # this calculates the sum of all rows apply(x, 1, sum) -{% endhighlight %} - - - -{% highlight text %} -## [1] 1.8147165 -1.2577054 2.5727208 -1.3669601 1.7846913 -2.2291840 -## [7] 0.4621210 4.0021023 2.6337700 -1.0559236 -1.5868910 4.6012822 -## [13] -5.4770651 -0.1168984 -2.8939953 -0.1784035 4.6166377 0.2221600 -## [19] -0.1469406 0.2821437 -{% endhighlight %} - - - -{% highlight r %} +## [1] -4.4429821 -0.6574368 1.1911371 3.8616414 -2.2587809 7.8326782 +## [7] 0.6499274 7.3764224 1.3012891 -1.7725843 6.4859628 -1.1623079 +## [13] 2.7518836 -4.4755192 3.7266695 -1.7411285 -4.4297744 -1.3331671 +## [19] -3.0433755 4.3879104 ## Use additional arguments in apply, that it passes to the function quantile. y <- matrix(rnorm(200), 20, 10) apply(y, 1, quantile, probs = c(0.25, 0.75)) -{% endhighlight %} - - - -{% highlight text %} -## [,1] [,2] [,3] [,4] [,5] [,6] -## 25% -0.7647038 -0.7210468 -0.7600068 -0.003778185 -1.3521756 -0.2174439 -## 75% 0.4123376 0.6791686 0.7951071 0.895761712 0.9357661 0.9783733 +## [,1] [,2] [,3] [,4] [,5] [,6] +## 25% -0.5813985 -0.4155099 -0.3978411 -0.06204147 0.1816494 0.1391389 +## 75% 0.5445757 1.1493853 1.4713268 0.95988898 0.9294715 0.8546799 ## [,7] [,8] [,9] [,10] [,11] [,12] -## 25% -0.2691144 -1.55762093 -0.3051579 -0.3270544 -0.6420960 0.06768242 -## 75% 0.6831129 0.04226876 0.7276672 1.1130333 0.3072559 0.63674347 +## 25% -0.6096415 -0.67954171 -0.7963498 -1.1435226 -0.2180918 -0.5935117 +## 75% 0.8149493 0.07678966 0.2362088 0.3142852 0.7269817 0.4444604 ## [,13] [,14] [,15] [,16] [,17] [,18] -## 25% -0.7577007 -0.7888834 -0.4688051 -0.6615460 -0.6491867 -0.5835593 -## 75% 1.1956633 0.5516693 1.1701949 0.7859077 1.0033207 0.8368049 +## 25% -1.0947972 -0.5359842 -1.4454579 0.09282969 -0.2285679 -0.7160392 +## 75% -0.3598023 0.4620311 0.1353984 1.10250081 1.2127786 0.9207038 ## [,19] [,20] -## 25% -0.3362166 -0.9854436 -## 75% 0.3192613 0.7975759 -{% endhighlight %} - - - -{% highlight r %} +## 25% -0.5781012 -0.3674796 +## 75% 0.2874633 1.1798599 # With an array (stacks of matrices: multidimensional). # Create an array that looks like a bunch of 2 by 2 matrices and take the mean of # those. a <- array(rnorm(2*2*10), c(2,2,10)) apply(a, c(1,2), mean) #this keeps the 1st and 2nd dimension -{% endhighlight %} - - - -{% highlight text %} -## [,1] [,2] -## [1,] -0.03276905 -0.004964963 -## [2,] 0.17359854 -0.131615028 -{% endhighlight %} +## [,1] [,2] +## [1,] 0.3332423 -0.07199605 +## [2,] -0.4199372 0.17350889 +``` ## `mapply()` ## @@ -1268,13 +753,8 @@ apply(a, c(1,2), mean) #this keeps the 1st and 2nd dimension a set of arguments. -{% highlight r %} +```r mapply(rep, 1:4, 4:1) -{% endhighlight %} - - - -{% highlight text %} ## [[1]] ## [1] 1 1 1 1 ## @@ -1286,18 +766,8 @@ mapply(rep, 1:4, 4:1) ## ## [[4]] ## [1] 4 -{% endhighlight %} - - - -{% highlight r %} # is the same as list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1)) -{% endhighlight %} - - - -{% highlight text %} ## [[1]] ## [1] 1 1 1 1 ## @@ -1309,10 +779,10 @@ list(rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1)) ## ## [[4]] ## [1] 4 -{% endhighlight %} +``` -{% highlight r %} +```r ## create a function called noise that takes the arguments n, mean, and sd and ## produces random varibles with those specifications. noise<- function(n,mean, sd) { @@ -1321,75 +791,45 @@ noise<- function(n,mean, sd) { ## apply the noise function with n=5, mean=1 and sd=2 noise(5, 1, 2) -{% endhighlight %} - - - -{% highlight text %} -## [1] -0.750147 2.968726 2.636535 2.763283 4.332608 -{% endhighlight %} - - - -{% highlight r %} +## [1] 0.7930424 2.4740363 1.1699390 1.6019916 0.9895137 ## apply the noise function for n= 1 to 5, mean = 1 to 5, and sd = 2 mapply(noise, 1:5, 1:5, 2) -{% endhighlight %} - - - -{% highlight text %} ## [[1]] -## [1] 1.039934 +## [1] 1.247853 ## ## [[2]] -## [1] -0.463912 3.117271 +## [1] 0.8865116 6.9421407 ## ## [[3]] -## [1] 2.935257 4.951166 3.344060 +## [1] 5.044907 -1.076488 3.519662 ## ## [[4]] -## [1] 4.5938752 2.2269367 4.8059080 0.1818445 +## [1] 3.929218 3.703124 5.883016 7.454618 ## ## [[5]] -## [1] 7.1944041 1.8305581 5.4218587 -0.2811959 4.0780825 -{% endhighlight %} - - - -{% highlight r %} +## [1] 6.406565 4.475601 6.538984 2.527339 5.592072 ## which is the same as writing list(noise(1,1,2), noise(2,2,2), noise(3,3,2), noise(4,4,2), noise(5,5,2)) -{% endhighlight %} - - - -{% highlight text %} ## [[1]] -## [1] 1.153676 +## [1] 0.2882813 ## ## [[2]] -## [1] 3.4398032 0.1654232 +## [1] 2.271471 3.320668 ## ## [[3]] -## [1] 1.896161 5.242596 3.849094 +## [1] 5.249599 3.318627 3.110832 ## ## [[4]] -## [1] 2.189090 3.076175 6.126703 1.883835 +## [1] 6.224037 1.035009 2.433546 5.674936 ## ## [[5]] -## [1] 3.026996 4.327652 2.282642 3.456119 3.095467 -{% endhighlight %} - - - -{% highlight r %} +## [1] 7.351962 4.979856 4.897077 4.745803 3.608916 # (the outputs are different because our function makes new random varibles each # time) -{% endhighlight %} +``` ## `tapply()` ## @@ -1399,44 +839,19 @@ simplify the result, like sapply. > Example code: -{% highlight r %} +```r x <- c(1:30) # make a vector with 30 variables f<- gl(3,10) # make a factor variable with 3 levels and 10 reps of each level f -{% endhighlight %} - - - -{% highlight text %} ## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 ## Levels: 1 2 3 -{% endhighlight %} - - - -{% highlight r %} # apply the mean function to x, subsetting by f tapply(x, f, mean) -{% endhighlight %} - - - -{% highlight text %} ## 1 2 3 ## 5.5 15.5 25.5 -{% endhighlight %} - - - -{% highlight r %} # apply the range function to x, subsetting by f tapply(x, f, range) -{% endhighlight %} - - - -{% highlight text %} ## $`1` ## [1] 1 10 ## @@ -1445,7 +860,7 @@ tapply(x, f, range) ## ## $`3` ## [1] 21 30 -{% endhighlight %} +``` # Misc commands # @@ -1457,13 +872,8 @@ particular condition. > Example code: -{% highlight r %} +```r numvec <- c(2,5,8,9,0,6,7,8,4,5,7,11) subset(numvec, numvec < 9 & numvec > 4) -{% endhighlight %} - - - -{% highlight text %} ## [1] 5 8 6 7 8 5 7 -{% endhighlight %} +``` diff --git a/lessons/rintro/livecoding.md b/lessons/rintro/livecoding.md index 4bdd1f4..36e32df 100644 --- a/lessons/rintro/livecoding.md +++ b/lessons/rintro/livecoding.md @@ -13,7 +13,7 @@ published: true We will be using R studio for this course as it's a user-friendly GUI for R. You can also access R from Terminal (Mac) or Command Line (Windows) if you please. -{% highlight r %} +```r getwd() setwd("/Users/thesarahmeister/") @@ -24,7 +24,7 @@ setwd("Desktop/practice-2015-10/sarah/intror") dir() -{% endhighlight %} +``` # Now let's code! # @@ -32,20 +32,15 @@ Download the `airQuality.csv` file from our [Github repo](https://github.com/cod -{% highlight r %} +```r airQuality <- read.csv("airQuality.csv", header = TRUE) -{% endhighlight %} +``` Now let's look at the data using a few different functions -{% highlight r %} +```r head(airQuality) -{% endhighlight %} - - - -{% highlight text %} ## Ozone Solar.R Wind Temp Month Day ## 1 41 190 7.4 67 May 1 ## 2 36 118 8.0 72 May 2 @@ -53,18 +48,8 @@ head(airQuality) ## 4 18 313 11.5 62 May 4 ## 5 NA NA 14.3 56 May 5 ## 6 28 NA 14.9 66 May 6 -{% endhighlight %} - - - -{% highlight r %} head(airQuality, n=8) -{% endhighlight %} - - - -{% highlight text %} ## Ozone Solar.R Wind Temp Month Day ## 1 41 190 7.4 67 May 1 ## 2 36 118 8.0 72 May 2 @@ -74,18 +59,8 @@ head(airQuality, n=8) ## 6 28 NA 14.9 66 May 6 ## 7 23 299 8.6 65 May 7 ## 8 19 99 13.8 59 May 8 -{% endhighlight %} - - - -{% highlight r %} tail(airQuality) -{% endhighlight %} - - - -{% highlight text %} ## Ozone Solar.R Wind Temp Month Day ## 148 14 20 16.6 63 Sep 25 ## 149 30 193 6.9 70 Sep 26 @@ -93,18 +68,8 @@ tail(airQuality) ## 151 14 191 14.3 75 Sep 28 ## 152 18 131 8.0 76 Sep 29 ## 153 20 223 11.5 68 Sep 30 -{% endhighlight %} - - - -{% highlight r %} str(airQuality) -{% endhighlight %} - - - -{% highlight text %} ## 'data.frame': 153 obs. of 6 variables: ## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ... ## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ... @@ -112,18 +77,8 @@ str(airQuality) ## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ... ## $ Month : Factor w/ 5 levels "Aug","Jul","Jun",..: 4 4 4 4 4 4 4 4 4 4 ... ## $ Day : int 1 2 3 4 5 6 7 8 9 10 ... -{% endhighlight %} - - - -{% highlight r %} summary(airQuality) -{% endhighlight %} - - - -{% highlight text %} ## Ozone Solar.R Wind Temp ## Min. : 1.00 Min. : 7.0 Min. : 1.700 Min. :56.00 ## 1st Qu.: 18.00 1st Qu.:115.8 1st Qu.: 7.400 1st Qu.:72.00 @@ -140,61 +95,31 @@ summary(airQuality) ## Sep:30 3rd Qu.:23.0 ## Max. :31.0 ## -{% endhighlight %} - - - -{% highlight r %} nrow(airQuality) -{% endhighlight %} - - - -{% highlight text %} ## [1] 153 -{% endhighlight %} - - - -{% highlight r %} ncol(airQuality) -{% endhighlight %} - - - -{% highlight text %} ## [1] 6 -{% endhighlight %} - - - -{% highlight r %} range(airQuality$Temp) -{% endhighlight %} - - - -{% highlight text %} ## [1] 56 97 -{% endhighlight %} +``` Lets alter our dataset a bit -{% highlight r %} +```r names(airQuality$Solar.R) <- "Solar" airQuality$Random <- rnorm(153, 1, 2) -{% endhighlight %} +``` Okay time to subset our data -{% highlight r %} +```r rowThree <- airQuality[3, ] @@ -209,12 +134,12 @@ may <- airQuality[airQuality$Month == "May", ] highTempOrMay <- airQuality[airQuality$Month == "May" | airQuality$Temp > 80, ] lowTempAndSep <- subset(airQuality, airQuality$Month == "Sep" & airQuality$Temp < 70) -{% endhighlight %} +``` okay let's perform some basic data manipulations on our dataframe and subsetted data -{% highlight r %} +```r mean(airQuality$Ozone) #returns an NA @@ -229,12 +154,12 @@ colMeans(airQualNoMonth, na.rm=TRUE) sd(colFour) sd(airQualNoMonth) -{% endhighlight %} +``` What happens if we have multiple columns that are non-numeric and we don't want to remove them all to use the built-in `colMeans()` function? We can make our **own** function! -{% highlight r %} +```r columnmean <- function (y, remove.NA = TRUE) { nc <- ncol(y) means <- vector("numeric", length=0) @@ -245,26 +170,16 @@ columnmean <- function (y, remove.NA = TRUE) { } columnmean(airQuality) -{% endhighlight %} - - - -{% highlight text %} ## Warning in mean.default(y[, i], na.rm = remove.NA): argument is not numeric ## or logical: returning NA -{% endhighlight %} - - - -{% highlight text %} -## [1] 42.129310 185.931507 9.957516 77.882353 NA 15.803922 -## [7] 0.916601 -{% endhighlight %} +## [1] 42.1293103 185.9315068 9.9575163 77.8823529 NA 15.8039216 +## [7] 0.9840097 +``` Cool right? Now let's try a function with an if/else expression. -{% highlight r %} +```r above80 <- function(y) { y <- na.omit(y) @@ -283,11 +198,6 @@ above80 <- function(y) { } above80(airQuality$Temp) -{% endhighlight %} - - - -{% highlight text %} ## [[1]] ## [1] 81 84 85 82 87 90 87 93 92 82 83 84 85 81 84 83 83 88 92 92 89 82 81 ## [24] 91 81 82 84 87 85 81 82 86 85 82 86 88 86 83 81 81 81 82 86 85 87 89 @@ -298,18 +208,13 @@ above80(airQuality$Temp) ## [24] 61 57 58 57 67 79 76 78 74 67 79 80 79 77 72 65 73 76 77 76 76 76 75 ## [47] 78 73 80 77 73 80 74 80 79 77 79 76 78 78 77 72 75 79 80 78 75 73 76 ## [70] 77 71 71 78 67 76 68 64 71 69 63 70 77 75 76 68 -{% endhighlight %} - - - -{% highlight r %} highlow <- above80(airQuality$Temp) high <- highlow[[1]] low <- highlow[[2]] -{% endhighlight %} +``` # Loop Functions # @@ -325,20 +230,10 @@ The last thing for today are the very useful built-in loop functions. These func Remember the function we made? It can be replaced with an `lapply()` loop. -{% highlight r %} +```r lapply(airQuality, mean, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## Warning in mean.default(X[[i]], ...): argument is not numeric or logical: ## returning NA -{% endhighlight %} - - - -{% highlight text %} ## $Ozone ## [1] 42.12931 ## @@ -358,111 +253,76 @@ lapply(airQuality, mean, na.rm = TRUE) ## [1] 15.80392 ## ## $Random -## [1] 0.916601 -{% endhighlight %} +## [1] 0.9840097 +``` # sapply() # Alternatively we can try `sapply()`. This will return a named numeric vector (simpified from a list) -{% highlight r %} +```r sapply(airQuality, mean, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## Warning in mean.default(X[[i]], ...): argument is not numeric or logical: ## returning NA -{% endhighlight %} - - - -{% highlight text %} -## Ozone Solar.R Wind Temp Month Day -## 42.129310 185.931507 9.957516 77.882353 NA 15.803922 -## Random -## 0.916601 -{% endhighlight %} +## Ozone Solar.R Wind Temp Month Day +## 42.1293103 185.9315068 9.9575163 77.8823529 NA 15.8039216 +## Random +## 0.9840097 +``` # apply() # `apply()` first coerces your dataframe to a matrix, which means all the columns must have the same type. Because our dataframe does not meet this requirement, I'll be taking a subset of the data. However normally this function would be used with matrices. -{% highlight r %} +```r apply(airQuality[,c(-5)], 2, sd, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## Ozone Solar.R Wind Temp Day Random -## 32.987885 90.058422 3.523001 9.465270 8.864520 2.091644 -{% endhighlight %} - - - -{% highlight r %} +## 32.987885 90.058422 3.523001 9.465270 8.864520 1.969605 apply(airQuality[,c(-5)], 1, sd, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} -## [1] 72.89058 46.94296 58.69988 121.77277 25.62174 25.75497 115.47911 -## [8] 38.64267 21.34867 81.09362 30.02192 98.62413 112.61584 105.66532 -## [15] 27.26847 129.50176 116.47159 31.26497 123.08330 23.93098 21.81966 -## [22] 123.51997 23.04112 33.19412 26.94703 110.01967 25.26733 22.88824 -## [29] 92.82220 84.03591 104.86704 122.41928 122.31465 102.42537 79.76670 -## [36] 94.18712 111.56434 51.01858 116.07814 110.38572 123.47021 109.27081 -## [43] 105.86196 58.30500 140.07391 135.54122 72.33150 106.82007 23.63969 -## [50] 46.01597 53.22244 63.11980 33.84056 41.67048 103.81005 55.83172 -## [57] 53.47395 29.03317 42.86913 29.24985 57.85399 106.70907 95.40798 -## [64] 91.31892 48.18947 67.71641 120.77618 105.84699 102.34370 104.07476 -## [71] 67.97639 60.74067 101.58831 66.48642 121.75326 30.67241 98.43243 -## [78] 104.27405 107.64244 72.14305 81.66779 26.80088 107.47906 122.58368 -## [85] 109.45345 84.18191 35.62429 36.17203 79.75871 103.09283 93.26686 -## [92] 94.07138 38.88395 29.95196 38.00552 43.19716 35.16671 41.00030 -## [99] 99.41880 87.31363 81.25121 94.66409 59.49559 73.33740 104.78695 -## [106] 59.10313 35.31902 32.01702 31.20583 45.29166 91.96314 70.91207 -## [113] 98.68951 25.94546 105.50772 78.26531 97.84691 80.28066 64.05905 -## [120] 75.83915 86.56595 88.41150 70.34087 68.32943 77.89078 72.86685 -## [127] 75.04048 43.14471 39.50083 97.28408 85.08832 88.34328 99.40968 -## [134] 89.14843 99.03010 90.96073 25.09182 44.32411 89.59789 84.73321 -## [141] 27.09205 90.80568 77.70208 91.01537 25.24025 52.22038 26.51072 -## [148] 21.49442 72.38703 58.93778 72.25072 50.24947 83.25093 -{% endhighlight %} +## [1] 72.82270 47.23650 59.37151 121.62190 24.84774 26.36542 115.67801 +## [8] 38.40799 21.54229 81.28748 30.96865 98.78830 112.67089 105.33482 +## [15] 26.06268 129.50212 116.96760 30.41116 123.01349 23.49386 21.77755 +## [22] 123.18123 21.93297 33.76490 27.92711 109.99494 23.57278 22.99182 +## [29] 93.55545 84.47980 105.21199 122.09558 122.80170 102.05193 79.94285 +## [36] 94.19656 111.30217 51.79243 116.17630 110.39896 123.98668 108.77720 +## [43] 105.49187 57.91526 139.74419 135.90928 72.64085 106.43035 23.10204 +## [50] 47.45831 53.64691 62.60415 34.39119 42.17424 104.44171 55.94074 +## [57] 54.14359 28.78309 43.28784 28.37771 57.90655 106.88412 95.12412 +## [64] 91.66259 48.21788 67.74415 120.33248 105.71376 102.17632 104.09465 +## [71] 68.53206 60.21272 102.35465 65.89063 121.89362 30.07331 98.23169 +## [78] 103.76614 107.19162 71.03225 81.71409 27.04795 107.51568 123.32384 +## [85] 110.08362 84.38543 36.13764 36.37102 79.16091 102.96910 93.30029 +## [92] 94.09817 39.48974 30.45752 37.68110 42.68174 35.07430 40.31110 +## [99] 99.76131 87.38370 81.39169 94.11566 59.85649 72.74146 104.86862 +## [106] 59.22848 35.14605 32.27141 32.23079 45.22937 92.05330 70.57883 +## [113] 98.45452 25.29983 104.64771 78.81379 97.84441 79.33138 64.18218 +## [120] 75.31127 86.26419 87.84973 70.58833 69.42031 76.89533 72.66027 +## [127] 74.43070 42.60674 40.47040 97.39282 84.61841 88.89872 99.49476 +## [134] 88.87661 99.30527 91.12215 25.12572 44.34830 89.09724 85.00021 +## [141] 27.43158 90.02379 77.34522 90.78009 24.87064 52.91799 27.15257 +## [148] 20.40687 72.09858 59.95946 71.92740 50.08571 83.41606 +``` # mapply # `mapply` is a multivariate apply (like `lapply`) that applies a function over a set of arguments. -{% highlight r %} +```r mapply(range, airQuality[,c(1,3,6)], na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## Ozone Wind Day ## [1,] 1 1.7 1 ## [2,] 168 20.7 31 -{% endhighlight %} +``` # tapply # `tapply` is used to apply a function over subsets of a vector. It takes two arguements: a vector to apply to function on and a factor variable that subsets the vector. It will simplify the result, like sapply. -{% highlight r %} +```r tapply(airQuality$Ozone, airQuality$Month, range, na.rm = TRUE) -{% endhighlight %} - - - -{% highlight text %} ## $Aug ## [1] 9 168 ## @@ -477,5 +337,5 @@ tapply(airQuality$Ozone, airQuality$Month, range, na.rm = TRUE) ## ## $Sep ## [1] 7 96 -{% endhighlight %} +``` diff --git a/lessons/rmarkdown/cheatsheet.md b/lessons/rmarkdown/cheatsheet.md index 73eec8a..f3a62d5 100644 --- a/lessons/rmarkdown/cheatsheet.md +++ b/lessons/rmarkdown/cheatsheet.md @@ -111,7 +111,7 @@ have it change the numbering for you. These can all be fixed by using > Example code: -{% highlight r %} +```r library(captioner) tabNums <- captioner(prefix = 'Table') tabNums('tab1', 'Caption for table 1') @@ -124,7 +124,7 @@ have it change the numbering for you. These can all be fixed by using ## cite in text using `r figNums('fig1', display = 'cite')` ## Include the caption for the figure in the code chunk ## using fig.cap=`r figNums('fig1')` -{% endhighlight %} +``` ## `render` ## diff --git a/lessons/rmarkdown/intro.md b/lessons/rmarkdown/intro.md index 6212086..b3a2c83 100644 --- a/lessons/rmarkdown/intro.md +++ b/lessons/rmarkdown/intro.md @@ -1,6 +1,6 @@ --- title: "Introduction: Report generation using R Markdown" -published: true +published: false author: - Luke W. Johnston date: 2015-10 @@ -64,7 +64,7 @@ both you and the computer will understand. A typical YAML metadata looks like: ``` --- title: "Introducing R Markdown" -published: true +published: false author: "Luke Johnston" date: "July 23, 2015" output: html_document @@ -113,16 +113,11 @@ with the power of R! Within the R Markdown document, you can insert R code chunks to input and create the output into the document. They look like this: - {% highlight r %} + ```r testCode <- 1:10 print(testCode) - {% endhighlight %} - - - - {% highlight text %} ## [1] 1 2 3 4 5 6 7 8 9 10 - {% endhighlight %} + ``` You'll notice that the code chunk printed off 1 to 10 and put it into the document. I didn't do that, R did it for me! @@ -151,11 +146,11 @@ object to print: including a `results = 'asis'` option in the code chunk. Here is an example: -{% highlight r %} +```r library(pander) pander(head(cars), caption = 'First few rows of the cars dataset.', style = 'rmarkdown') -{% endhighlight %} +``` @@ -176,113 +171,16 @@ Just like you can get R to create a table, you can also get R to create a figure. There are several chunk options for creating figures, including `fig.height`, `fig.width`, `dpi`, and `fig.cap`. -{% highlight r %} -plot(cars) -{% endhighlight %} - -![Scatterplot of speed and distance.](/images/fig-1.png) - -## Bibliography - -If you want to cite articles or references, use the `@` tag with the author key -(eg. `@Smith1995`). In addition to the `@` tag, you need to include the -following YAML option to the YAML header, with the location of the bibliography -file on your computer. - -``` -bibliography: /path/to/file -``` - -## Figure and table labels - -> *Note: The below labels don't work with conversion on the website. See [this -link](intro.html) for the HTML converted version that chows these labels -working.* - -Often you want to include figure or caption labels that you can cite easily in -your document. So, let's create two labels for a table and a figure using the -`captioner` command from the `captioner` package. First we set the prefix for -the caption: - - -{% highlight r %} -library(captioner) -figNums <- captioner(prefix = 'Figure') -tabNums <- captioner(prefix = 'Table') -{% endhighlight %} - -Then, we store the caption label and caption title (including the `results='hide'` -code chunk option, as these commands will print them to the output): -{% highlight r %} -figNums(name = 'figCars', caption = 'First few rows of the cars dataset.', - display = FALSE) -tabNums(name = 'tabCars', caption = 'Scatterplot of speed and distance.', - display = FALSE) -{% endhighlight %} -Now we can cite them in-text, using the ` Figure 1 `, -which then looks like Figure 1. And with the -plot, include this option in the plot code chunk ` Figure 1: First few rows of the cars dataset. `: -{% highlight r %} -plot(cars) -{% endhighlight %} - -![Figure 1: First few rows of the cars dataset.](/images/plotCap-1.png) - -Or with a table using ` Table 1 ` to show -Table 1. - - -{% highlight r %} -set.caption(tabNums('tabCars')) -pander(head(cars), style = 'rmarkdown') -{% endhighlight %} - - - -| speed | dist | -|:-------:|:------:| -| 4 | 2 | -| 4 | 10 | -| 7 | 4 | -| 7 | 22 | -| 8 | 16 | -| 9 | 10 | - -Table: Table 1: Scatterplot of speed and distance. -## Generating this `.Rmd` document and sharing with others -If you download the [source `.Rmd` version of this document](https://github.com/codeasmanuscript/workshops/tree/master/lessons/rmarkdown/intro.Rmd), -you can recreate the web version and the HTML version by using these commands -below (note the `eval=FALSE` in the code chunk): -{% highlight r %} -library(rmarkdown) -## html -render('intro.Rmd', 'html_document') -## md version on the website -render('intro.Rmd', 'md_document') -{% endhighlight %} -R Markdown can also make it very easy to collaborate with people who may not be -as tech-savvy and who don't know much outside of Microsoft Word by converting -all your `.Rmd` files into Word! Try it out: -{% highlight r %} -render('intro.Rmd', 'word_document') -{% endhighlight %} -Using R Markdown can help with making your code easier to understand. That's -because as you type out your code, you are also typing explanations and reasons -in markdown. You can then easily create documents, manuscripts, theses, and -other file types that others (and your future self) can read to understand your -code and your analysis better. Combine R and R Markdown with Git and GitHub and -you have a powerful tool for making your work better, more scientifically -rigorous and transparent, and share-able so others can use your work easily! diff --git a/lessons/rplotting/assignment.md b/lessons/rplotting/assignment.md deleted file mode 100644 index 2ac5889..0000000 --- a/lessons/rplotting/assignment.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -title: "Assignment: ..." -author: - - -date: YYYY-MM-DD -fontsize: 12pt -geometry: margin=1in -papersize: letterpaper -layout: page -sidebar: false -tag: - - Lessons - - Assignment - - (lesson topic) -categories: - - Lessons - - (lesson topic) ---- - -# List of potential exercises here: # - -* List item 1 -* List item 2 diff --git a/lessons/rplotting/cheatsheet.md b/lessons/rplotting/cheatsheet.md deleted file mode 100644 index ab1c6a3..0000000 --- a/lessons/rplotting/cheatsheet.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: "Cheatsheet: (topic)" -author: - - -date: YYYY-MM-DD -fontsize: 12pt -geometry: margin=1in -papersize: letterpaper -layout: page -sidebar: false -tag: - - Lessons - - Cheatsheet - - (lesson topic) -categories: - - Lessons - - (lesson topic) ---- - -Brief description/intro - -# (language) commands: Some useful or common ones # - -## `command` ## - -> Description - -> Example code: - - example code (note the tab/4 spaces indent) - -## `command` ## - -> Description - -> Example code: - - example code (note the tab/4 spaces indent) - diff --git a/lessons/rplotting/plottinginR.md b/lessons/rplotting/plottinginR.md deleted file mode 100644 index 4ed8a5a..0000000 --- a/lessons/rplotting/plottinginR.md +++ /dev/null @@ -1,156 +0,0 @@ ---- -title: "Plotting in R" -author: "Sarah Meister" -date: 2015-10-15 -fontsize: 12pt -geometry: margin=1in -papersize: letterpaper -layout: page -sidebar: false -output: slidy_presentation ---- - -# Base plotting systems # - -The base plotting system in R is the `plot()` function. - - -{% highlight r %} -library(datasets) -data(cars) -with(cars, plot(speed, dist)) -{% endhighlight %} - -![plot of chunk unnamed-chunk-1](/images/unnamed-chunk-1-1.png) - -# the Lattice system # - -This is the second base plotting system in R. Plots are created with a single call function. Margins and spacing are set automatically because the entire plot is specified at once. It is good at putting many plots on a screen and thus you can see things like how y changes with x over z. - - -{% highlight r %} -library (lattice) -state <- data.frame(state.x77, region = state.region) -xyplot(Life.Exp ~ Income | region, data=state, layout = c(4,1)) -{% endhighlight %} - -![plot of chunk unnamed-chunk-2](/images/unnamed-chunk-2-1.png) - -# ggplot2 # - -The ggplot2 package is an R package that uses the "Grammar of Graphics" to put together different aspects of data visualization to build quality graphics. It is a very popular package. - - - {% highlight r %} - install.packages("ggplot2") - {% endhighlight %} - - - - {% highlight text %} - ## Installing package into '/home/luke/R/x86_64-pc-linux-gnu-library/3.2' - ## (as 'lib' is unspecified) - {% endhighlight %} - - - - {% highlight text %} - ## Error in contrib.url(repos, type): trying to use CRAN without setting a mirror - {% endhighlight %} - - - - {% highlight r %} - - library(ggplot2) - {% endhighlight %} - - - - {% highlight text %} - ## Loading required package: methods - {% endhighlight %} - - # the Basics # - - `qplot()` is the basic plotting function in `ggplot2` and plots are made up of aesthetics (size, shape, color, etc.) and geoms (points, lines, etc.). - - `ggplot()` is the core function and can do things that `qplot` cannot. - - -{% highlight r %} -str(mpg) -{% endhighlight %} - - - -{% highlight text %} -## 'data.frame': 234 obs. of 11 variables: -## $ manufacturer: Factor w/ 15 levels "audi","chevrolet",..: 1 1 1 1 1 1 1 1 1 1 ... -## $ model : Factor w/ 38 levels "4runner 4wd",..: 2 2 2 2 2 2 2 3 3 3 ... -## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ... -## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ... -## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ... -## $ trans : Factor w/ 10 levels "auto(av)","auto(l3)",..: 4 9 10 1 4 9 1 9 4 10 ... -## $ drv : Factor w/ 3 levels "4","f","r": 2 2 2 2 2 2 2 1 1 1 ... -## $ cty : int 18 21 20 21 16 18 18 18 16 20 ... -## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ... -## $ fl : Factor w/ 5 levels "c","d","e","p",..: 4 4 4 4 4 4 4 4 4 4 ... -## $ class : Factor w/ 7 levels "2seater","compact",..: 2 2 2 2 2 2 2 2 2 2 ... -{% endhighlight %} - - - -{% highlight r %} - -qplot(displ, hwy, data = mpg) -{% endhighlight %} - -![plot of chunk unnamed-chunk-4](/images/unnamed-chunk-4-1.png) - -{% highlight r %} - -qplot(displ, hwy, data = mpg, color=drv) -{% endhighlight %} - -![plot of chunk unnamed-chunk-4](/images/unnamed-chunk-4-2.png) - -{% highlight r %} - -qplot(displ, hwy, data = mpg, geom=c("point", "smooth")) -{% endhighlight %} - - - -{% highlight text %} -## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method. -{% endhighlight %} - -![plot of chunk unnamed-chunk-4](/images/unnamed-chunk-4-3.png) - -{% highlight r %} - -qplot(hwy, data=mpg, fill=drv) -{% endhighlight %} - - - -{% highlight text %} -## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this. -{% endhighlight %} - -![plot of chunk unnamed-chunk-4](/images/unnamed-chunk-4-4.png) - -{% highlight r %} - -qplot(displ, hwy, data=mpg, facets =.~drv) -{% endhighlight %} - -![plot of chunk unnamed-chunk-4](/images/unnamed-chunk-4-5.png) - -{% highlight r %} - -qplot(hwy, data=mpg, facet=drv~., binwidth=2) -{% endhighlight %} - -![plot of chunk unnamed-chunk-4](/images/unnamed-chunk-4-6.png) diff --git a/lessons/rplotting/slides.md b/lessons/rplotting/slides.md deleted file mode 100644 index 5bfb8c2..0000000 --- a/lessons/rplotting/slides.md +++ /dev/null @@ -1,103 +0,0 @@ ---- -title: "...title..." -author: -date: YYYY-MM-DD -layout: page -sidebar: false -classoption: xcolor=dvipsnames -tag: - - Lessons - - Slides - - (lesson topic) -categories: - - Lessons - - (lesson topic) -slide-level: 1 -fontsize: 8pt -header-includes: - - \input{../slideOptions.tex} ---- - -# Welcome to our Data-related workshop # - -## Purpose: ## - -To teach a few tips and tricks for more efficiently managing your -data, tracking your computer files, understanding appropriate -analytical approaches, and speeding up the process from code to -tables. - -. . . - -## Significance: ## - -Topics we cover will help you get more comfortable with data, reduce -the chance of overlooked errors, and give you more control over your -work. They are also all important parts of a science movement gaining -increasing attention -- Reproducible Research. - -# Caveat: We aren't here to teach statistics # - -Need help with stats? Use these resources! - -* U of T Statistical Consulting Services ([click here](http://www.utstat.toronto.edu/wordpress/?page_id=25)) - -* - -* - -# Overview of other workshops? # - -# Notes and help during this workshop # - -Go to this website: - - - -# Slide title # - -Text, some **bolded**, or *italics* - -__bold__ or _italics_ also works. - -[URL link here](http://link/here.com) - -``` - Code block -``` - -Or: - - Code block - -List here: - -* Item 1 -* Item 2 - -List, but one-by-one 'animation': - -> * Item 1 appears first -> * Item 2 appears second - -Inline `code text` - -Image: -![](../images/file.png) - -Footnote[^1] - -[^1]: Footnote text - -# Live coding # - -# Main Exercise # - -A pause/transition here (the . . .) - -. . . - -Numbered list - -1. Exercise 1 -2. Exercise 2