Fix typos

switch volcano plot to a case_when statement.
twbattaglia · Apr 3, 2017 · 4f9c67b · 4f9c67b
1 parent f8a5328
commit 4f9c67b
Show file tree

Hide file tree

Showing 6 changed files with 134 additions and 120 deletions.
diff --git a/README.Rmd b/README.Rmd
@@ -6,9 +6,9 @@ output: github_document
 ```{r setup, include=FALSE}
 library(knitr)
 knitr::opts_chunk$set(echo = TRUE)
-library(DESeq2) # statistical analysis 
-library(ggplot2) # plotting 
-library(knitr) # for better formatting
+library(DESeq2)
+library(ggplot2) 
+library(knitr)
 library(clusterProfiler)
 library(biomaRt)
 library(ReactomePA)
@@ -39,19 +39,19 @@ Miniconda is a comprehensive and easy to use package manager for Python (among o
 
 ```{bash, eval = F}
 # Download the Miniconda3 installer to your home directory (Only for macOS)
-wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O ~/minoconda.sh
+wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O ~/miniconda.sh
 
 # Download the Miniconda3 installer to your home directory (Only for LINUX or Cluster)
-wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/minoconda.sh
+wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
 
 # Run the miniconda installation
-bash minoconda.sh -b -f -p ~/miniconda
+bash miniconda.sh -b -f -p ~/miniconda
 
 # Add miniconda to the system path
 echo 'PATH="$HOME/miniconda/bin:$PATH"' >> ~/.bash_profile
 
 # Source system file to activate miniconda
-~/.bash_profile
+source ~/.bash_profile
 
 # Add bioinformatic channels for downloading required packages
 conda config --add channels conda-forge
@@ -78,30 +78,30 @@ cd new_workflow
 ##### Folder breakdown
 ```{bash, eval = F}
 ── new_workflow/
-  │   └── annotation/ -> Genome annotation file (.GTF/.GFF)
+  │   └── annotation/               <- Genome annotation file (.GTF/.GFF)
   │  
-  │   └── genome/ -> Host genome file (.FASTA)
+  │   └── genome/                   <- Host genome file (.FASTA)
   │  
-  │   └── input/ -> Location of input  RNAseq data
+  │   └── input/                    <- Location of input  RNAseq data
   │  
-  │   └── output/ ->  Data generated during processing steps
-  │       ├── 1_initial_qc/ - Main alignment files for each sample
-  │       ├── 2_trimmed_output/ - Log from running STAR alignment step
-  │       ├── 3_rRNA/ - STAR alignment counts output (for comparison with featureCounts)
-  │           ├── aligned/ - Sequences that aligned to rRNA databases (rRNA contaminated)
-  │           ├── filtered/ - Sequences with rRNA sequences removed  (rRNA-free)
-  │           ├── logs/ - logs from running SortMeRNA
-  │       ├── 4_aligned_sequences/ - Main alignment files for each sample
-  │           ├── aligned_bam/ - Alignment files generated from STAR (.BAM)
-  │           ├── aligned_logs/ - Log from running STAR alignment step
-  │       ├── 5_final_counts/ - Summarized gene counts across all samples
-  │       ├── 6_multiQC/ - Overall report of logs for each step
+  │   └── output/                   <- Data generated during processing steps
+  │       ├── 1_initial_qc/         <- FastQC quality reports for the raw input sequences
+  │       ├── 2_trimmed_output/     <- Trimmed sequences with their trimming and FastQC reports
+  │       ├── 3_rRNA/               <- SortMeRNA output from rRNA removal
+  │           ├── aligned/          <-  Sequences that aligned to rRNA databases (rRNA contaminated)
+  │           ├── filtered/         <-  Sequences with rRNA sequences removed  (rRNA-free)
+  │           ├── logs/             <- logs from running SortMeRNA
+  │       ├── 4_aligned_sequences/  <- Main alignment files for each sample
+  │           ├── aligned_bam/      <-  Alignment files generated from STAR (.BAM)
+  │           ├── aligned_logs/     <- Log from running STAR alignment step
+  │       ├── 5_final_counts/       <- Summarized gene counts across all samples
+  │       ├── 6_multiQC/            <- Overall report of logs for each step
   │  
-  │   └── sortmerna_db/ -> Folder to store the rRNA databases for SortMeRNA
-  │       ├── index/ - indexed versions of the rRNA sequences for faster alignment
-  │       ├── rRNA_databases/ - rRNA sequences from bacteria, archea and eukaryotes
+  │   └── sortmerna_db/             <- Folder to store the rRNA databases for SortMeRNA
+  │       ├── index/                <- indexed versions of the rRNA sequences for faster alignment
+  │       ├── rRNA_databases/       <- rRNA sequences from bacteria, archaea and eukaryotes
   │  
-  │   └── star_index/ -> Folder to store the indexed genome files from STAR 
+  │   └── star_index/               <-  Folder to store the indexed genome files from STAR 
 ```
 
 
@@ -181,8 +181,8 @@ input/sample.fastq
 #### Output
 ```{bash, eval = F}
 ── results/1_initial_qc/
-    └──  sample_fastqc.html - HTML file of FastQC fquality analysis figures
-    └──  sample_fastqc.zip - FastQC report data
+    └──  sample_fastqc.html   <- HTML file of FastQC quality analysis figures
+    └──  sample_fastqc.zip    <- FastQC report data
 ```
 
 ---
@@ -223,10 +223,10 @@ input/sample.fastq
 #### Output
 ```{bash, eval = F}
 ── results/2_trimmed_output/
-     └──  sample_trimmed.fq - Trimmed sequencing file (.fastq)
-     └──  sample_trimmed.html - HTML file of FastQC fquality analysis figures
-     └──  sample_trimmed.zip -  FastQC report data
-     └──  sample.fastq.trimming_report.txt -  Cutadapt trimming report
+     └──  sample_trimmed.fq                 <-  Trimmed sequencing file (.fastq)
+     └──  sample_trimmed.html               <- HTML file of FastQC quality analysis figures
+     └──  sample_trimmed.zip                <- FastQC report data
+     └──  sample.fastq.trimming_report.txt  <-   Cutadapt trimming report
 ```
 
 ---
@@ -307,9 +307,9 @@ mv -v results/3_rRNA/aligned//sample_aligned.log results/3_rRNA/logs
 #### Output
 ```{bash, eval = F}
 ── results/3_rRNA/
-    └── aligned/sample_aligned.fq - sequences with rRNA contamination
-    └── filtered/sample_filtered.fq - sequences without any rRNA contamination
-    └── logs/sample_aligned.log - log from SortMeRNA analysis
+    └── aligned/sample_aligned.fq     <-  sequences with rRNA contamination
+    └── filtered/sample_filtered.fq   <- sequences without any rRNA contamination
+    └── logs/sample_aligned.log       <- log from SortMeRNA analysis
 ```
 
 
@@ -370,9 +370,9 @@ mv -v results/4_aligned_sequences/sample*Log.out results/4_aligned_sequences/ali
 #### Output
 ```{bash, eval = F}
 ── results/4_aligned_sequences/
-    └── aligned_bam/sampleAligned.sortedByCoord.out.bam - Sorted BAM alignment fole
-    └── aligned_logs/sampleLog.final.out - Log of STAR alignment rate
-    └── aligned_logs/sampleLog.out - Log of steps take during STAR alignment
+    └── aligned_bam/sampleAligned.sortedByCoord.out.bam   <- Sorted BAM alignment file
+    └── aligned_logs/sampleLog.final.out                  <- Log of STAR alignment rate
+    └── aligned_logs/sampleLog.out                        <- Log of steps taken during STAR alignment
 ```
 
 ---
@@ -417,8 +417,8 @@ cd ../../../
 #### Output
 ```{bash, eval = F}
 ── results/5_final_counts/
-    └── final_counts.txt - Final gene counts across all samples
-    └── final_counts.txt.summary - Summary of gene summarization 
+    └── final_counts.txt                <- Final gene counts across all samples
+    └── final_counts.txt.summary        <- Summary of gene summarization 
 ```
 
 ---
@@ -450,8 +450,8 @@ multiqc results \
 #### Output
 ```{bash, eval = F}
 ── results/6_multiQC/
-    └── multiqc_report.html - Beautiful figures representing the logs from each step
-    └── multiqc_data/ - Folder of data that multiqc found from various log files
+    └── multiqc_report.html     <- Beautiful figures representing the logs from each step
+    └── multiqc_data/           <-  Folder of data that multiqc found from various log files
 ```
 
 
@@ -464,7 +464,7 @@ Once the workflow has completed, you can now use the gene count table as an inpu
 ```{r install, eval = F}
 source("https://bioconductor.org/biocLite.R")
 biocLite("DESeq2") ; library(DESeq2)
-biocLite("ggplot2") ; library(DESeq2)
+biocLite("ggplot2") ; library(ggplot2)
 biocLite("clusterProfiler") ; library(clusterProfiler)
 biocLite("biomaRt") ; library(biomaRt)
 biocLite("ReactomePA") ; library(ReactomePA)
@@ -580,6 +580,13 @@ results$entrez <- mapIds(x = org.Mm.eg.db,
                          keytype = "SYMBOL",
                          multiVals = "first")
 
+# Add ENSEMBL
+results$ensembl <- mapIds(x = org.Mm.eg.db,
+                          keys = row.names(results),
+                          column = "ENSEMBL",
+                          keytype = "SYMBOL",
+                          multiVals = "first")
+
 # Subset for only significant genes (q < 0.05)
 results_sig <- subset(results, padj < 0.05)
 head(results_sig)
@@ -673,28 +680,25 @@ pheatmap(mat = mat,
 ```{r volcano_plot, eval = T}
 # Gather Log-fold change and FDR-corrected pvalues from DESeq2 results
 ## - Change pvalues to -log10 (1.3 = 0.05)
-data <- data.frame(pval = -log10(results$padj), 
-                   lfc = results$log2FoldChange, 
-                   row.names = row.names(results))
+data <- data.frame(gene = row.names(results),
+                   pval = -log10(results$padj), 
+                   lfc = results$log2FoldChange)
 
 # Remove any rows that have NA as an entry
 data <- na.omit(data)
 
 # Color the points which are up or down
 ## If fold-change > 0 and pvalue > 1.3 (Increased significant)
 ## If fold-change < 0 and pvalue > 1.3 (Decreased significant)
-data <- mutate(data, color = ifelse(test = lfc > 0 & pval > 1.3, 
-                                    yes = "Increased", 
-                                    no = ifelse(test = lfc < 0 & pval > 1.3, 
-                                                yes = "Decreased", 
-                                                no = "nonsignificant")))
-
+data <- mutate(data, color = case_when(lfc > 0 & pval > 1.3 ~ "Increased",
+                                       lfc < 0 & pval > 1.3 ~ "Decreased",
+                                       TRUE ~ "nonsignificant")) # catch-all: pval == 1.3 or lfc == 0 would otherwise yield NA
 # Make a basic ggplot2 object with x-y values
 vol <- ggplot(data, aes(x = lfc, y = pval, color = color))
 
 # Add ggplot2 layers
 vol +   
-  ggtitle(label = "Volcano Plot", subtitle = "Colored by fold-change directionality") +
+  ggtitle(label = "Volcano Plot", subtitle = "Colored by fold-change direction") +
   geom_point(size = 2.5, alpha = 0.8, na.rm = T) +
   scale_color_manual(name = "Directionality",
                      values = c(Increased = "#008B00", Decreased = "#CD4F39", nonsignificant = "darkgray")) +