Skip to content

Commit

Permalink
Fix typos
Browse files Browse the repository at this point in the history
switch volcano plot to a case_when statement.
  • Loading branch information
twbattaglia committed Apr 3, 2017
1 parent f8a5328 commit 4f9c67b
Show file tree
Hide file tree
Showing 6 changed files with 134 additions and 120 deletions.
110 changes: 57 additions & 53 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ output: github_document
```{r setup, include=FALSE}
library(knitr)
knitr::opts_chunk$set(echo = TRUE)
library(DESeq2) # statistical analysis
library(ggplot2) # plotting
library(knitr) # for better formatting
library(DESeq2)
library(ggplot2)
library(knitr)
library(clusterProfiler)
library(biomaRt)
library(ReactomePA)
Expand Down Expand Up @@ -39,19 +39,19 @@ Miniconda is a comprehensive and easy to use package manager for Python (among o

```{bash, eval = F}
# Download the Miniconda3 installer to your home directory (Only for macOS)
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O ~/minoconda.sh
wget https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O ~/miniconda.sh
# Download the Miniconda3 installer to your home directory (Only for LINUX or Cluster)
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/minoconda.sh
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
# Run the miniconda installation
bash minoconda.sh -b -f -p ~/miniconda
bash miniconda.sh -b -f -p ~/miniconda
# Add miniconda to the system path.
# The appended line must be a complete, balanced assignment: an unterminated
# double quote here would make `source ~/.bash_profile` fail in the next step.
echo 'PATH="$HOME/miniconda/bin:$PATH"' >> ~/.bash_profile
# Source system file to activate miniconda
~/.bash_profile
source ~/.bash_profile
# Add bioinformatic channels for downloading required packages
conda config --add channels conda-forge
Expand All @@ -78,30 +78,30 @@ cd new_workflow
##### Folder breakdown
```{bash, eval = F}
── new_workflow/
│   └── annotation/ -> Genome annotation file (.GTF/.GFF)
│   └── annotation/ <- Genome annotation file (.GTF/.GFF)
│  
│   └── genome/ -> Host genome file (.FASTA)
│   └── genome/ <- Host genome file (.FASTA)
│  
│   └── input/ -> Location of input RNAseq data
│   └── input/ <- Location of input RNAseq data
│  
│   └── output/ -> Data generated during processing steps
│   ├── 1_initial_qc/ - Main alignment files for each sample
│   ├── 2_trimmed_output/ - Log from running STAR alignment step
│   ├── 3_rRNA/ - STAR alignment counts output (for comparison with featureCounts)
│   ├── aligned/ - Sequences that aligned to rRNA databases (rRNA contaminated)
│   ├── filtered/ - Sequences with rRNA sequences removed (rRNA-free)
│   ├── logs/ - logs from running SortMeRNA
│   ├── 4_aligned_sequences/ - Main alignment files for each sample
│   ├── aligned_bam/ - Alignment files generated from STAR (.BAM)
│   ├── aligned_logs/ - Log from running STAR alignment step
│   ├── 5_final_counts/ - Summarized gene counts across all samples
│   ├── 6_multiQC/ - Overall report of logs for each step
│   └── output/ <- Data generated during processing steps
│   ├── 1_initial_qc/ <- FastQC quality reports for each input sample
│   ├── 2_trimmed_output/ <- Trimmed sequences with their trimming and FastQC reports
│   ├── 3_rRNA/ <- SortMeRNA output (rRNA-aligned and rRNA-filtered sequences)
│   ├── aligned/ <- Sequences that aligned to rRNA databases (rRNA contaminated)
│   ├── filtered/ <- Sequences with rRNA sequences removed (rRNA-free)
│   ├── logs/ <- logs from running SortMeRNA
│   ├── 4_aligned_sequences/ <- Main alignment files for each sample
│   ├── aligned_bam/ <- Alignment files generated from STAR (.BAM)
│   ├── aligned_logs/ <- Log from running STAR alignment step
│   ├── 5_final_counts/ <- Summarized gene counts across all samples
│   ├── 6_multiQC/ <- Overall report of logs for each step
│  
│   └── sortmerna_db/ -> Folder to store the rRNA databases for SortMeRNA
│   ├── index/ - indexed versions of the rRNA sequences for faster alignment
│   ├── rRNA_databases/ - rRNA sequences from bacteria, archea and eukaryotes
│   └── sortmerna_db/ <- Folder to store the rRNA databases for SortMeRNA
│   ├── index/ <- indexed versions of the rRNA sequences for faster alignment
│   ├── rRNA_databases/ <- rRNA sequences from bacteria, archaea and eukaryotes
│  
│   └── star_index/ -> Folder to store the indexed genome files from STAR
│   └── star_index/ <- Folder to store the indexed genome files from STAR
```


Expand Down Expand Up @@ -181,8 +181,8 @@ input/sample.fastq
#### Output
```{bash, eval = F}
── results/1_initial_qc/
└── sample_fastqc.html - HTML file of FastQC fquality analysis figures
└── sample_fastqc.zip - FastQC report data
└── sample_fastqc.html <- HTML file of FastQC quality analysis figures
└── sample_fastqc.zip <- FastQC report data
```

---
Expand Down Expand Up @@ -223,10 +223,10 @@ input/sample.fastq
#### Output
```{bash, eval = F}
── results/2_trimmed_output/
└── sample_trimmed.fq - Trimmed sequencing file (.fastq)
└── sample_trimmed.html - HTML file of FastQC fquality analysis figures
└── sample_trimmed.zip - FastQC report data
└── sample.fastq.trimming_report.txt - Cutadapt trimming report
└── sample_trimmed.fq <- Trimmed sequencing file (.fastq)
└── sample_trimmed.html <- HTML file of FastQC quality analysis figures
└── sample_trimmed.zip <- FastQC report data
└── sample.fastq.trimming_report.txt <- Cutadapt trimming report
```

---
Expand Down Expand Up @@ -307,9 +307,9 @@ mv -v results/3_rRNA/aligned//sample_aligned.log results/3_rRNA/logs
#### Output
```{bash, eval = F}
── results/3_rRNA/
└── aligned/sample_aligned.fq - sequences with rRNA contamination
└── filtered/sample_filtered.fq - sequences without any rRNA contamination
└── logs/sample_aligned.log - log from SortMeRNA analysis
└── aligned/sample_aligned.fq <- sequences with rRNA contamination
└── filtered/sample_filtered.fq <- sequences without any rRNA contamination
└── logs/sample_aligned.log <- log from SortMeRNA analysis
```


Expand Down Expand Up @@ -370,9 +370,9 @@ mv -v results/4_aligned_sequences/sample*Log.out results/4_aligned_sequences/ali
#### Output
```{bash, eval = F}
── results/4_aligned_sequences/
└── aligned_bam/sampleAligned.sortedByCoord.out.bam - Sorted BAM alignment fole
└── aligned_logs/sampleLog.final.out - Log of STAR alignment rate
└── aligned_logs/sampleLog.out - Log of steps take during STAR alignment
└── aligned_bam/sampleAligned.sortedByCoord.out.bam <- Sorted BAM alignment file
└── aligned_logs/sampleLog.final.out <- Log of STAR alignment rate
└── aligned_logs/sampleLog.out <- Log of steps taken during STAR alignment
```

---
Expand Down Expand Up @@ -417,8 +417,8 @@ cd ../../../
#### Output
```{bash, eval = F}
── results/5_final_counts/
└── final_counts.txt - Final gene counts across all samples
└── final_counts.txt.summary - Summary of gene summarization
└── final_counts.txt <- Final gene counts across all samples
└── final_counts.txt.summary <- Summary of gene summarization
```

---
Expand Down Expand Up @@ -450,8 +450,8 @@ multiqc results \
#### Output
```{bash, eval = F}
── results/6_multiQC/
└── multiqc_report.html - Beautiful figures representing the logs from each step
└── multiqc_data/ - Folder of data that multiqc found from various log files
└── multiqc_report.html <- Beautiful figures representing the logs from each step
└── multiqc_data/ <- Folder of data that multiqc found from various log files
```


Expand All @@ -464,7 +464,7 @@ Once the workflow has completed, you can now use the gene count table as an inpu
```{r install, eval = F}
source("https://bioconductor.org/biocLite.R")
biocLite("DESeq2") ; library(DESeq2)
biocLite("ggplot2") ; library(DESeq2)
biocLite("ggplot2") ; library(ggplot2)
biocLite("clusterProfiler") ; library(clusterProfiler)
biocLite("biomaRt") ; library(biomaRt)
biocLite("ReactomePA") ; library(ReactomePA)
Expand Down Expand Up @@ -580,6 +580,13 @@ results$entrez <- mapIds(x = org.Mm.eg.db,
keytype = "SYMBOL",
multiVals = "first")
# Add ENSEMBL
results$ensembl <- mapIds(x = org.Mm.eg.db,
keys = row.names(results),
column = "ENSEMBL",
keytype = "SYMBOL",
multiVals = "first")
# Subset for only significant genes (q < 0.05)
results_sig <- subset(results, padj < 0.05)
head(results_sig)
Expand Down Expand Up @@ -673,28 +680,25 @@ pheatmap(mat = mat,
```{r volcano_plot, eval = T}
# Gather Log-fold change and FDR-corrected pvalues from DESeq2 results
## - Change pvalues to -log10 (1.3 = 0.05)
data <- data.frame(pval = -log10(results$padj),
lfc = results$log2FoldChange,
row.names = row.names(results))
data <- data.frame(gene = row.names(results),
pval = -log10(results$padj),
lfc = results$log2FoldChange)
# Remove any rows that have NA as an entry
data <- na.omit(data)
# Color the points which are up or down
## If fold-change > 0 and pvalue > 1.3 (Increased significant)
## If fold-change < 0 and pvalue > 1.3 (Decreased significant)
data <- mutate(data, color = ifelse(test = lfc > 0 & pval > 1.3,
yes = "Increased",
no = ifelse(test = lfc < 0 & pval > 1.3,
yes = "Decreased",
no = "nonsignificant")))
# Label each gene by direction of change; `pval` is -log10(padj), so
# 1.3 corresponds to padj = 0.05.
# The final `TRUE ~ ...` is a catch-all: rows matching neither significant
# branch (including pval exactly 1.3, or lfc exactly 0) are labelled
# "nonsignificant" instead of falling through case_when() as NA.
data <- mutate(data, color = case_when(
  data$lfc > 0 & data$pval > 1.3 ~ "Increased",
  data$lfc < 0 & data$pval > 1.3 ~ "Decreased",
  TRUE ~ "nonsignificant"
))
# Make a basic ggplot2 object with x-y values
vol <- ggplot(data, aes(x = lfc, y = pval, color = color))
# Add ggplot2 layers
vol +
ggtitle(label = "Volcano Plot", subtitle = "Colored by fold-change directionality") +
ggtitle(label = "Volcano Plot", subtitle = "Colored by fold-change direction") +
geom_point(size = 2.5, alpha = 0.8, na.rm = T) +
scale_color_manual(name = "Directionality",
values = c(Increased = "#008B00", Decreased = "#CD4F39", nonsignificant = "darkgray")) +
Expand Down
Loading

0 comments on commit 4f9c67b

Please sign in to comment.