Merge branch 'master' into jaclyn-taroni/711-use-rms-de

AlexsLemonade · Nov 25, 2024 · fd4e209 · fd4e209
2 parents 44f3b76 + 8138bf4
commit fd4e209
Show file tree

Hide file tree

Showing 5 changed files with 88 additions and 0 deletions.
diff --git a/components/dictionary.txt b/components/dictionary.txt
@@ -315,6 +315,7 @@ oncoproteins
 OPC
 OPCs
 OpenPBTA
+OpenScPCA
 Optimus
 orking
 orthotopic

diff --git a/scRNA-seq-advanced/setup/ewing-sarcoma/README.md b/scRNA-seq-advanced/setup/ewing-sarcoma/README.md
@@ -0,0 +1,13 @@
+The shell script in this directory downloads processed `SingleCellExperiment` objects and metadata from tumor samples in `SCPCP000015` using the data download mechanism from OpenScPCA.
+
+You may want to run it with your OpenScPCA conda environment activated.
+
+By default, it will use an AWS profile called `openscpca` and download data from the `2024-11-25` OpenScPCA release.
+
+You can alter the AWS profile or release with the following:
+
+```sh
+PROFILE={profile} RELEASE={release} ./download-openscpca-data.sh
+```
+
+Replace `{profile}` and `{release}` with an AWS profile that has OpenScPCA access and a valid release, respectively.
diff --git a/scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh b/scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Profile to use with the download script
+PROFILE=${PROFILE:-openscpca}
+# Release to download
+RELEASE=${RELEASE:-2024-11-25}
+
+# Set the working directory to the directory of this file
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Set up directories
+ewing_data_dir="../../data/ewing-sarcoma"
+annotations_dir="${ewing_data_dir}/annotations"
+processed_dir="${ewing_data_dir}/processed"
+
+# Create directories if they don't exist yet
+mkdir -p "${annotations_dir}"
+mkdir -p "${processed_dir}"
+
+# Get download data script from OpenScPCA for convenience
+curl -O \
+    -L https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/a3d8a2c9144e8edb3894a7beeb89cdc6c3e6d681/download-data.py
+# Make executable
+chmod +x download-data.py
+
+# Download Ewing sarcoma tumor samples
+./download-data.py \
+    --samples 'SCPCS000490,SCPCS000492,SCPCS000493,SCPCS000494,SCPCS000495,SCPCS000496,SCPCS000749' \
+    --format SCE \
+    --release ${RELEASE} \
+    --data-dir ${ewing_data_dir} \
+    --profile ${PROFILE}
+
+# # Download Ewing sarcoma metadata
+./download-data.py \
+    --projects SCPCP000015 \
+    --metadata-only \
+    --release ${RELEASE} \
+    --data-dir ${ewing_data_dir} \
+    --profile ${PROFILE}
+
+# Remove existing files from processed directory
+if [ -z "$( ls -A ${processed_dir} )" ]; then
+   echo "No processed files yet!"
+else
+   rm -r ${processed_dir}/*
+fi
+
+# Move files from release folder
+mv ${ewing_data_dir}/${RELEASE}/SCPCP000015/* ${processed_dir}
+mv "${processed_dir}/single_cell_metadata.tsv" "${annotations_dir}/ewing_sarcoma_sample_metadata.tsv"
+
+# Remove PDX samples from metadata
+Rscript - << EOF
+
+sample_metadata_df <- readr::read_tsv("${annotations_dir}/ewing_sarcoma_sample_metadata.tsv")
+sample_metadata_df |>
+    dplyr::filter(stringr::str_detect(sample_type, "xenograft", negate = TRUE)) |>
+    readr::write_tsv("${annotations_dir}/ewing_sarcoma_sample_metadata.tsv")
+
+EOF
+
+# Clean up download data script
+rm download-data.py
+# Clean up the remnants of download structure
+rm -r ${ewing_data_dir}/${RELEASE}
diff --git a/scripts/link-data.sh b/scripts/link-data.sh
@@ -35,6 +35,8 @@ mkdir -p scRNA-seq/data/PBMC-TotalSeqB/normalized/
 # scRNA-seq-advanced module directories
 mkdir -p scRNA-seq-advanced/analysis/mouse-liver
 mkdir -p scRNA-seq-advanced/data/PBMC-TotalSeqB/normalized
+mkdir -p scRNA-seq-advanced/data/ewing-sarcoma/annotations
+mkdir -p scRNA-seq-advanced/data/ewing-sarcoma/processed
 mkdir -p scRNA-seq-advanced/data/glioblastoma-10x
 mkdir -p scRNA-seq-advanced/data/rms/integrated
 mkdir -p scRNA-seq-advanced/data/rms/annotations
@@ -83,6 +85,8 @@ link_locs=(
   scRNA-seq-advanced/data/PBMC-TotalSeqB/raw_feature_bc_matrix
   scRNA-seq-advanced/data/PBMC-TotalSeqB/normalized/PBMC_TotalSeqB_normalized_sce.rds
   scRNA-seq-advanced/data/glioblastoma-10x/raw_feature_bc_matrix
+  scRNA-seq-advanced/data/ewing-sarcoma/annotations/ewing_sarcoma_sample_metadata.tsv
+  scRNA-seq-advanced/data/ewing-sarcoma/processed
   scRNA-seq-advanced/data/hodgkins/markers
   scRNA-seq-advanced/data/reference
   scRNA-seq-advanced/data/rms/processed

diff --git a/scripts/syncup-s3.sh b/scripts/syncup-s3.sh
@@ -37,6 +37,7 @@ sync_dirs=(
   scRNA-seq/data/tabula-muris/alevin-quant/10X_P7_12
   scRNA-seq/data/reference
   scRNA-seq/index/Mus_musculus
+  scRNA-seq-advanced/data/ewing-sarcoma/processed
   scRNA-seq-advanced/data/glioblastoma-10x/raw_feature_bc_matrix
   scRNA-seq-advanced/data/hodgkins/markers
   scRNA-seq-advanced/data/PBMC-TotalSeqB/raw_feature_bc_matrix
@@ -59,6 +60,7 @@ sync_files=(
   scRNA-seq/data/tabula-muris/normalized/TM_normalized.rds
   scRNA-seq/data/tabula-muris/TM_droplet_metadata.csv
   scRNA-seq-advanced/analysis/mouse-liver/markers/cluster07_markers.tsv
+  scRNA-seq-advanced/data/ewing-sarcoma/annotations/ewing_sarcoma_sample_metadata.tsv
   scRNA-seq-advanced/data/rms/annotations/rms_sample_metadata.tsv
   scRNA-seq-advanced/data/reference/hs_mitochondrial_genes.tsv
 )
-Original file line number
+Diff line change
@@ Expand Up / @@ -315,6 +315,7 @@ oncoproteins @@
     OPC
     OPCs
     OpenPBTA
+    OpenScPCA
     Optimus
     orking
     orthotopic
@@ Expand Down @@