From a6dc06d0c8deec0e712e495c02479db01c986527 Mon Sep 17 00:00:00 2001
From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com>
Date: Sat, 23 Nov 2024 11:28:12 -0500
Subject: [PATCH 1/7] Add bash script for grabbing processed EWS data from OpenScPCA

---
 .../download-and-rename-openscpca-data.sh | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)
 create mode 100755 scRNA-seq-advanced/setup/ewing-sarcoma/download-and-rename-openscpca-data.sh

diff --git a/scRNA-seq-advanced/setup/ewing-sarcoma/download-and-rename-openscpca-data.sh b/scRNA-seq-advanced/setup/ewing-sarcoma/download-and-rename-openscpca-data.sh
new file mode 100755
index 00000000..396f89f1
--- /dev/null
+++ b/scRNA-seq-advanced/setup/ewing-sarcoma/download-and-rename-openscpca-data.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+set -euo pipefail
+
+# Profile to use with the download script
+PROFILE=${PROFILE:-openscpca}
+# Release to download
+RELEASE=${RELEASE:-2024-08-22}
+
+# Set the working directory to the directory of this file
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+# Set up directories
+ewing_data_dir="../../data/ewing-sarcoma"
+annotations_dir="${ewing_data_dir}/annotations"
+processed_dir="${ewing_data_dir}/processed"
+
+# Create directories if they don't exist yet
+mkdir -p "${annotations_dir}"
+mkdir -p "${processed_dir}"
+
+# Get download data script from OpenScPCA for convenience
+# --fail makes curl exit non-zero on an HTTP error instead of saving the error page
+curl --fail -O \
+  -L https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/a3d8a2c9144e8edb3894a7beeb89cdc6c3e6d681/download-data.py
+# Make executable
+chmod +x download-data.py
+
+# Download Ewing sarcoma tumor samples
+./download-data.py \
+  --samples 'SCPCS000490,SCPCS000492,SCPCS000493,SCPCS000494,SCPCS000495,SCPCS000496,SCPCS000749' \
+  --format SCE \
+  --release "${RELEASE}" \
+  --data-dir "${ewing_data_dir}" \
+  --profile "${PROFILE}"
+
+# Download Ewing sarcoma metadata
+./download-data.py \
+  --projects SCPCP000015 \
+  --metadata-only \
+  --release "${RELEASE}" \
+  --data-dir "${ewing_data_dir}" \
+  --profile "${PROFILE}"
+
+# Remove existing files from processed directory
+if [ -z "$( ls -A "${processed_dir}" )" ]; then
+  echo "No processed files yet!"
+else
+  # Recreate the directory rather than globbing: `*` skips dotfiles, so
+  # `rm -r dir/*` fails when only hidden files (e.g. .DS_Store) are present
+  rm -r "${processed_dir}"
+  mkdir -p "${processed_dir}"
+fi
+
+# Move files from release folder
+mv "${ewing_data_dir}/${RELEASE}/SCPCP000015"/* "${processed_dir}"
+mv "${processed_dir}/single_cell_metadata.tsv" "${annotations_dir}/ewing_sarcoma_sample_metadata.tsv"
+
+# Remove PDX samples from metadata
+Rscript - << EOF
+
+sample_metadata_df <- readr::read_tsv("${annotations_dir}/ewing_sarcoma_sample_metadata.tsv")
+sample_metadata_df |>
+  dplyr::filter(stringr::str_detect(sample_type, "xenograft", negate = TRUE)) |>
+  readr::write_tsv("${annotations_dir}/ewing_sarcoma_sample_metadata.tsv")
+
+EOF
+
+# Clean up download data script
+rm download-data.py
+# Clean up the remnants of download structure
+rm -r "${ewing_data_dir}/${RELEASE}"

From 179aaeccbf1e2f3ea3da91661905dd84f624b6e5 Mon Sep 17 00:00:00 2001
From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com>
Date: Sat, 23 Nov 2024 11:29:56 -0500
Subject: [PATCH 2/7] Add EWS data to S3 sync script

---
 scripts/syncup-s3.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/scripts/syncup-s3.sh b/scripts/syncup-s3.sh
index 8f66b434..07063a28 100644
--- a/scripts/syncup-s3.sh
+++ b/scripts/syncup-s3.sh
@@ -37,6 +37,7 @@ sync_dirs=(
   scRNA-seq/data/tabula-muris/alevin-quant/10X_P7_12
   scRNA-seq/data/reference
   scRNA-seq/index/Mus_musculus
+  scRNA-seq-advanced/data/ewing-sarcoma/processed
   scRNA-seq-advanced/data/glioblastoma-10x/raw_feature_bc_matrix
   scRNA-seq-advanced/data/hodgkins/markers
   scRNA-seq-advanced/data/PBMC-TotalSeqB/raw_feature_bc_matrix
@@ -59,6 +60,7 @@ sync_files=(
   scRNA-seq/data/tabula-muris/normalized/TM_normalized.rds
   scRNA-seq/data/tabula-muris/TM_droplet_metadata.csv
   scRNA-seq-advanced/analysis/mouse-liver/markers/cluster07_markers.tsv
+  scRNA-seq-advanced/data/ewing-sarcoma/annotations/ewing_sarcoma_sample_metadata.tsv
   scRNA-seq-advanced/data/rms/annotations/rms_sample_metadata.tsv
   scRNA-seq-advanced/data/reference/hs_mitochondrial_genes.tsv
 )

From 654609f88cc92fdc589b1622efa5ebc86dc33e78 Mon Sep 17 00:00:00 2001
From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com>
Date: Sat, 23 Nov 2024 11:33:00 -0500
Subject: [PATCH 3/7] Add EWS to link data shell script

---
 scripts/link-data.sh | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/link-data.sh b/scripts/link-data.sh
index 35810556..20e3769c 100644
--- a/scripts/link-data.sh
+++ b/scripts/link-data.sh
@@ -35,6 +35,8 @@ mkdir -p scRNA-seq/data/PBMC-TotalSeqB/normalized/
 # scRNA-seq-advanced module directories
 mkdir -p scRNA-seq-advanced/analysis/mouse-liver
 mkdir -p scRNA-seq-advanced/data/PBMC-TotalSeqB/normalized
+mkdir -p scRNA-seq-advanced/data/ewing-sarcoma/annotations
+mkdir -p scRNA-seq-advanced/data/ewing-sarcoma/processed
 mkdir -p scRNA-seq-advanced/data/glioblastoma-10x
 mkdir -p scRNA-seq-advanced/data/rms/integrated
 mkdir -p scRNA-seq-advanced/data/rms/annotations
@@ -83,6 +85,8 @@ link_locs=(
   scRNA-seq-advanced/data/PBMC-TotalSeqB/raw_feature_bc_matrix
   scRNA-seq-advanced/data/PBMC-TotalSeqB/normalized/PBMC_TotalSeqB_normalized_sce.rds
   scRNA-seq-advanced/data/glioblastoma-10x/raw_feature_bc_matrix
+  scRNA-seq-advanced/data/ewing-sarcoma/annotations/ewing_sarcoma_sample_metadata.tsv
+  scRNA-seq-advanced/data/ewing-sarcoma/processed
   scRNA-seq-advanced/data/hodgkins/markers
   scRNA-seq-advanced/data/reference
   scRNA-seq-advanced/data/rms/processed

From 8484b0bec5b228470c60f0fa3b51015176bfaaa5 Mon Sep 17 00:00:00 2001
From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com>
Date: Sat, 23 Nov 2024 17:41:42 -0500
Subject: [PATCH 4/7] Make name shorter and more accurate

---
 ...ad-and-rename-openscpca-data.sh => download-openscpca-data.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename scRNA-seq-advanced/setup/ewing-sarcoma/{download-and-rename-openscpca-data.sh => download-openscpca-data.sh} (100%)

diff --git a/scRNA-seq-advanced/setup/ewing-sarcoma/download-and-rename-openscpca-data.sh b/scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh
similarity index 100%
rename from scRNA-seq-advanced/setup/ewing-sarcoma/download-and-rename-openscpca-data.sh
rename to scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh

From 2d6914ab1dc1047a756d66f98d472517e63b45de Mon Sep 17 00:00:00 2001
From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com>
Date: Sat, 23 Nov 2024 17:42:07 -0500
Subject: [PATCH 5/7] Add a short README

---
 scRNA-seq-advanced/setup/ewing-sarcoma/README.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 scRNA-seq-advanced/setup/ewing-sarcoma/README.md

diff --git a/scRNA-seq-advanced/setup/ewing-sarcoma/README.md b/scRNA-seq-advanced/setup/ewing-sarcoma/README.md
new file mode 100644
index 00000000..fd45eebf
--- /dev/null
+++ b/scRNA-seq-advanced/setup/ewing-sarcoma/README.md
@@ -0,0 +1,13 @@
+The shell script in this directory downloads processed `SingleCellExperiment` objects and metadata from tumor samples in `SCPCP000015` using the data download mechanism from OpenScPCA.
+
+You may want to run it with your OpenScPCA conda environment activated.
+
+By default, it will use an AWS profile called `openscpca` and download data from the `2024-08-22` OpenScPCA release.
+
+You can alter the AWS profile or release with the following:
+
+```sh
+PROFILE={profile} RELEASE={release} ./download-openscpca-data.sh
+```
+
+Replace `{profile}` and `{release}` with a profile that has OpenScPCA access and a valid release, respectively.

From 9dbc28d2f0979c51afd2aa90d1c563cb9101812e Mon Sep 17 00:00:00 2001
From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com>
Date: Mon, 25 Nov 2024 09:47:35 -0500
Subject: [PATCH 6/7] Add OpenScPCA to dictionary

---
 components/dictionary.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/components/dictionary.txt b/components/dictionary.txt
index 3379690a..5c42f78a 100644
--- a/components/dictionary.txt
+++ b/components/dictionary.txt
@@ -315,6 +315,7 @@ oncoproteins
 OPC
 OPCs
 OpenPBTA
+OpenScPCA
 Optimus
 orking
 orthotopic

From 4284c2447bddb88648d31e0177191a0ad1e1bc3c Mon Sep 17 00:00:00 2001
From: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com>
Date: Mon, 25 Nov 2024 11:35:39 -0500
Subject: [PATCH 7/7] Apply suggestions from code review

Co-authored-by: Joshua Shapiro
---
 .../setup/ewing-sarcoma/download-openscpca-data.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh b/scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh
index 396f89f1..7ccae206 100755
--- a/scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh
+++ b/scRNA-seq-advanced/setup/ewing-sarcoma/download-openscpca-data.sh
@@ -5,7 +5,7 @@ set -euo pipefail
 # Profile to use with the download script
 PROFILE=${PROFILE:-openscpca}
 # Release to download
-RELEASE=${RELEASE:-2024-08-22}
+RELEASE=${RELEASE:-2024-11-25}
 
 # Set the working directory to the directory of this file
 cd "$(dirname "${BASH_SOURCE[0]}")"