Commit b502287: initial commit
bfurtwa committed Jan 11, 2021 (0 parents)
Showing 19 changed files with 13,279 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
@@ -0,0 +1,10 @@
.idea/
.ipynb_checkpoints

# Python build files
__pycache__/
/dist/
/build/
/sceptre.egg-info/
/*-env/
/env-*/
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 Benjamin Furtwängler

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
44 changes: 44 additions & 0 deletions README.md
@@ -0,0 +1,44 @@
# SCeptre
**(Single Cell proteomics readout of expression)**

SCeptre is a Python package that extends the functionality of Scanpy to analyze multiplexed single-cell proteomics data.
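
Since SCeptre builds on Scanpy and its `AnnData` data structure, it fits into a standard Scanpy workflow. Below is a minimal sketch; the file name is illustrative and the `sceptre` import only marks where its functions would come in (see the notebooks referenced under Usage for the actual API):

```
import scanpy as sc
import sceptre  # SCeptre functions operate on the same AnnData object

# load a dataset into an AnnData object (illustrative path)
adata = sc.read_h5ad("single_cell_proteomics.h5ad")

# standard Scanpy downstream analysis works unchanged
sc.pp.log1p(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)
sc.pl.umap(adata)
```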

### Installation from GitHub

Tested on Ubuntu 20.04.1 LTS.
It is recommended to work in a dedicated conda environment, for example:

```
conda create -n sceptre python=3.7
conda activate sceptre
```

Clone the repository and `cd` into its root directory. Then:

```
pip install .
```
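
To verify the installation, the package should import without errors in a Python session:

```
import sceptre
print(sceptre.__name__)  # prints 'sceptre' if the install worked
```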

### Usage

Usage is exemplified in the notebooks accompanying the analysis from the manuscript "Quantitative Single-Cell Proteomics as a Tool to Characterize Cellular Hierarchies".

The analysis can be replicated using the provided conda environment:

```
conda env create -f Schoof_et_al/code/environment.yml
conda activate sceptre
pip install Schoof_et_al/code/sceptre-0.1-py3-none-any.whl
```

Find the notebooks in the subdirectory `Schoof_et_al/code` and the required data in `Schoof_et_al/data`.

The following notebooks process the different datasets:

Notebook | Description
------------ | -------------
300ms.ipynb | SCeptre analysis of the 'medium' dataset.
500ms.ipynb | SCeptre analysis of the 'high' dataset.
bulk.ipynb | SCeptre analysis of the 'bulk' dataset.
integrated.ipynb | SCeptre analysis of the 'integrated' dataset.

2,826 changes: 2,826 additions & 0 deletions Schoof_et_al/code/300ms.ipynb

807 changes: 807 additions & 0 deletions Schoof_et_al/code/300ms_vs_500ms.ipynb

2,082 changes: 2,082 additions & 0 deletions Schoof_et_al/code/500ms.ipynb

146 changes: 146 additions & 0 deletions Schoof_et_al/code/booster_only.ipynb

1,869 changes: 1,869 additions & 0 deletions Schoof_et_al/code/bulk.ipynb

260 changes: 260 additions & 0 deletions Schoof_et_al/code/bulk_show_empty.ipynb

159 changes: 159 additions & 0 deletions Schoof_et_al/code/compile_facs_data.ipynb
@@ -0,0 +1,159 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Notebook to compile the facs data table from the FlowJo output (+IndexSort plugin).\n",
".fcs files were gated in FlowJo and well location was preserved using the IndexSort plugin. Bi-exponential transform was applied and the FACS data was exported as the transformed 'channel' tables. To preserve the well location, also the un-transformed 'scale' tables were exported. These tables are beeing merged in this notebook."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# 300ms\n",
"plates = ['20759']\n",
"path = '../data/300ms/FJ/'\n",
"# 500ms\n",
"plates = ['20761']\n",
"path = '../data/500ms/FJ/'\n",
"# bulk_b\n",
"plates = ['Specimen_001_INX_Bulk_002_004',\n",
" 'Specimen_001_INX_Bulk_003_005',\n",
" 'Specimen_001_INX_Bulk_006_008']\n",
"path = '../data/bulk_b/FJ/'\n",
"# bulk_c\n",
"plates = ['Specimen_001_INX_Bulk_001_003',\n",
" 'Specimen_001_INX_Bulk_004_006',\n",
" 'Specimen_001_INX_Bulk_005_007']\n",
"path = '../data/bulk_c/FJ/'\n",
"# bulk_b_empty\n",
"plates = ['8227_INX_Bulk_P1_002',\n",
" '8227_INX_Bulk_P2_003',\n",
" '8227_INX_Bulk_P3_004']\n",
"path = '../data/bulk_b_empty/FJ/'\n",
"# enrich\n",
"plates = ['Specimen_001_INX_Celltype_P2_100920_004']\n",
"path = '../data/enrich/FJ/'\n",
"\n",
"\n",
"\n",
"plates = ['20759', # 300ms\n",
" '20761', # 500ms\n",
" 'Specimen_001_INX_Bulk_002_004', # bulk_b_1\n",
" 'Specimen_001_INX_Bulk_003_005', # bulk_b_2\n",
" 'Specimen_001_INX_Bulk_006_008', # bulk_b_3\n",
" 'Specimen_001_INX_Bulk_001_003', # bulk_c_1\n",
" 'Specimen_001_INX_Bulk_004_006', # bulk_c_1\n",
" 'Specimen_001_INX_Bulk_005_007', # bulk_c_1\n",
" '8227_INX_Bulk_P1_002', # bulk_b_e_1\n",
" '8227_INX_Bulk_P2_003', # bulk_b_e_2\n",
" 'Specimen_001_INX_Celltype_P2_100920_004'] # enrich\n",
"\n",
"path = '../data/facs_data/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[None, None, None, None, None, None, None, None, None, None, None]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# get all csv files in channel and scale folder for each population\n",
"files = [f for f in os.listdir(path+'channel/') if f.endswith(\".csv\")]\n",
"fcs = ['_'.join(x.split('_')[1:-1]) for x in files]\n",
"pop = [(x.split('_')[-1]).split('.')[0] for x in files]\n",
"data = pd.DataFrame({'file': files, 'fcs': fcs, 'pop': pop, 'plate': [plates.index(p) for p in fcs]}).set_index('file')\n",
"dfs_channel = [pd.DataFrame() for i in range(len(plates))]\n",
"\n",
"for f in files:\n",
" fj = pd.read_csv(path+'channel/{}'.format(f))\n",
" fj['Population'] = data.loc[f, 'pop']\n",
" dfs_channel[data.loc[f, 'plate']] = dfs_channel[data.loc[f, 'plate']].append(fj)\n",
"\n",
"dfs_scale = [pd.DataFrame() for i in range(len(plates))]\n",
"for f in files:\n",
" fj = pd.read_csv(path+'scale/{}'.format(f))\n",
" fj['Population'] = data.loc[f, 'pop']\n",
" dfs_scale[data.loc[f, 'plate']] = dfs_scale[data.loc[f, 'plate']].append(fj)\n",
"\n",
"# replace the index columns with the non-transformed values from scale\n",
"for i in range(len(dfs_channel)):\n",
" dfs_channel[i].loc[:, ['IdxCol', 'IdxRow', 'Time']] = dfs_scale[i].loc[:, ['IdxCol', 'IdxRow', 'Time']]\n",
"\n",
"# transform row index in letter and make Well column. Somehow, the IdxRow index from FJ is reversed\n",
"for i in range(len(dfs_channel)):\n",
" dfs_channel[i][\"IdxRow\"] = dfs_channel[i][\"IdxRow\"].apply(\n",
" lambda x: [\n",
" \"A\",\n",
" \"B\",\n",
" \"C\",\n",
" \"D\",\n",
" \"E\",\n",
" \"F\",\n",
" \"G\",\n",
" \"H\",\n",
" \"I\",\n",
" \"J\",\n",
" \"K\",\n",
" \"L\",\n",
" \"M\",\n",
" \"N\",\n",
" \"O\",\n",
" \"P\",\n",
" ][-x]\n",
" )\n",
" dfs_channel[i][\"Well\"] = dfs_channel[i][\"IdxRow\"] + dfs_channel[i][\"IdxCol\"].astype(str)\n",
" dfs_channel[i] = dfs_channel[i].rename(columns={'IdxRow': 'Row', 'IdxCol': 'Column'})\n",
"\n",
"# save one table for each plate\n",
"[dfs_channel[i].to_csv(path+'facs_data_P{}.txt'.format(i+1), sep='\\t', index=False) for i in range(len(dfs_channel))]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}