update source path

keiserlab · Feb 7, 2019 · 48adb3a · 48adb3a
1 parent a9816cb
commit 48adb3a
Show file tree

Hide file tree

Showing 11 changed files with 286 additions and 318 deletions.
diff --git a/1.1) Preprocessing - Reinhard Normalization and WSI Tiling.ipynb b/1.1) Preprocessing - Reinhard Normalization and WSI Tiling.ipynb
@@ -18,13 +18,14 @@
    "outputs": [],
    "source": [
     "import os\n",
+    "import glob\n",
     "import numpy as np\n",
     "import cv2\n",
     "import matplotlib.pyplot as plt\n",
     "import pyvips as Vips\n",
     "from tqdm import tqdm\n",
     "\n",
-    "import vips_utils, normalize"
+    "from utils import vips_utils, normalize"
    ]
   },
   {
@@ -33,8 +34,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "RAW_DIR = '/home/ziqi/Desktop/data/plaques_WSIs/train&validation/'\n",
-    "SAVE_DIR = '/home/ziqi/Desktop/data/norm_tiles/'"
+    "TRAIN_WSI_DIR = 'data/Dataset 1a Development_train/'              # WSIs in the training set\n",
+    "VAL_WSI_DIR = 'data/Dataset 1b Development_validation/'           # WSIs in the validation set\n",
+    "\n",
+    "SAVE_DIR = 'data/norm_tiles/'"
    ]
   },
   {
@@ -43,7 +46,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "ref_imagename = 'NA5002_2AB.svs'"
+    "if not os.path.exists(SAVE_DIR):\n",
+    "        os.makedirs(SAVE_DIR)"
    ]
   },
   {
@@ -52,37 +56,41 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "imagenames = sorted(os.listdir(RAW_DIR))"
+    "ref_imagename = 'NA5002_2AB.svs'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "imagenames.remove('NA5005-02_AB.svs')\n",
+    "wsi_train = os.listdir(TRAIN_WSI_DIR)\n",
+    "wsi_val = os.listdir(VAL_WSI_DIR)\n",
+    "\n",
+    "imagenames = sorted(wsi_val + wsi_train)\n",
+    "imagenames.remove('NA5005-02_AB.svs')             # this WSI was digitized at 40x and needs to be resized down to 20x\n",
     "imagenames.append('NA5005-02_AB.svs')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 62,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "CPU times: user 43min 14s, sys: 2min 28s, total: 45min 42s\n",
-      "Wall time: 3min 58s\n"
+      "CPU times: user 57min 9s, sys: 3min 15s, total: 1h 24s\n",
+      "Wall time: 4min 52s\n"
      ]
     }
    ],
    "source": [
     "%%time\n",
     "# Load reference image, fit Reinhard normalizer\n",
-    "ref_image = Vips.Image.new_from_file(RAW_DIR + ref_imagename, level=0)\n",
+    "ref_image = Vips.Image.new_from_file(TRAIN_WSI_DIR + ref_imagename, level=0)\n",
     "\n",
     "normalizer = normalize.Reinhard()\n",
     "normalizer.fit(ref_image)"
@@ -104,7 +112,10 @@
    "source": [
     "stats_dict = {}\n",
     "for imagename in tqdm(imagenames[:-1]):\n",
-    "    vips_img = Vips.Image.new_from_file(RAW_DIR + imagename, level=0)\n",
+    "    try:\n",
+    "        vips_img = Vips.Image.new_from_file(TRAIN_WSI_DIR + imagename, level=0)\n",
+    "    except:\n",
+    "        vips_img = Vips.Image.new_from_file(VAL_WSI_DIR + imagename, level=0)\n",
     "    out = normalizer.transform(vips_img)\n",
     "    out.filename = vips_img.filename\n",
     "    vips_utils.save_and_tile(out, SAVE_DIR)\n",
@@ -127,7 +138,7 @@
    "source": [
     "# Resize the single 40x image down to 20x\n",
     "for imagename in tqdm(imagenames[-1:]):\n",
-    "    vips_img = Vips.Image.new_from_file(RAW_DIR + imagename, level=0)\n",
+    "    vips_img = Vips.Image.new_from_file(TRAIN_WSI_DIR + imagename, level=0)\n",
     "    vips_img = vips_img.resize(0.5)\n",
     "    out = normalizer.transform(vips_img)\n",
     "    out.filename = vips_img.filename\n",
@@ -180,7 +191,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.5.4"
   }
  },
  "nbformat": 4,

diff --git a/1.2) Preprocessing - Plaque Detection and Image Cropping.ipynb b/1.2) Preprocessing - Plaque Detection and Image Cropping.ipynb
@@ -11,10 +11,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 3,
+   "metadata": {},
    "outputs": [],
    "source": [
     "import csv\n",
@@ -31,10 +29,8 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 4,
+   "metadata": {},
    "outputs": [],
    "source": [
     "# Set random seeds\n",
@@ -43,20 +39,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "collapsed": true
-   },
+   "execution_count": 5,
+   "metadata": {},
    "outputs": [],
    "source": [
-    "IMG_DIR = '/home/ziqi/Desktop/data/norm_tiles/'\n",
-    "SAVE_DIR = '/home/ziqi/Desktop/data/seg/'\n",
+    "IMG_DIR = 'data/norm_tiles/'\n",
+    "SAVE_DIR = 'data/seg/'\n",
     "\n",
     "BLOBS_DIR = SAVE_DIR + 'blobs/'\n",
     "IMG_BBOXES = SAVE_DIR + 'blobs_bboxes/'\n",
     "NEGATIVES = SAVE_DIR + 'negatives/'"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not os.path.exists(SAVE_DIR):\n",
+    "        os.makedirs(SAVE_DIR)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 5,

diff --git a/1.3) Preprocessing - Dataset Splitting and Size Filtering.ipynb b/1.3) Preprocessing - Dataset Splitting and Size Filtering.ipynb
@@ -45,13 +45,13 @@
    "outputs": [],
    "source": [
     "BLOB_CSV = 'image_details.csv'\n",
-    "CSV_DIR = '/home/ziqi/Desktop/data/seg/'\n",
+    "CSV_DIR = 'data/seg/'\n",
     "\n",
-    "NORM_SRC =  '/home/ziqi/Desktop/data/seg/blobs_bboxes/'\n",
-    "NORM_DEST = '/home/ziqi/Desktop/data/seg/size_filtered/blobs_bboxes/'\n",
+    "NORM_SRC =  'data/seg/blobs_bboxes/'\n",
+    "NORM_DEST = 'data/seg/size_filtered/blobs_bboxes/'\n",
     "\n",
-    "RAW_SRC   = '/home/ziqi/Desktop/data/seg/blobs/'\n",
-    "RAW_DEST  = '/home/ziqi/Desktop/data/seg/size_filtered/blobs/'"
+    "RAW_SRC   = 'data/seg/blobs/'\n",
+    "RAW_DEST  = 'data/seg/size_filtered/blobs/'"
    ]
   },
   {

diff --git a/2.1) CNN Models - Model Training and Development.ipynb b/2.1) CNN Models - Model Training and Development.ipynb
@@ -23,11 +23,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "import time\n",
+    "import time, os\n",
     "\n",
     "import torch\n",
     "torch.manual_seed(123456789)\n",
@@ -48,36 +48,44 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'mean': array([0.77906426, 0.74919518, 0.77529276]), 'std': array([0.13986633, 0.15931302, 0.17665639])}\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
-    "batch_size = 64\n",
-    "num_workers = 16\n",
-    "\n",
-    "norm = np.load('utils/normalization.npy').item()\n",
-    "print(norm)\n",
-    "\n",
     "csv_path = {\n",
-    "    'train': 'utils/train.csv',\n",
-    "    'dev': 'utils/dev.csv',\n",
+    "    'train': 'data/CSVs/train.csv',\n",
+    "    'dev': 'data/CSVs/dev.csv',\n",
     "}\n",
     "\n",
-    "DATA_DIR = '/home/ziqi/Desktop/data/seg/size_filtered/blobs/'\n",
-    "NEGATIVE_DIR = '/home/ziqi/Desktop/data/seg/negatives/'\n",
-    "SAVE_DIR = '/home/ziqi/Desktop/data/'\n",
+    "DATA_DIR = 'data/seg/size_filtered/blobs/'\n",
+    "NEGATIVE_DIR = 'data/seg/negatives/'\n",
+    "SAVE_DIR = 'models/'\n",
     "\n",
     "image_classes = ['cored','diffuse','CAA']"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "if not os.path.exists(SAVE_DIR):\n",
+    "        os.makedirs(SAVE_DIR)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "batch_size = 64\n",
+    "num_workers = 16\n",
+    "\n",
+    "norm = np.load('utils/normalization.npy').item()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 7,
@@ -1070,7 +1078,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.5"
+   "version": "3.5.4"
   }
  },
  "nbformat": 4,