Skip to content

Commit

Permalink
Merge pull request #2 from VemundFredriksen/tumor-isolation
Browse files Browse the repository at this point in the history
Tumor isolation
  • Loading branch information
sosevle authored Oct 13, 2023
2 parents 3408f5a + 3053d9c commit 08da925
Show file tree
Hide file tree
Showing 9 changed files with 316 additions and 2 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
/assets/*
!/assets/readme.md
!/assets/readme.md
/out/*
/synthlung.egg*/
/build/*
*__pycache__*
10 changes: 9 additions & 1 deletion assets/readme.md
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
## How to
# Assets

### How to format and preprocess?

1. Install synthlung by running `pip install .`
2. Download the MSD Lung Tumor dataset from [here](https://drive.google.com/drive/folders/1HqEgzS8BV2c7xYNrZdEAnrHk7osJJ--2).
3. Extract the zip file into `/assets/`.
4. Run `synthlung format --dataset msd` to adjust the dataset format.
5. Run `synthlung seed --dataset msd` to extract tumor seeds from the dataset.
23 changes: 23 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import setuptools

# Package metadata and installation configuration for SynthLung.
setuptools.setup(
    name="synthlung",
    version="0.0.1",
    author="Vemund Fredriksen and Svein Ole Matheson Sevle",
    # The domain previously lacked the dot before the TLD ("hotmailcom"),
    # which is not a deliverable address.
    author_email="vemund.fredriksen@hotmail.com",
    description="Package for generating synthetic lung tumors",
    url="https://github.com/VemundFredriksen/SynthLung",
    packages=setuptools.find_packages(),
    # Installs a `synthlung` command that calls main() in synthlung/__main__.py.
    entry_points={"console_scripts": ["synthlung = synthlung.__main__:main"]},
    # NOTE(review): the pipelines select MONAI's "Nibabel" backend; confirm
    # whether nibabel should be listed here explicitly.
    install_requires=[
        "numpy",
        "torch",
        "tqdm",
        "monai",
    ],
    classifiers=[
        "Programming Language :: Python :: 3",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.10",
)
Empty file added synthlung/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions synthlung/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import argparse
from synthlung.utils.tumor_isolation_pipeline import TumorCropPipeline
from synthlung.utils.dataset_formatter import MSDImageSourceFormatter, MSDGenerateJSONFormatter
from synthlung.utils.tumor_insertion_pipeline import InsertTumorPipeline
import json

def seed_msd():
    """Extract tumor seeds from the formatted MSD data and index them.

    Loads the sample list from ./assets/source/msd/dataset.json, hands it to
    a TumorCropPipeline, and then lets MSDGenerateJSONFormatter write the
    dataset.json for ./assets/seeds/msd/.
    """
    with open("./assets/source/msd/dataset.json", 'r') as source_index:
        samples = json.load(source_index)

    TumorCropPipeline()(samples)

    MSDGenerateJSONFormatter("./assets/seeds/msd/").generate_json()

def format_msd():
    """Run the MSD formatter: copy/rename the files, then write dataset.json."""
    msd_formatter = MSDImageSourceFormatter()
    for step in (msd_formatter.format, msd_formatter.generate_json):
        step()

def generate_randomized_tumors():
    """Insert tumor seeds into source images using InsertTumorPipeline.

    The pipeline is created first, then the source and seed indexes are read
    from their dataset.json files and passed to it in that order.
    """
    pipeline = InsertTumorPipeline()

    loaded = []
    for index_path in ("./assets/source/msd/dataset.json",
                       "./assets/seeds/msd/dataset.json"):
        with open(index_path, 'r') as index_file:
            loaded.append(json.load(index_file))

    sources, seeds = loaded
    pipeline(sources, seeds)

def main(argv=None):
    """Command-line entry point behind the ``synthlung`` console script.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the console script relies on.

    Raises:
        SystemExit: With status 2 (raised by argparse) when the action is not
            one of the allowed choices or when ``--dataset`` is missing.
    """
    parser = argparse.ArgumentParser(description="Create your synthetic lung tumors!")

    parser.add_argument("action", choices=["format", "seed", "generate"], help="Action to perform")
    parser.add_argument("--dataset", help="Dataset to format", choices=["msd"])
    args = parser.parse_args(argv)

    # Every action needs a dataset. Without this check a missing --dataset
    # made the command exit successfully without doing anything.
    if args.dataset is None:
        parser.error("--dataset is required (choices: msd)")

    # Built after validation so the handlers are only looked up when one of
    # them is actually going to run. argparse's `choices` guarantee that the
    # (action, dataset) pair is present.
    handlers = {
        ("format", "msd"): format_msd,
        ("seed", "msd"): seed_msd,
        ("generate", "msd"): generate_randomized_tumors,
    }
    handlers[(args.action, args.dataset)]()
Empty file added synthlung/utils/__init__.py
Empty file.
79 changes: 79 additions & 0 deletions synthlung/utils/dataset_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import os
import shutil
import json
from abc import ABC, abstractmethod

# Filename suffixes used below to select files and to pair images with labels.
# Extension every copied volume must end with; also re-appended to new names.
NII_GZ_EXTENSION = '.nii.gz'
# Suffix that marks an image file when building dataset.json.
IMAGE_NII_GZ = 'image.nii.gz'
# Suffix substituted for IMAGE_NII_GZ to derive the matching label filename.
LABEL_NII_GZ = 'label.nii.gz'

class ImageSourceFormatter(ABC):
    """Interface for objects that can format an image source."""

    @abstractmethod
    def format(self) -> None:
        """Format the image source; concrete subclasses define how."""

class JSONGenerator(ABC):
    """Interface for objects that can generate a JSON description."""

    @abstractmethod
    def generate_json(self) -> None:
        """Generate the JSON output; concrete subclasses define how."""

class MSDImageSourceFormatter(ImageSourceFormatter, JSONGenerator):
    """Copies the MSD lung dataset into the source layout and indexes it.

    Files from ``imagesTr`` and ``labelsTr`` under ``source_directory`` are
    copied into ``target_directory`` as
    ``source_msd_<identity>_<image|label>.nii.gz``; ``generate_json`` then
    writes a ``dataset.json`` pairing each image with its label path.
    """

    def __init__(self) -> None:
        self.target_directory = "./assets/source/msd/"
        self.source_directory = "./assets/Task06_Lung/"

    def format(self) -> None:
        """Create the target directory if needed and copy images and labels."""
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.target_directory, exist_ok=True)

        self.__move_images__(os.path.join(self.source_directory, "imagesTr"), "image")
        self.__move_images__(os.path.join(self.source_directory, "labelsTr"), "label")

    def generate_json(self) -> None:
        """Write ``dataset.json`` into the target directory."""
        self.__generate_json__()

    def __move_images__(self, images_directory: str, suffix: str) -> None:
        """Copy every ``.nii.gz`` file of ``images_directory`` under its new name.

        Despite the name, files are copied (``shutil.copy``), not moved.

        Args:
            images_directory: Directory holding the original files.
            suffix: ``"image"`` or ``"label"``; appended to the new filename.
        """
        for filename in sorted(os.listdir(images_directory)):
            # Hidden entries (e.g. "._*" metadata files that some archives
            # carry next to the real data) also end in .nii.gz but are not
            # loadable volumes, so they are left out.
            if filename.startswith(".") or not filename.endswith(NII_GZ_EXTENSION):
                continue

            # Strip the extension from the end; index() would cut at the
            # first occurrence of ".nii.gz" inside the name instead.
            identity = filename[:-len(NII_GZ_EXTENSION)]
            new_filename = f"source_msd_{identity}_{suffix}{NII_GZ_EXTENSION}"

            shutil.copy(
                os.path.join(images_directory, filename),
                os.path.join(self.target_directory, new_filename),
            )

    def __generate_json__(self) -> None:
        """Write a list of ``{"image": ..., "label": ...}`` entries to dataset.json.

        Label paths are derived from image filenames; their existence is not
        checked.
        """
        dataset_json = []
        # Sorted so that dataset.json is reproducible across runs/filesystems.
        for filename in sorted(os.listdir(self.target_directory)):
            if not filename.endswith(IMAGE_NII_GZ):
                continue
            stem = filename[:-len(IMAGE_NII_GZ)]
            dataset_json.append({
                "image": os.path.join(self.target_directory, filename),
                "label": os.path.join(self.target_directory, stem + LABEL_NII_GZ),
            })

        with open(os.path.join(self.target_directory, "dataset.json"), 'w') as json_file:
            json.dump(dataset_json, json_file, indent=4)

class MSDGenerateJSONFormatter(JSONGenerator):
    """Writes a ``dataset.json`` index for the seed files found in a directory."""

    def __init__(self, path) -> None:
        """Remember the directory to index.

        Args:
            path: Directory containing ``*image.nii.gz`` / ``*label.nii.gz``
                files. A trailing slash is optional.
        """
        self.path = path

    def generate_json(self) -> None:
        """Write ``dataset.json`` into ``self.path``."""
        self.__generate_json__()

    def __generate_json__(self) -> None:
        """Write ``{"seed_image": ..., "seed_label": ...}`` entries to dataset.json.

        Label paths are derived from image filenames; their existence is not
        checked.
        """
        dataset_json = []
        # Sorted so that dataset.json is reproducible across runs/filesystems.
        for filename in sorted(os.listdir(self.path)):
            if not filename.endswith(IMAGE_NII_GZ):
                continue
            # Strip the suffix from the end rather than at its first occurrence.
            stem = filename[:-len(IMAGE_NII_GZ)]
            # os.path.join keeps the paths valid whether or not self.path
            # ends with a separator; plain concatenation glued the directory
            # name to the filename when it did not.
            dataset_json.append({
                "seed_image": os.path.join(self.path, filename),
                "seed_label": os.path.join(self.path, stem + LABEL_NII_GZ),
            })

        with open(os.path.join(self.path, "dataset.json"), 'w') as json_file:
            json.dump(dataset_json, json_file, indent=4)
81 changes: 81 additions & 0 deletions synthlung/utils/tumor_insertion_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from typing import Any
from monai.transforms import (Compose, LoadImaged, SaveImaged)
import numpy as np
import datetime
import random
import json
import os

class InsertTumor(object):
    """Transform that pastes a tumor seed into an image/label pair.

    The seed's bounding box is placed at a random offset whose corner lies
    inside the placeholder mask from ``__generate_random_lungmask__``. Only
    voxels where ``seed_label == 1`` are written.
    """

    # Rejection sampling gives up after this many draws instead of spinning
    # forever when no valid position exists.
    MAX_PLACEMENT_ATTEMPTS = 1000

    def __init__(self, seed: int = 3) -> None:
        """Create the offset generator.

        Args:
            seed: Seed for the NumPy generator. The generator lives on the
                instance, so successive samples get different offsets while
                a whole run stays reproducible.
        """
        self._rng = np.random.default_rng(seed)

    def __call__(self, sample) -> dict:
        """Insert ``sample['seed_image']`` into ``sample['image']`` in place.

        Args:
            sample: Mapping with ``image``, ``label``, ``seed_image`` and
                ``seed_label`` entries; ``image`` and ``label`` must expose a
                ``meta`` mapping.

        Returns:
            The same ``sample`` object, with ``image`` and ``label`` modified.

        Raises:
            ValueError: If the seed is larger than the image along any axis.
            RuntimeError: If no offset inside the mask is found.
        """
        image, label = sample['image'], sample['label']
        seed_image, seed_label = sample['seed_image'], sample['seed_label']

        lungmask = self.__generate_random_lungmask__(image)

        # Largest corner offset (per axis) at which the seed still fits.
        seed_shape = np.asarray(seed_image.shape, dtype=int)
        max_offset = np.asarray(image.shape, dtype=int) - seed_shape
        if np.any(max_offset < 0):
            raise ValueError(
                f"Seed of shape {tuple(seed_image.shape)} does not fit into "
                f"image of shape {tuple(image.shape)}"
            )

        # Draw integer offsets uniformly from [0, max_offset] per axis. A
        # normal draw scaled by the range can be negative or exceed the
        # range, which wraps around or indexes out of bounds.
        for _ in range(self.MAX_PLACEMENT_ATTEMPTS):
            offset = self._rng.integers(0, max_offset, endpoint=True)
            if lungmask[tuple(offset)] == 1:
                break
        else:
            raise RuntimeError(
                f"No valid tumor position found in {self.MAX_PLACEMENT_ATTEMPTS} attempts"
            )

        region = tuple(
            slice(int(start), int(start) + int(extent))
            for start, extent in zip(offset, seed_shape)
        )
        tumor_voxels = seed_label == 1
        # Basic slicing yields a view, so the masked assignment writes
        # through to the original image/label.
        image[region][tumor_voxels] = seed_image[tumor_voxels]
        label[region][tumor_voxels] = 1

        self.__remove_filename__(image)
        self.__remove_filename__(label)

        return sample

    def __generate_random_lungmask__(self, image):  # Temporary method
        """Return a placeholder mask: 1 inside a central box, 0 elsewhere.

        Along each of the first three axes the box spans
        ``[n // 4, 3 * (n // 4))``.
        """
        mask = np.zeros(tuple(image.shape), dtype=np.uint8)
        box = tuple(
            slice(extent // 4, 3 * (extent // 4)) for extent in image.shape[:3]
        )
        mask[box] = 1
        return mask

    def __remove_filename__(self, image):
        """Blank the ``filename_or_obj`` entry of ``image.meta``."""
        image.meta['filename_or_obj'] = ""

class InsertTumorPipeline(object):
    """Pairs random source images with random seeds and runs the insertion.

    Output goes to ``./assets/artificial_tumors/<timestamp>/``: the generated
    volumes under ``randomzied_images/`` and the list of chosen pairs under
    ``log/dataset.json``.
    """

    def __init__(self, num_samples: int = 1) -> None:
        """Set up output locations and the transform chain.

        Args:
            num_samples: How many image/seed pairs each call generates.
        """
        self.num_samples = num_samples
        self.randomized_dict = []
        # Timestamp of construction, made filesystem-friendly.
        self.time = f"{datetime.datetime.now()}".replace(" ", "-").replace(":", ".")
        self.dir_name = f"./assets/artificial_tumors/{self.time}/"
        self.compose = Compose([
            LoadImaged(keys=['image', 'label', 'seed_image', 'seed_label']),
            InsertTumor(),
            SaveImaged(keys=['image'], output_dir=f"{self.dir_name}randomzied_images/", output_postfix="image"),
            SaveImaged(keys=['label'], output_dir=f"{self.dir_name}randomzied_images/", output_postfix="label")
        ])

    def __call__(self, image_dict, seeds_dict) -> None:
        """Pick random pairs, log them, and run the transform chain on them.

        Args:
            image_dict: Sequence of source entries (dicts) to choose from.
            seeds_dict: Sequence of seed entries (dicts) to choose from.
        """
        self.__generate_randomized_dict__(image_dict, seeds_dict)
        self.compose(self.randomized_dict)

    def __generate_randomized_dict__(self, image_dict, seeds_dict):
        """Fill ``self.randomized_dict`` with freshly chosen pairs and log them."""
        # Start from an empty list on every call; otherwise a second call
        # would re-process (and re-log) the pairs of the previous one.
        self.randomized_dict = []
        for n in range(self.num_samples):
            image = random.choice(image_dict)
            seed = random.choice(seeds_dict)

            output_name = {"randomized_image" : f"{self.dir_name}randomzied_images/image_{n}.nii.gz",
                           "randomized_label" : f"{self.dir_name}randomzied_images/label_{n}.nii.gz"}

            self.randomized_dict.append({**image, **seed, **output_name})

        self.__log__()

    def __log__(self):
        """Write ``self.randomized_dict`` to ``<dir_name>log/dataset.json``."""
        log_name = self.dir_name + "log/"

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(log_name, exist_ok=True)

        with open(log_name + "dataset.json", 'w') as json_file:
            json.dump(self.randomized_dict, json_file, indent=4)
68 changes: 68 additions & 0 deletions synthlung/utils/tumor_isolation_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import Any
from monai.transforms import (Compose, LoadImaged, SaveImaged)
import monai.config
import numpy as np
import tqdm

class CutOutTumor(object):
    """Transform that crops the labelled tumor out of an image/label pair.

    The crop is the bounding box of all voxels with ``label > 0``. The result
    is stored under ``seed_image`` / ``seed_label`` together with adjusted
    copies of the metadata.
    """

    def __call__(self, sample: dict) -> dict:
        """Add ``seed_image``/``seed_label`` (and their meta dicts) to ``sample``.

        Args:
            sample: Mapping with ``image``, ``label``, ``image_meta_dict`` and
                ``label_meta_dict`` entries.

        Returns:
            The same ``sample`` object with the seed entries added.

        Raises:
            ValueError: If the label has no voxel greater than zero.
        """
        # Local import: only this method needs it.
        import copy

        image, label = sample['image'], sample['label']
        tumor_mask = (label > 0).astype(np.uint8)

        tumor_voxels = np.argwhere(tumor_mask)
        if tumor_voxels.shape[0] == 0:
            # min()/max() over an empty array would fail with an opaque
            # reduction error; say what is actually wrong.
            raise ValueError("Label contains no tumor voxels; nothing to cut out")
        y_min, x_min, z_min = (int(v) for v in tumor_voxels.min(axis=0))
        y_max, x_max, z_max = (int(v) for v in tumor_voxels.max(axis=0))

        box = (slice(y_min, y_max + 1), slice(x_min, x_max + 1), slice(z_min, z_max + 1))
        clipped_image = image[box]
        clipped_label = label[box]
        # Voxels inside the box but outside the tumor are set to -1024.
        clipped_image = np.where(clipped_label == 1, clipped_image, -1024)

        sample['seed_image'] = clipped_image
        # Deep copy so that resizing/renaming the seed metadata does not
        # rewrite the source image's metadata, which still describes the
        # uncropped volume.
        sample['seed_image_meta_dict'] = copy.deepcopy(sample['image_meta_dict'])
        self.__update_image_dims__(sample['seed_image_meta_dict'], clipped_image.shape)
        self.__update_image_filename__(sample['seed_image_meta_dict'])

        sample['seed_label'] = clipped_label
        sample['seed_label_meta_dict'] = copy.deepcopy(sample['label_meta_dict'])
        self.__update_image_dims__(sample['seed_label_meta_dict'], clipped_label.shape)
        self.__update_label_filename__(sample['seed_label'])

        return sample

    def __update_image_dims__(self, meta_dict: dict, new_dims: tuple) -> None:
        """Write the cropped shape into ``size``, ``dim[1:4]`` and ``spatial_shape``."""
        meta_dict['size'] = new_dims
        for axis in range(3):
            meta_dict['dim'][axis + 1] = new_dims[axis]
            meta_dict['spatial_shape'][axis] = new_dims[axis]

    def __update_image_filename__(self, image):
        """Rename ``source_`` to ``seed_`` in a meta dict's ``filename_or_obj``."""
        image['filename_or_obj'] = image['filename_or_obj'].replace('source_', 'seed_')

    def __update_label_filename__(self, label):
        """Rename ``source_`` to ``seed_`` in ``label.meta['filename_or_obj']``."""
        label.meta['filename_or_obj'] = label.meta['filename_or_obj'].replace('source_', 'seed_')


class TumorCropPipeline(object):
    """Load image/label pairs, cut out the tumor and save it as a seed.

    Seeds are written to ./assets/seeds/msd/ without a separate folder per
    sample and with an empty output postfix.
    """

    # Runs once, when the class body is executed.
    monai.config.BACKEND = "Nibabel"

    def __init__(self) -> None:
        """Assemble the load -> cut out -> save transform chain."""
        loader = LoadImaged(keys=['image', 'label'], image_only = False)
        cutter = CutOutTumor()
        image_writer = SaveImaged(keys=['seed_image'], output_dir='./assets/seeds/msd/', output_postfix='', separate_folder=False)
        label_writer = SaveImaged(keys=['seed_label'], output_dir='./assets/seeds/msd/', output_postfix='', separate_folder=False)
        self.compose = Compose([loader, cutter, image_writer, label_writer])

    def __call__(self, image_dict) -> None:
        """Run the chain on one sample, or on each sample of a list.

        Args:
            image_dict: A single sample, or a list of samples; a list is
                processed item by item with a tqdm progress bar.
        """
        if not isinstance(image_dict, list):
            self.compose(image_dict)
        else:
            print(f"Tumor isolation for {len(image_dict)} images starting...")
            for entry in tqdm.tqdm(image_dict):
                self.compose(entry)

        print("Tumor isolation complete!")


0 comments on commit 08da925

Please sign in to comment.