Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Date in the zip filename and in the dicom header #56

Open
kcho opened this issue Oct 3, 2023 · 3 comments
Open

Date in the zip filename and in the dicom header #56

kcho opened this issue Oct 3, 2023 · 3 comments
Assignees
Labels
enhancement New feature or request

Comments

@kcho
Copy link
Member

kcho commented Oct 3, 2023

DPACC currently depends on the date entered in the run sheet or embedded in the zip filename when finding missing scans, running BIDS conversion, etc. However, we need to start using the date in the dicom headers as the true date of the scan, and flag any run sheets and zip files whose dates do not match it.

@kcho kcho added the enhancement New feature or request label Oct 3, 2023
@nickckim
Copy link
Contributor

nickckim commented Oct 4, 2023

I am beginning to build a framework that creates a report comparing dicom information against zip file names and run sheet information. One difficulty is that some subjects have missing or unexpected values for "PatientID" (participant id), "PatientName" (subject id), or "StudyDate" (acquisition date) in their dicoms. For example, it is not safe to assume that every dicom contains an acquisition date.

Please see /PHShome/nk582/dicom_id_info_dict.csv

@nickckim
Copy link
Contributor

nickckim commented Oct 4, 2023

import random
from pathlib import Path

import pydicom


def get_random_dicom_path_per_session(dicom_root):
    """
    Get one randomly chosen file path for each session under a root directory.

    The directory tree is walked as follows (example paths):

    1. Start:
       /data/predict1/data_from_nda/MRI_ROOT/sourcedata ->

    2. Loop through each subject directory (two letters followed by any
       five characters):
       /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345 ->

    3. Loop through each session directory:
       /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345/ses-190001011 ->

    4. Check that at least one dicom directory exists:
       /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345/ses-190001011/* ->

    5. Check that at least one file exists:
       /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345/ses-190001011/*/*

    6. Finish:
       Select a random file from a random dicom directory. Empty dicom
       directories are discarded and another one is drawn until a file is
       found or no directories remain.

    A warning is printed (and the level is skipped) whenever a level of the
    tree is empty. Files are only checked for existence; nothing here
    verifies that the selected file is a valid dicom.

    Parameters:
    - dicom_root: Dicom root directory (str or pathlib.Path).

    Returns:
    - List of pathlib.Path objects, at most one per subject/session pair.
      Sessions without any file are omitted.
    """

    dicom_root = Path(dicom_root)
    random_dicom_path_per_session = []

    # Candidates are sorted at every level so that the random draws are
    # reproducible under random.seed(); glob/iterdir order is otherwise
    # filesystem dependent.
    # /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345
    dicom_subject_directories = sorted(
        d for d in dicom_root.glob("[A-Za-z][A-Za-z]?????") if d.is_dir()
    )

    if not dicom_subject_directories:
        print(f"Warning: No dicom_subject_directories found in {dicom_root}.")
        return []

    for dicom_subject_directory in dicom_subject_directories:
        # /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345/ses-190001011
        # Only directories count as sessions, matching the other levels.
        dicom_session_directories = sorted(
            d for d in dicom_subject_directory.glob("ses-*") if d.is_dir()
        )

        if not dicom_session_directories:
            # Paths returned by glob already contain dicom_root, so they are
            # printed as-is rather than being prefixed with the root again.
            print(
                f"Warning: No dicom_session_directories found in {dicom_subject_directory}."
            )
            continue

        for dicom_session_directory in dicom_session_directories:
            # /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345/ses-190001011/*
            dicom_directories = sorted(
                d for d in dicom_session_directory.glob("*") if d.is_dir()
            )

            if not dicom_directories:
                print(
                    f"Warning: No dicom_directories found in {dicom_session_directory}."
                )
                continue

            # Draw directories without replacement until one contains a file.
            while dicom_directories:
                random_dicom_directory = random.choice(dicom_directories)

                # /data/predict1/data_from_nda/MRI_ROOT/sourcedata/AB12345/ses-190001011/*/*
                files_in_random_dicom_directory = sorted(
                    f for f in random_dicom_directory.iterdir() if f.is_file()
                )

                if files_in_random_dicom_directory:
                    random_dicom_path_per_session.append(
                        random.choice(files_in_random_dicom_directory)
                    )
                    break

                print(f"Warning: No files found in {random_dicom_directory}.")
                dicom_directories.remove(random_dicom_directory)

    return random_dicom_path_per_session


def get_id_info_from_dicom(path_to_dicom):
    """
    Get info relevant to participant id from the header of one dicom file.

    Only header elements are needed, so the file is read without its pixel
    data.

    Parameters:
    - path_to_dicom: Path to dicom.

    Returns:
    - Dictionary with the following structure:
        {
            <PatientID value>: {
                "Subject": <PatientName value>,
                "Session": <StudyDate value>,
                "Path": <path_to_dicom as a string>
            }
        }
      Any of the three header values is None when the element is absent
      from the file; in particular the outer key is None when PatientID is
      missing.
    """

    # Load only the DICOM header; the three tags read below live there, so
    # skipping the pixel data avoids reading the bulk of the file.
    ds = pydicom.dcmread(path_to_dicom, stop_before_pixels=True)
    # Extract Participant ID
    patient_id = ds.get("PatientID", None)
    # Extract Subject ID
    subject_id = ds.get("PatientName", None)
    # Extract Session ID (the study date recorded in the header)
    session_id = ds.get("StudyDate", None)

    return {
        patient_id: {
            "Subject": subject_id,
            "Session": session_id,
            "Path": str(path_to_dicom),
        }
    }


def get_dicom_id_info_dict(dicom_root):
    """
    Collect participant id info from one random dicom per session.

    Calls get_random_dicom_path_per_session to pick the files and
    get_id_info_from_dicom to read each of them, then merges the
    per-file dictionaries into one.

    NOTE(review): the merged dictionary is keyed by the key that
    get_id_info_from_dicom returns (the PatientID value), so when two
    files yield the same key the later entry replaces the earlier one.

    Parameters:
    - dicom_root: Dicom root directory.

    Returns:
    - dicom_id_info_dict: Dictionary of dictionaries from get_id_info_from_dicom.
    """

    merged_info = {}

    for dicom_path in get_random_dicom_path_per_session(dicom_root):
        per_file_info = get_id_info_from_dicom(dicom_path)
        for participant_key, details in per_file_info.items():
            # Plain assignment: a repeated key keeps the most recent entry.
            merged_info[participant_key] = details

    return merged_info

@nickckim
Copy link
Contributor

nickckim commented Oct 4, 2023

import csv

# Build the id-info dictionary for the hard-coded source data root.
# This runs at import time, before save_to_csv is defined below.
dicom_id_info_dict = get_dicom_id_info_dict(
    "/data/predict1/data_from_nda/MRI_ROOT/sourcedata"
)


def save_to_csv(data, filename):
    """
    Write a dictionary of id info to a CSV file.

    Parameters:
    - data: Dictionary mapping a PatientID value to a dictionary with the
      keys "Subject", "Session" and "Path".
    - filename: Path of the CSV file to create (overwritten if it exists).

    Returns:
    - None. The file gets a header row followed by one row per entry.
    """
    header = ["PatientID", "Subject", "Session", "Path"]

    with open(filename, "w", newline="") as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(
            [key, record["Subject"], record["Session"], record["Path"]]
            for key, record in data.items()
        )


# Write the collected table to dicom_id_info_dict.csv; the relative path
# resolves against the current working directory.
save_to_csv(dicom_id_info_dict, "dicom_id_info_dict.csv")

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Projects
None yet
Development

No branches or pull requests

3 participants