Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TEST][REF] Move and test functions related to creating scans.tsv for AIBL #1390

Open
wants to merge 9 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 0 additions & 210 deletions clinica/iotools/bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,158 +480,6 @@ def create_participants_df(
return participant_df


def create_scans_dict(
    clinical_data_dir: Path,
    study_name: StudyName,
    clinical_specifications_folder: Path,
    bids_ids: list[str],
    name_column_ids: str,
    name_column_ses: str,
    ses_dict: dict,
) -> dict:
    """Create a nested dictionary of scans metadata for all sessions of all participants.

    The values are read from the study clinical data files, following the field
    descriptions found in the ``scans.tsv`` specification file located in
    ``clinical_specifications_folder``.

    Parameters
    ----------
    clinical_data_dir : Path
        The path to the directory where the clinical data are stored.

    study_name : StudyName
        The name of the study (Ex ADNI).

    clinical_specifications_folder : Path
        The path to the folder containing the clinical specification files.

    bids_ids : list of str
        A list of bids ids.

    name_column_ids : str
        The name of the column where the subject id is contained.

    name_column_ses : str
        The name of the column where the viscode of the session is contained.

    ses_dict : dict
        Links the session ids to the viscodes of the sessions,
        per subject: ``ses_dict[bids_id][session_key] -> viscode``.

    Returns
    -------
    dict :
        A nested dictionary ``[bids_id][session_id][modality][bids_field] -> value``
        containing the scans information for all sessions of all participants.
        Note: despite what the previous annotation claimed, this is a plain
        dict (see the ``return scans_dict`` below), not a ``pd.DataFrame``;
        it is meant to be consumed by ``write_scans_tsv``.
    """
    import datetime

    from clinica.utils.pet import Tracer
    from clinica.utils.stream import cprint

    scans_dict = {}
    # Cache keys for the most recently opened clinical file/sheet (see below).
    prev_file = ""
    prev_sheet = ""

    # Init the dictionary with the subject ids: one sub-dict per session,
    # keyed by modality group (anatomical/diffusion/functional vs PET tracers).
    for bids_id in bids_ids:
        scans_dict[bids_id] = dict()
        for session_id in {"ses-" + key for key in ses_dict[bids_id].keys()}:
            scans_dict[bids_id][session_id] = {
                "T1/DWI/fMRI/FMAP": {},
                Tracer.PIB: {},
                Tracer.AV45: {},
                Tracer.FMM: {},
                Tracer.FDG: {},
            }

    scans_specs = pd.read_csv(clinical_specifications_folder / "scans.tsv", sep="\t")
    fields_dataset = []
    fields_location = []
    fields_bids = []
    fields_mod = []

    # Extract the fields available and the corresponding bids name, location and type
    for i in range(0, len(scans_specs[study_name.value])):
        field = scans_specs[study_name.value][i]
        if not pd.isnull(field):
            fields_dataset.append(field)
            fields_bids.append(scans_specs["BIDS CLINICA"][i])
            fields_location.append(scans_specs[f"{study_name.value} location"][i])
            fields_mod.append(scans_specs["Modalities related"][i])

    # For each field available extract the original name, extract from the file all the values and fill a data structure
    for i in range(0, len(fields_dataset)):
        # Location is composed by file/sheet
        location = fields_location[i].split("/")
        file_name = location[0]
        sheet = location[1] if len(location) > 1 else ""
        # Check if the file to read is already opened: consecutive fields from
        # the same file/sheet reuse the previously loaded ``file_to_read``.
        if file_name == prev_file and sheet == prev_sheet:
            pass
        else:
            file_ext = os.path.splitext(file_name)[1]
            files_to_read = [f for f in clinical_data_dir.glob(file_name)]
            # NOTE(review): only .xlsx and .csv are handled; any other extension
            # silently falls through and ``file_to_read`` keeps its previous
            # value (or is unbound on the first iteration) — confirm this
            # cannot happen with the current specification files.
            if file_ext == ".xlsx":
                file_to_read = pd.read_excel(files_to_read[0], sheet_name=sheet)
            elif file_ext == ".csv":
                file_path = files_to_read[0]

                # Fix for malformed flutemeta file in AIBL (see #796).
                # Some flutemeta lines contain a non-coded string value at the second-to-last position. This value
                # contains a comma which adds an extra column and shifts the remaining values to the right. In this
                # case, we just remove the erroneous content and replace it with -4 which AIBL uses as n/a value.
                on_bad_lines = lambda x: "error"  # noqa
                if "flutemeta" in file_path.name and study_name == StudyName.AIBL:
                    on_bad_lines = lambda bad_line: bad_line[:-3] + [-4, bad_line[-1]]  # noqa
                file_to_read = pd.read_csv(
                    file_path,
                    sep=",",
                    engine="python",
                    on_bad_lines=on_bad_lines,
                )
            prev_file = file_name
            prev_sheet = sheet

        for bids_id in bids_ids:
            # Numeric study id: the BIDS id minus the "sub-<STUDY>" prefix.
            original_id = bids_id.replace(f"sub-{study_name.value}", "")
            for session_name in {"ses-" + key for key in ses_dict[bids_id].keys()}:
                # When comparing sessions, remove the "-ses" prefix IF it exists.
                # Matches the row whose subject id equals ``original_id`` and
                # whose viscode matches the one mapped to this session in
                # ``ses_dict``.
                row_to_extract = file_to_read[
                    (file_to_read[name_column_ids] == int(original_id))
                    & (
                        list(
                            filter(
                                None, file_to_read[name_column_ses].str.split("ses-")
                            )
                        )[0][0]
                        == ses_dict[bids_id][
                            list(filter(None, session_name.split("ses-")))[0]
                        ]
                    )
                ].index.tolist()
                if len(row_to_extract) > 0:
                    # Only the first matching row is used.
                    row_to_extract = row_to_extract[0]
                    # Fill the dictionary with all the information
                    value = file_to_read.iloc[row_to_extract][fields_dataset[i]]

                    if study_name == StudyName.AIBL:  # Deal with special format in AIBL
                        # AIBL codes missing values as -4.
                        # NOTE(review): this compares against the *string* "-4";
                        # values parsed by pandas as the integer -4 (including
                        # the ones injected by the flutemeta on_bad_lines fix
                        # above) would not match — TODO confirm intended.
                        if value == "-4":
                            value = "n/a"
                        elif fields_bids[i] == "acq_time":
                            # Normalize AIBL dates (MM/DD/YYYY) to BIDS ISO 8601.
                            date_obj = datetime.datetime.strptime(value, "%m/%d/%Y")
                            value = date_obj.strftime("%Y-%m-%dT%H:%M:%S")

                    scans_dict[bids_id][session_name][fields_mod[i]][
                        fields_bids[i]
                    ] = value
                else:
                    cprint(
                        f"Scans information for {bids_id} {session_name} not found.",
                        lvl="info",
                    )
                    scans_dict[bids_id][session_name][fields_mod[i]][
                        fields_bids[i]
                    ] = "n/a"

    return scans_dict


def _write_bids_dataset_description(
study_name: StudyName,
bids_dir: Path,
Expand Down Expand Up @@ -742,64 +590,6 @@ def _get_pet_tracer_from_filename(filename: str) -> Tracer:
return Tracer(tracer)


def write_scans_tsv(
    bids_dir: Path, participant_ids: List[str], scans_dict: dict
) -> None:
    """Write the scans dict into TSV files.

    For every participant/session, one ``<sub>_<ses>_scans.tsv`` file is
    written in the session folder, with one row per imaging file found in
    the supported modality sub-folders.

    Parameters
    ----------
    bids_dir : Path
        The path to the BIDS directory.

    participant_ids : List[str]
        List of participant ids for which to write the scans TSV files.

    scans_dict : dict
        Dictionary containing scans metadata, keyed as
        ``[participant_id][session_id][modality_group]``.

        .. note::
            This is the output of the function
            `clinica.iotools.bids_utils.create_scans_dict`.

    See also
    --------
    write_sessions_tsv
    """
    supported_modalities = ("anat", "dwi", "func", "pet")

    for sub in participant_ids:
        for session_path in (bids_dir / sub).glob("ses-*"):
            scans_df = pd.DataFrame()
            # session_path already points to <bids_dir>/<sub>/<ses>.
            tsv_file = session_path / f"{sub}_{session_path.name}_scans.tsv"
            # Remove any stale scans file from a previous run.
            tsv_file.unlink(missing_ok=True)

            for mod in session_path.glob("*"):
                if mod.name in supported_modalities:
                    # Only imaging files belong in scans.tsv: skip JSON sidecars.
                    # (Bug fix: the previous filter tested the *directory*
                    # suffix, `mod.suffix`, which never excluded anything.)
                    for file in [f for f in mod.iterdir() if f.suffix != ".json"]:
                        # anat/dwi/func share one metadata group; pet rows are
                        # keyed by the tracer parsed from the filename.
                        f_type = (
                            "T1/DWI/fMRI/FMAP"
                            if mod.name in ("anat", "dwi", "func")
                            else _get_pet_tracer_from_filename(file.name).value
                        )
                        row_to_append = pd.DataFrame(
                            scans_dict[sub][session_path.name][f_type], index=[0]
                        )
                        # First column is the file path relative to the session folder.
                        row_to_append.insert(
                            0, "filename", str(Path(mod.name) / Path(file.name))
                        )
                        scans_df = pd.concat([scans_df, row_to_append])
            # Guard against sessions with no supported-modality files: indexing
            # an empty DataFrame on "filename" would raise KeyError.
            if not scans_df.empty:
                scans_df = scans_df.set_index("filename").fillna("n/a")
                scans_df.to_csv(tsv_file, sep="\t", encoding="utf8")


def get_bids_subjs_list(bids_path: Path) -> List[str]:
"""Given a BIDS compliant dataset, return the list of all the subjects available.

Expand Down
1 change: 1 addition & 0 deletions clinica/iotools/converter_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"replace_sequence_chars",
"write_longitudinal_analysis",
"write_statistics",
"viscode_to_session",
]


Expand Down
Loading
Loading