Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log study info in anonymisation #587

Merged
merged 2 commits into from
Jan 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 52 additions & 43 deletions orthanc/orthanc-anon/plugin/pixl.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from pydicom import dcmread

import orthanc
from pixl_dcmd.dicom_helpers import get_study_info
from pixl_dcmd.main import (
anonymise_dicom_and_update_db,
parse_validation_results,
Expand All @@ -53,6 +54,8 @@

from core.project_config.pixl_config_model import PixlConfig

from pixl_dcmd.dicom_helpers import StudyInfo

ORTHANC_USERNAME = config("ORTHANC_USERNAME")
ORTHANC_PASSWORD = config("ORTHANC_PASSWORD")
ORTHANC_URL = "http://localhost:8042"
Expand Down Expand Up @@ -234,7 +237,6 @@ def _import_studies_from_raw(

Args:
study_resource_ids: Resource IDs of the study in Orthanc Raw
study_uids: Corresponding StudyInstanceUIDs
project_name: Name of the project

- Pull studies from Orthanc Raw based on its resource ID
Expand All @@ -246,7 +248,8 @@ def _import_studies_from_raw(
anonymised_study_uids = []

for study_resource_id, study_uid in zip(study_resource_ids, study_uids, strict=False):
anonymised_uid = _anonymise_study_and_upload(study_resource_id, study_uid, project_name)
logger.debug("Processing project '{}', study '{}' ", project_name, study_uid)
anonymised_uid = _anonymise_study_and_upload(study_resource_id, project_name)
if anonymised_uid:
anonymised_study_uids.append(anonymised_uid)

Expand All @@ -270,27 +273,26 @@ def _import_studies_from_raw(
send_study(study_id=resource_id, project_name=project_name)


def _anonymise_study_and_upload(
study_resource_id: str, study_uid: str, project_name: str
) -> str | None:
def _anonymise_study_and_upload(study_resource_id: str, project_name: str) -> str | None:
zipped_study_bytes = get_study_zip_archive_from_raw(resource_id=study_resource_id)

study_info = _get_study_info_from_first_file(zipped_study_bytes)
logger.info("Processing project '{}', {}", project_name, study_info)

with ZipFile(zipped_study_bytes) as zipped_study:
try:
anonymised_instances_bytes, anonymised_study_uid = _anonymise_study_instances(
zipped_study=zipped_study,
study_uid=study_uid,
study_info=study_info,
project_name=project_name,
)
except PixlDiscardError as discard:
logger.warning(
"Failed to anonymize project: '{}', study: {}: {}", project_name, study_uid, discard
"Failed to anonymize project: '{}', {}: {}", project_name, study_info, discard
)
return None
except Exception: # noqa: BLE001
logger.exception(
"Failed to anonymize project: '{}', study: {}", project_name, study_uid
)
logger.exception("Failed to anonymize project: '{}', {}", project_name, study_info)
return None

_upload_instances(anonymised_instances_bytes)
Expand All @@ -310,36 +312,16 @@ def get_study_zip_archive_from_raw(resource_id: str) -> BytesIO:
return BytesIO(response.content)


def _get_study_resource_id(study_uid: str) -> str:
    """
    Get the resource ID for an existing study based on its StudyInstanceUID.

    The lookup is a study-level query against Orthanc's ``/tools/find`` endpoint.
    This function only queries: it does not delete or modify any study.

    Args:
        study_uid: StudyInstanceUID of the study to look up

    Returns:
        The resource ID of the single study matching the StudyInstanceUID.

    Raises:
        ValueError: If no study matches the StudyInstanceUID, or if more than
            one study matches it.
    """
    data = json.dumps(
        {
            "Level": "Study",
            "Query": {
                "StudyInstanceUID": study_uid,
            },
        }
    )
    study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data))
    if not study_resource_ids:
        message = f"No study found with StudyInstanceUID {study_uid}"
        raise ValueError(message)
    if len(study_resource_ids) > 1:
        # An ambiguous match is treated as an error rather than picking one arbitrarily.
        message = f"Multiple studies found with StudyInstanceUID {study_uid}"
        raise ValueError(message)

    return study_resource_ids[0]
def _get_study_info_from_first_file(zipped_study_bytes: BytesIO) -> StudyInfo:
    """
    Read the study info from the first DICOM file of a zipped study.

    Only the first file in the archive is parsed; the remaining members are
    not inspected here.

    Args:
        zipped_study_bytes: In-memory zip archive containing the study's DICOM files

    Returns:
        The result of ``get_study_info`` for the first file in the archive.

    Raises:
        ValueError: If the archive does not contain any file.
    """
    with ZipFile(zipped_study_bytes) as zipped_study:
        # Skip directory entries so that we always try to parse an actual file.
        file_info = next(
            (info for info in zipped_study.infolist() if not info.is_dir()),
            None,
        )
        if file_info is None:
            # Fail with an explicit message instead of an opaque IndexError.
            message = "Zipped study contains no files, cannot read study info"
            raise ValueError(message)
        with zipped_study.open(file_info) as file:
            dataset = dcmread(file)
            return get_study_info(dataset)


def _anonymise_study_instances(
zipped_study: ZipFile, study_uid: str, project_name: str
zipped_study: ZipFile, study_info: StudyInfo, project_name: str
) -> tuple[list[bytes], str]:
"""
Iterate over all instances and anonymise them.
Expand All @@ -350,7 +332,6 @@ def _anonymise_study_instances(
"""
config = load_project_config(project_name)
anonymised_instances_bytes = []
logger.info("Processing project '{}', study: {}", project_name, study_uid)
skipped_instance_counts = defaultdict(int)
dicom_validation_errors = {}

Expand All @@ -364,9 +345,9 @@ def _anonymise_study_instances(
)
except PixlSkipInstanceError as e:
logger.debug(
"Skipping instance {} for study {}: {}",
"Skipping instance {} for {}: {}",
dataset[0x0008, 0x0018].value,
study_uid,
study_info,
e,
)
skipped_instance_counts[str(e)] += 1
Expand All @@ -380,9 +361,9 @@ def _anonymise_study_instances(
raise PixlDiscardError(message)

logger.debug(
"Project '{}' Study {}, skipped instances: {}",
"Project '{}' {}, skipped instances: {}",
project_name,
study_uid,
study_info,
dict(skipped_instance_counts),
)

Expand All @@ -391,7 +372,7 @@ def _anonymise_study_instances(
"The anonymisation introduced the following validation errors:\n{}",
parse_validation_results(dicom_validation_errors),
)
logger.success("Finished anonymising project: '{}', study: {}", project_name, study_uid)
logger.success("Finished anonymising project: '{}', {}", project_name, study_info)
return anonymised_instances_bytes, anonymised_study_uid


Expand Down Expand Up @@ -419,6 +400,34 @@ def _upload_instances(instances_bytes: list[bytes]) -> None:
upload_response.raise_for_status()


def _get_study_resource_id(study_uid: str) -> str:
    """
    Get the resource ID for an existing study based on its StudyInstanceUID.

    The lookup is a study-level query against Orthanc's ``/tools/find`` endpoint.
    This function only queries: it does not delete or modify any study.

    Args:
        study_uid: StudyInstanceUID of the study to look up

    Returns:
        The resource ID of the single study matching the StudyInstanceUID.

    Raises:
        ValueError: If no study matches the StudyInstanceUID, or if more than
            one study matches it.
    """
    data = json.dumps(
        {
            "Level": "Study",
            "Query": {
                "StudyInstanceUID": study_uid,
            },
        }
    )
    study_resource_ids = json.loads(orthanc.RestApiPost("/tools/find", data))
    if not study_resource_ids:
        message = f"No study found with StudyInstanceUID {study_uid}"
        raise ValueError(message)
    if len(study_resource_ids) > 1:
        # An ambiguous match is treated as an error rather than picking one arbitrarily.
        message = f"Multiple studies found with StudyInstanceUID {study_uid}"
        raise ValueError(message)

    return study_resource_ids[0]


def send_study(study_id: str, project_name: str) -> None:
"""
Send the resource to the appropriate destination.
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/src/pixl_dcmd/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sqlalchemy import URL, create_engine, exists
from sqlalchemy.orm import sessionmaker, exc

from pixl_dcmd._dicom_helpers import StudyInfo
from pixl_dcmd.dicom_helpers import StudyInfo

url = URL.create(
drivername="postgresql+psycopg2",
Expand Down
4 changes: 2 additions & 2 deletions pixl_dcmd/src/pixl_dcmd/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@
get_uniq_pseudo_study_uid_and_update_db,
get_pseudo_patient_id_and_update_db,
)
from pixl_dcmd._dicom_helpers import (
from pixl_dcmd.dicom_helpers import (
DicomValidator,
get_study_info,
)
from pixl_dcmd._tag_schemes import _scheme_list_to_dict, merge_tag_schemes

if typing.TYPE_CHECKING:
from pixl_dcmd._dicom_helpers import StudyInfo
from pixl_dcmd.dicom_helpers import StudyInfo


def write_dataset_to_bytes(dataset: Dataset) -> bytes:
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from collections.abc import Generator
from typing import Optional

from pixl_dcmd._dicom_helpers import get_study_info
from pixl_dcmd.dicom_helpers import get_study_info
from core.project_config import load_project_config
import pytest
import pytest_pixl.dicom
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
get_uniq_pseudo_study_uid_and_update_db,
get_pseudo_patient_id_and_update_db,
)
from pixl_dcmd._dicom_helpers import StudyInfo
from pixl_dcmd.dicom_helpers import StudyInfo
from sqlalchemy.orm import Session

STUDY_DATE = datetime.date.fromisoformat("2023-01-01")
Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/test_dicom_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from __future__ import annotations

import pytest
from pixl_dcmd._dicom_helpers import DicomValidator
from pixl_dcmd.dicom_helpers import DicomValidator
from pixl_dcmd.main import anonymise_dicom
from pydicom import Dataset

Expand Down
2 changes: 1 addition & 1 deletion pixl_dcmd/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from core.project_config.pixl_config_model import load_config_and_validate
from decouple import config

from pixl_dcmd._dicom_helpers import get_study_info
from pixl_dcmd.dicom_helpers import get_study_info
from pixl_dcmd.main import (
anonymise_dicom_and_update_db,
_anonymise_dicom_from_scheme,
Expand Down