Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into stefpiatek/exported_i…
Browse files Browse the repository at this point in the history
…mage_info

# Conflicts:
#	pixl_dcmd/src/pixl_dcmd/_database.py
  • Loading branch information
stefpiatek committed Jan 13, 2025
2 parents 9283e51 + b3921b6 commit 55b2888
Show file tree
Hide file tree
Showing 24 changed files with 432 additions and 235 deletions.
8 changes: 4 additions & 4 deletions cli/src/pixl_cli/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ def _filter_existing_images(
) -> pd.DataFrame:
# DataFrame indices must match when using df.isin (or df.index.isin)
# So we re-index the DataFrames to match on the columns we want to compare
messages_df_reindexed = messages_df.set_index(["accession_number", "mrn", "study_date"])
images_df_reindexed = images_df.set_index(["accession_number", "mrn", "study_date"])
messages_df_reindexed = messages_df.set_index(["accession_number", "mrn", "study_uid"])
images_df_reindexed = images_df.set_index(["accession_number", "mrn", "study_uid"])
keep_indices = ~messages_df_reindexed.index.isin(images_df_reindexed.index)
return messages_df[keep_indices]

Expand All @@ -101,7 +101,7 @@ def _filter_exported_messages(
) -> pd.DataFrame:
merged = messages_df.merge(
images_df,
on=["accession_number", "mrn", "study_date"],
on=["accession_number", "mrn", "study_uid"],
how="left",
validate="one_to_one",
suffixes=(None, None),
Expand Down Expand Up @@ -131,7 +131,7 @@ def all_images_for_project(project_slug: str) -> pd.DataFrame:
PixlSession = sessionmaker(engine)

query = (
select(Image.accession_number, Image.study_date, Image.mrn, Image.exported_at)
select(Image.accession_number, Image.study_uid, Image.mrn, Image.exported_at)
.join(Extract)
.where(Extract.slug == project_slug)
)
Expand Down
21 changes: 13 additions & 8 deletions cli/src/pixl_cli/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,25 @@ def read_patient_info(resources_path: Path) -> pd.DataFrame:
messages_df = _load_csv(resources_path)
else:
messages_df = _load_parquet(resources_path)
# Tidy up dataframe in case of whitespace or no way to identify images
unique_columns = ["project_name", "mrn", "accession_number", "study_uid"]
filtered_df = messages_df.dropna(subset=["accession_number", "study_uid"], how="all")
for column in unique_columns:
filtered_df[column] = filtered_df[column].str.strip()
filtered_df = filtered_df[
~(filtered_df["accession_number"].eq("") & filtered_df["study_uid"].eq(""))
]

messages_df = messages_df.sort_values(by=["project_name", "study_date"])
messages_df = messages_df.drop_duplicates(
subset=["project_name", "mrn", "accession_number", "study_date"]
)
filtered_df = filtered_df.sort_values(by=["project_name", "study_date"])
filtered_df = filtered_df.drop_duplicates(subset=unique_columns)

if len(messages_df) == 0:
if len(filtered_df) == 0:
msg = f"Failed to find any messages in {resources_path}"
raise ValueError(msg)

logger.info("Created {} messages from {}", len(messages_df), resources_path)
logger.info("Created {} messages from {}", len(filtered_df), resources_path)

return messages_df
return filtered_df


def _load_csv(filepath: Path) -> pd.DataFrame:
Expand Down Expand Up @@ -168,7 +174,6 @@ class DF_COLUMNS(StrEnum): # noqa: N801
"participant_id": "pseudo_patient_id",
}


MAP_PARQUET_TO_MESSAGE_KEYS = {
"PrimaryMrn": "mrn",
"AccessionNumber": "accession_number",
Expand Down
26 changes: 26 additions & 0 deletions cli/tests/test_messages_from_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,32 @@ def test_messages_from_csv(omop_resources: Path) -> None:
assert messages == expected_messages


def test_whitespace_and_na_processing(omop_resources: Path) -> None:
    """
    GIVEN a csv whose values carry leading and trailing whitespace, which contains
        a duplicate entry and rows with no image identifiers (empty or whitespace only)
    WHEN the csv is read and messages are generated from the resulting dataframe
    THEN exactly one message should be generated, with no leading or trailing whitespace
    """
    # Arrange
    csv_path = omop_resources / "test_whitespace_and_na_processing.csv"
    expected_message = Message(
        procedure_occurrence_id=0,
        mrn="patient_identifier",
        accession_number="123456789",
        study_uid="1.2.3.4.5.6.7.8",
        project_name="ms-pinpoint-test",
        extract_generated_timestamp=datetime.datetime.fromisoformat("2023-01-01T00:01:00Z"),
        study_date=datetime.date.fromisoformat("2022-01-01"),
    )

    # Act
    generated = messages_from_df(read_patient_info(csv_path))

    # Assert
    assert generated == [expected_message]


def test_messages_from_csv_multiple_projects(
omop_resources: Path, rows_in_session, mock_publisher
) -> None:
Expand Down
22 changes: 15 additions & 7 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ services:
hasher-api:
build:
context: .
dockerfile: ./docker/hasher-api/Dockerfile
dockerfile: ./docker/pixl-python/Dockerfile
target: hasher_api
args:
PIXL_PACKAGE_DIR: hasher
<<: *build-args-common
environment:
<<: [*proxy-common, *pixl-common-env]
Expand All @@ -93,7 +95,6 @@ services:
networks:
- pixl-net
healthcheck:
test: ["CMD", "curl", "-f", "http://hasher-api:8000/heart-beat"]
interval: 10s
timeout: 30s
retries: 5
Expand All @@ -102,9 +103,11 @@ services:
orthanc-anon:
build:
context: .
dockerfile: ./docker/orthanc-anon/Dockerfile
dockerfile: ./docker/orthanc/Dockerfile
target: pixl_orthanc_anon
args:
<<: *build-args-common
ORTHANC_DIR: orthanc-anon
ORTHANC_CONCURRENT_JOBS: ${ORTHANC_CONCURRENT_JOBS}
platform: linux/amd64
command: /run/secrets
Expand Down Expand Up @@ -171,9 +174,11 @@ services:
orthanc-raw:
build:
context: .
dockerfile: ./docker/orthanc-raw/Dockerfile
dockerfile: ./docker/orthanc/Dockerfile
target: pixl_orthanc_raw
args:
<<: *build-args-common
ORTHANC_DIR: orthanc-raw
ORTHANC_RAW_MAXIMUM_STORAGE_SIZE: ${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE}
ORTHANC_RAW_JOB_HISTORY_SIZE: ${ORTHANC_RAW_JOB_HISTORY_SIZE}
ORTHANC_CONCURRENT_JOBS: ${ORTHANC_CONCURRENT_JOBS}
Expand Down Expand Up @@ -249,8 +254,10 @@ services:
export-api:
build:
context: .
dockerfile: ./docker/export-api/Dockerfile
dockerfile: ./docker/pixl-python/Dockerfile
target: export_api
args:
PIXL_PACKAGE_DIR: pixl_export
<<: *build-args-common
environment:
<<:
Expand Down Expand Up @@ -297,8 +304,10 @@ services:
imaging-api:
build:
context: .
dockerfile: ./docker/imaging-api/Dockerfile
dockerfile: ./docker/pixl-python/Dockerfile
target: imaging_api
args:
PIXL_PACKAGE_DIR: pixl_imaging
<<: *build-args-common
depends_on:
queue:
Expand All @@ -308,7 +317,6 @@ services:
orthanc-anon:
condition: service_healthy
healthcheck:
test: curl -f http://0.0.0.0:8000/heart-beat
interval: 10s
timeout: 30s
retries: 5
Expand Down
46 changes: 0 additions & 46 deletions docker/hasher-api/Dockerfile

This file was deleted.

50 changes: 0 additions & 50 deletions docker/imaging-api/Dockerfile

This file was deleted.

53 changes: 0 additions & 53 deletions docker/orthanc-anon/Dockerfile

This file was deleted.

35 changes: 29 additions & 6 deletions docker/orthanc-raw/Dockerfile → docker/orthanc/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM orthancteam/orthanc:24.7.3@sha256:57a3d037729897331027ddc00c12695b50f1effbbf805f855396f3d0248d2d5f
FROM orthancteam/orthanc:24.7.3@sha256:57a3d037729897331027ddc00c12695b50f1effbbf805f855396f3d0248d2d5f AS pixl_orthanc_apt
SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"]

# Create a virtual environment, recommended since python 3.11 and Debian bookworm based images
Expand All @@ -20,6 +20,24 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
apt-get update && \
apt-get install --yes --no-install-recommends python3-venv tzdata
RUN python3 -m venv /.venv
ENV PYTHONPATH=/.venv/lib64/python3.11/site-packages/

# Install curl for now, but try to remove this dependency
RUN apt-get --assume-yes install curl

FROM pixl_orthanc_apt AS pixl_orthanc_with_spec
# This part changes rarely, so do it nice and early to avoid redoing it every time we change our code.
# It does have a dependency though, which would normally be fulfilled by our project files, so install that
# manually.
# Do it in a dead-end build stage to discard this environment afterwards,
# and because the spec is only needed in orthanc-anon.
RUN /.venv/bin/pip install dicom-validator
COPY ./orthanc/orthanc-anon/plugin/download_dicom_spec.py /etc/orthanc/download_dicom_spec.py
RUN --mount=type=cache,target=/root/.cache \
python3 /etc/orthanc/download_dicom_spec.py


FROM pixl_orthanc_apt AS pixl_orthanc_base

# Install requirements before copying modules
COPY ./pixl_core/pyproject.toml /pixl_core/pyproject.toml
Expand All @@ -37,17 +55,22 @@ COPY ./pixl_dcmd/ /pixl_dcmd
RUN --mount=type=cache,target=/root/.cache \
/.venv/bin/pip install --no-cache-dir --force-reinstall --no-deps ./pixl_dcmd

COPY ./orthanc/orthanc-raw/plugin/pixl.py /etc/orthanc/pixl.py
ARG ORTHANC_DIR
COPY ./orthanc/${ORTHANC_DIR}/plugin/pixl.py /etc/orthanc/pixl.py
COPY ./orthanc/${ORTHANC_DIR}/config /run/secrets

# Orthanc can't substitute environment variables as integers so copy and replace before running
ARG ORTHANC_CONCURRENT_JOBS
RUN sed -i "s/\${ORTHANC_CONCURRENT_JOBS}/${ORTHANC_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json

FROM pixl_orthanc_base AS pixl_orthanc_raw

ARG ORTHANC_RAW_MAXIMUM_STORAGE_SIZE
ARG ORTHANC_RAW_JOB_HISTORY_SIZE
ARG ORTHANC_CONCURRENT_JOBS
ARG PIXL_DICOM_TRANSFER_TIMEOUT
COPY ./orthanc/orthanc-raw/config /run/secrets
RUN sed -i "s/\${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE}/${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE:-0}/g" /run/secrets/orthanc.json
RUN sed -i "s/\${ORTHANC_RAW_JOB_HISTORY_SIZE}/${ORTHANC_RAW_JOB_HISTORY_SIZE:-100}/g" /run/secrets/orthanc.json
RUN sed -i "s/\${ORTHANC_CONCURRENT_JOBS}/${ORTHANC_CONCURRENT_JOBS:-5}/g" /run/secrets/orthanc.json
RUN sed -i "s/\${ORTHANC_RAW_STABLE_SECONDS}/${PIXL_DICOM_TRANSFER_TIMEOUT:-600}/g" /run/secrets/orthanc.json

ENV PYTHONPATH=/.venv/lib64/python3.11/site-packages/
FROM pixl_orthanc_base AS pixl_orthanc_anon
COPY --from=pixl_orthanc_with_spec /root/dicom-validator /root/dicom-validator
Loading

0 comments on commit 55b2888

Please sign in to comment.