Skip to content

Commit

Permalink
Supply UMAP position information as virtual slide (#350)
Browse files Browse the repository at this point in the history
* Move umap module out of workflow

* Deprecate the "reduction visual" workflow and add a more detailed UMAP data "virtual sample"

* Deprecate more references to old umap implementation.

* More deprecations, and fix typos

* Fix typos and more deprecations

* Add flag for UMAP availability to study summary

* Add new test artifact

* Update test artifact

* Fix bug with check over all studies, and make caching CLI command respect the given database config file contents

* Try to add scaling to create non-trivial distribution of umap positions

* Adjust scale

* Adjust scale
  • Loading branch information
jimmymathews authored Sep 3, 2024
1 parent 7e02e92 commit aad6fd9
Show file tree
Hide file tree
Showing 36 changed files with 1,049 additions and 863 deletions.
5 changes: 0 additions & 5 deletions build/build_scripts/import_test_dataset1.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@ spt workflow configure --workflow='tabular import' --config-file=.workflow.confi
nextflow run .
cat work/*/*/.command.log

cat build/build_scripts/.workflow.config > .workflow.config
spt workflow configure --workflow='reduction visual' --config-file=.workflow.config
nextflow run .
rm -f .nextflow.log*; rm -rf .nextflow/; rm -f configure.sh; rm -f run.sh; rm -f main.nf; rm -f nextflow.config; rm -rf work/; rm -rf results/

spt graphs upload-importances --config_path=build/build_scripts/.graph.config --importances_csv_path=test/test_data/gnn_importances/1.csv
spt graphs upload-importances --config_path=build/build_scripts/.graph_transformer.config --importances_csv_path=test/test_data/gnn_importances/1.csv

Expand Down
4 changes: 0 additions & 4 deletions pyproject.toml.unversioned
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ packages = [
"spatialprofilingtoolbox.workflow.component_interfaces",
"spatialprofilingtoolbox.workflow.graph_generation",
"spatialprofilingtoolbox.workflow.graph_plugin",
"spatialprofilingtoolbox.workflow.reduction_visual",
"spatialprofilingtoolbox.workflow.tabular_import",
"spatialprofilingtoolbox.workflow.tabular_import.parsing",
"spatialprofilingtoolbox.workflow.common",
Expand All @@ -156,13 +155,10 @@ packages = [
"configure.py",
"report_on_logs.py",
"tail_logs.py",
"create_plots_page.py",
"compute_umaps_all.py",
]
"spatialprofilingtoolbox.workflow.assets" = [
".spt_db.config.template",
".workflow.config.template",
"compute_umaps_all.sh",
"log_table.tex.jinja",
"log_table.html.jinja",
"main_visitor.nf",
Expand Down
37 changes: 6 additions & 31 deletions spatialprofilingtoolbox/apiserver/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from typing import Literal
import json
from io import BytesIO
from base64 import b64decode

from fastapi import FastAPI
from fastapi.openapi.utils import get_openapi
Expand All @@ -17,6 +16,7 @@

import secure

from spatialprofilingtoolbox.workflow.common.umap_defaults import VIRTUAL_SAMPLE
from spatialprofilingtoolbox.db.database_connection import DBCursor
from spatialprofilingtoolbox.db.study_tokens import StudyCollectionNaming
from spatialprofilingtoolbox.ondemand.request_scheduling import OnDemandRequester
Expand All @@ -32,10 +32,8 @@
AvailableGNN
)
from spatialprofilingtoolbox.db.exchange_data_formats.cells import BitMaskFeatureNames
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import UMAPChannel
from spatialprofilingtoolbox.db.querying import query
from spatialprofilingtoolbox.apiserver.app.validation import (
ValidChannel,
ValidStudy,
ValidPhenotypeSymbol,
ValidPhenotypeList,
Expand All @@ -48,7 +46,7 @@
from spatialprofilingtoolbox.graphs.config_reader import read_plot_importance_fractions_config
from spatialprofilingtoolbox.graphs.importance_fractions import PlotGenerator

VERSION = '0.23.0'
VERSION = '0.24.0'

TITLE = 'Single cell studies data API'

Expand Down Expand Up @@ -415,8 +413,11 @@ async def get_cell_data_binary(
"""
Get streaming cell-level location and phenotype data in a custom binary format.
The format is documented [here](https://github.com/nadeemlab/SPT/blob/main/docs/cells.md).
The sample may be "UMAP virtual sample" if UMAP dimensional reduction is available.
"""
if not sample in query().get_sample_names(study):
has_umap = query().has_umap(study)
if not sample in query().get_sample_names(study) and not (has_umap and sample == VIRTUAL_SAMPLE):
raise HTTPException(status_code=404, detail=f'Sample "{sample}" does not exist.')
data = query().get_cells_data(study, sample)
input_buffer = BytesIO(data)
Expand All @@ -435,32 +436,6 @@ async def get_cell_data_binary_feature_names(study: ValidStudy) -> BitMaskFeatur
return query().get_ordered_feature_names(study)


@app.get("/visualization-plots/")
async def get_plots(
study: ValidStudy,
) -> list[UMAPChannel]:
"""Base64-encoded plots of UMAP visualizations, one per channel."""
return query().get_umaps_low_resolution(study)


@app.get("/visualization-plot-high-resolution/")
async def get_plot_high_resolution(
study: ValidStudy,
channel: ValidChannel,
):
"""
One full-resolution UMAP plot (for the given channel in the given study), provided as a
streaming PNG.
"""
umap = query().get_umap(study, channel)
input_buffer = BytesIO(b64decode(umap.base64_png))
input_buffer.seek(0)

def streaming_iteration():
yield from input_buffer
return StreamingResponse(streaming_iteration(), media_type="image/png")


def _ensure_plot_cache_exists(study: str):
with DBCursor(study=study) as cursor:
cursor.execute('''
Expand Down
1 change: 0 additions & 1 deletion spatialprofilingtoolbox/db/accessors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@
from spatialprofilingtoolbox.db.accessors.graphs import GraphsAccess
from spatialprofilingtoolbox.db.accessors.phenotypes import PhenotypesAccess
from spatialprofilingtoolbox.db.accessors.study import StudyAccess
from spatialprofilingtoolbox.db.accessors.umap import UMAPAccess
from spatialprofilingtoolbox.db.accessors.cells import CellsAccess
19 changes: 15 additions & 4 deletions spatialprofilingtoolbox/db/accessors/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

from psycopg import Cursor as PsycopgCursor

from spatialprofilingtoolbox.workflow.common.umap_defaults import VIRTUAL_SAMPLE
from spatialprofilingtoolbox.workflow.common.umap_defaults import VIRTUAL_SAMPLE_SPEC1
from spatialprofilingtoolbox.workflow.common.umap_defaults import VIRTUAL_SAMPLE_SPEC2
from spatialprofilingtoolbox.db.exchange_data_formats.cells import CellsData
from spatialprofilingtoolbox.db.exchange_data_formats.cells import BitMaskFeatureNames
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import Channel
Expand Down Expand Up @@ -55,14 +58,18 @@ def _get_location_data(
sample: str,
cell_identifiers: tuple[int, ...],
) -> dict[int, tuple[float, float]]:
if sample == VIRTUAL_SAMPLE:
blob_type = VIRTUAL_SAMPLE_SPEC2[1]
else:
blob_type = 'centroids'
locations: dict[int, tuple[float, float]] = pickle_loads(
self.fetch_one_or_else(
'''
SELECT blob_contents
FROM ondemand_studies_index
WHERE specimen=%s AND blob_type='centroids' ;
WHERE specimen=%s AND blob_type=%s ;
''',
(sample,),
(sample, blob_type),
self.cursor,
f'Requested centroids data for "{sample}" not found in database.'
)
Expand All @@ -76,13 +83,17 @@ def _get_phenotype_data(
sample: str,
cell_identifiers: tuple[int, ...],
) -> dict[int, bytes]:
if sample == VIRTUAL_SAMPLE:
blob_type = VIRTUAL_SAMPLE_SPEC1[1]
else:
blob_type = 'feature_matrix'
index_and_expressions = bytearray(self.fetch_one_or_else(
'''
SELECT blob_contents
FROM ondemand_studies_index
WHERE specimen=%s AND blob_type='feature_matrix' ;
WHERE specimen=%s AND blob_type=%s ;
''',
(sample,),
(sample, blob_type),
self.cursor,
f'Requested phenotype data for "{sample}" not found in database.',
))
Expand Down
14 changes: 14 additions & 0 deletions spatialprofilingtoolbox/db/accessors/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
Context,
Products,
)
from spatialprofilingtoolbox.workflow.common.umap_defaults import VIRTUAL_SAMPLE
from spatialprofilingtoolbox.db.exchange_data_formats.metrics import AvailableGNN
from spatialprofilingtoolbox.db.simple_query_patterns import GetSingleResult
from spatialprofilingtoolbox.db.cohorts import get_sample_cohorts
Expand All @@ -44,12 +45,14 @@ def get_study_summary(self, study: str) -> StudySummary:
assay = self._get_assay(components.measurement)
sample_cohorts = get_sample_cohorts(self.cursor, study)
findings = self.get_study_findings()
has_umap = self.has_umap(study)
return StudySummary(
context=Context(institution=institution, assay=assay, contact=contact),
products=Products(data_release=data_release, publication=publication),
counts=counts_summary,
cohorts=sample_cohorts,
findings=findings,
has_umap=has_umap,
)

def get_study_components(self, study: str) -> StudyComponents:
Expand Down Expand Up @@ -284,3 +287,14 @@ def get_specimen_names(self, study: str) -> tuple[str, ...]:
self.cursor.execute(query, (study,))
rows = self.cursor.fetchall()
return tuple(sorted([row[0] for row in rows]))

# Report whether UMAP data is available, judged by counting the rows of
# `ondemand_studies_index` whose specimen is the UMAP "virtual sample".
# Returns True only when that count is exactly 2.
# NOTE(review): `study` is not used in the query below; presumably the cursor
# is already scoped to the given study's database -- confirm against callers.
def has_umap(self, study: str) -> bool:
query = '''
SELECT COUNT(*)
FROM ondemand_studies_index
WHERE specimen=%s ;
'''
self.cursor.execute(query, (VIRTUAL_SAMPLE,))
rows = self.cursor.fetchall()
# COUNT(*) yields a single row with a single column. Exactly two index rows
# are required; presumably one per blob type stored for the virtual sample
# (feature data and positions) -- TODO confirm.
return rows[0][0] == 2

47 changes: 0 additions & 47 deletions spatialprofilingtoolbox/db/accessors/umap.py

This file was deleted.

6 changes: 0 additions & 6 deletions spatialprofilingtoolbox/db/data_model/performance_tweaks.sql
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,6 @@ CREATE TABLE sample_strata (
subject_diagnosed_result VARCHAR(512)
);

CREATE TABLE umap_plots (
study VARCHAR(512),
channel VARCHAR(512),
png_base64 VARCHAR
);

CREATE TABLE quantitative_feature_value_queue (
feature INTEGER REFERENCES feature_specification(identifier) ,
subject VARCHAR
Expand Down
3 changes: 1 addition & 2 deletions spatialprofilingtoolbox/db/database_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,15 +267,14 @@ class (QueryCursor) newly provides on each invocation.
get_composite_phenotype_identifiers: Callable
get_phenotype_criteria: Callable
retrieve_signature_of_phenotype: Callable
get_umaps_low_resolution: Callable
get_umap: Callable
get_important_cells: Callable
get_cells_data: Callable
get_ordered_feature_names: Callable
get_sample_names: Callable
get_available_gnn: Callable
get_study_findings: Callable
get_study_gnn_plot_configurations: Callable
has_umap: Callable
is_public_collection: Callable

def __init__(self, query_handler: Type):
Expand Down
8 changes: 0 additions & 8 deletions spatialprofilingtoolbox/db/exchange_data_formats/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,6 @@ class UnivariateMetricsComputationResult(BaseModel):
is_pending: bool


class UMAPChannel(BaseModel):
"""A UMAP dimensional reduction of a cell set, with one intensity channel's overlay.
The image is encoded in base 64.
"""
channel: str
base64_png: str


class CellData(BaseModel):
"""Cell-level data including position and phenotype information, for a single sample.
"""
Expand Down
1 change: 1 addition & 0 deletions spatialprofilingtoolbox/db/exchange_data_formats/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,4 @@ class StudySummary(BaseModel):
counts: CountsSummary
cohorts: SampleCohorts
findings: list[str]
has_umap: bool
2 changes: 1 addition & 1 deletion spatialprofilingtoolbox/db/ondemand_studies_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def get_counts(database_config_file: str, blob_type: str, study: str | None = No
return counts


def drop_cache_files(database_config_file: str, blob_type: str, study: str | None = None) -> None:
def drop_cache_files(database_config_file: str | None, blob_type: str, study: str | None = None) -> None:
if study is None:
studies = tuple(retrieve_study_names(database_config_file))
else:
Expand Down
16 changes: 4 additions & 12 deletions spatialprofilingtoolbox/db/querying.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
PhenotypeSymbol,
Channel,
PhenotypeCriteria,
UMAPChannel,
AvailableGNN,
)
from spatialprofilingtoolbox.db.exchange_data_formats.cells import CellsData
Expand All @@ -21,7 +20,6 @@
GraphsAccess,
StudyAccess,
PhenotypesAccess,
UMAPAccess,
CellsAccess,
)
from spatialprofilingtoolbox.standalone_utilities import sort
Expand Down Expand Up @@ -107,16 +105,6 @@ def get_channel_names(cls, cursor, study: str) -> tuple[Channel, ...]:
for name in PhenotypesAccess(cursor).get_channel_names(study)
), key=lambda c: c.symbol)

@classmethod
def get_umaps_low_resolution(cls, cursor, study: str) -> list[UMAPChannel]:
access = UMAPAccess(cursor)
umap_rows = access.get_umap_rows(study)
return UMAPAccess.downsample_umaps_base64(umap_rows)

@classmethod
def get_umap(cls, cursor, study: str, channel: str) -> UMAPChannel:
return UMAPAccess(cursor).get_umap_row_for_channel(study, channel)

@classmethod
def get_important_cells(
cls,
Expand Down Expand Up @@ -149,6 +137,10 @@ def get_ordered_feature_names(cls, cursor, study: str) -> BitMaskFeatureNames:
def get_sample_names(cls, cursor, study: str) -> tuple[str, ...]:
return sort(StudyAccess(cursor).get_specimen_names(study))

# Query-handler entry point for the UMAP availability flag: wraps the supplied
# cursor in a StudyAccess and delegates to its `has_umap` for the given study.
@classmethod
def has_umap(cls, cursor, study: str) -> bool:
return StudyAccess(cursor).has_umap(study)


# Convenience factory: returns a new QueryCursor constructed around
# QueryHandler on every call.
def query() -> QueryCursor:
return QueryCursor(QueryHandler)
Loading

0 comments on commit aad6fd9

Please sign in to comment.