Skip to content

Commit

Permalink
differentiate between missing column and column with null/NaN values …
Browse files Browse the repository at this point in the history
…for tissue_position
  • Loading branch information
Evan Molinelli authored and Evan Molinelli committed Jan 30, 2025
1 parent 54bc35d commit ff7727c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 14 deletions.
27 changes: 13 additions & 14 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
# Error-message suffixes for checks that apply when both the Visium and the
# is_single conditions hold. Each is composed from ERROR_SUFFIX_VISIUM and
# ERROR_SUFFIX_IS_SINGLE and is appended after the name of the offending field.
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = f"{ERROR_SUFFIX_VISIUM} and {ERROR_SUFFIX_IS_SINGLE}"
# The field is present but the Visium / is_single condition does not hold.
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
# The field is absent altogether.
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
# The field exists but contains missing/NaN values (kept distinct from the
# "required" case so the two failures are reported differently).
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_NOTNULL = f"cannot have missing or NaN values when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
# Same condition, further narrowed to rows where in_tissue is 0.
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"

# Suffix for matrix-format errors; lists every entry of SUPPORTED_SPARSE_MATRIX_TYPES.
ERROR_SUFFIX_SPARSE_FORMAT = f"Please ensure it is either a dense array or one of the supported sparse matrix encodings ({','.join(SUPPORTED_SPARSE_MATRIX_TYPES)})"
Expand Down Expand Up @@ -1781,22 +1782,20 @@ def _validate_spatial_tissue_position(self, tissue_position_name: str, min: int,
if not self._is_visium_and_is_single_true():
return

# At this point, is_single is True and:
# - there's at least one row with Visium, tissue position column is required
# - for any Visium row, tissue position is required.
if (
tissue_position_name not in self.adata.obs
or (
(
self.adata.obs["assay_ontology_term_id"]
.apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, False))
.astype(bool)
)
& (self.adata.obs[tissue_position_name].isnull())
).any()
):
# visium rows require tissue_position columns
if tissue_position_name not in self.adata.obs:
# report column is required
self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED}.")
return
elif ((
self.adata.obs["assay_ontology_term_id"]
.apply(lambda t: is_ontological_descendant_of(ONTOLOGY_PARSER, t, ASSAY_VISIUM, False))
.astype(bool)
)
& (self.adata.obs[tissue_position_name].isnull())).any():
# report column has bad values
self.errors.append(f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_NOTNULL}.")
return

# Tissue position must be an int.
obs_tissue_position = self.adata.obs.get(tissue_position_name)
Expand Down
12 changes: 12 additions & 0 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_NOTNULL,
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE,
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM,
Validator,
Expand Down Expand Up @@ -1104,6 +1105,17 @@ def test__validate_tissue_position_int_error(self, tissue_position_name):
assert validator.errors
assert f"obs['{tissue_position_name}'] must be of int type" in validator.errors[0]

@pytest.mark.parametrize("tissue_position_name", ["array_col", "array_row", "in_tissue"])
def test__validate_tissue_position_nan_error(self, tissue_position_name):
    """Check that a tissue position column filled with NaN yields the not-null error.

    The first error recorded by ``_check_spatial_obs`` must be the
    ``ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_NOTNULL`` message for the
    parametrized column.
    """
    expected_message = f"obs['{tissue_position_name}'] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_NOTNULL}."

    validator: Validator = Validator()
    validator._set_schema_def()
    # Copy so the column overwrite below does not touch adata_visium itself.
    validator.adata = adata_visium.copy()
    # Assigning a scalar NaN overwrites every row of the column.
    validator.adata.obs[tissue_position_name] = np.nan

    # Confirm tissue_position is identified as invalid.
    validator._check_spatial_obs()
    first_error = validator.errors[0]
    assert first_error == expected_message

@pytest.mark.parametrize("assay_ontology_term_id", ["EFO:0022857", "EFO:0022860", "EFO:0022859"])
@pytest.mark.parametrize("tissue_position_name, min", [("array_col", 0), ("array_row", 0), ("in_tissue", 0)])
def test__validate_tissue_position_int_min_error(self, assay_ontology_term_id, tissue_position_name, min):
Expand Down

0 comments on commit ff7727c

Please sign in to comment.