From 1fe16e3291a415f93ca2d89d5ca3fea91100bd4b Mon Sep 17 00:00:00 2001 From: Bento007 Date: Tue, 14 Jan 2025 16:06:56 -0800 Subject: [PATCH] use dask threads scheduler with default values. This is faster and reduces the overhead of maintaining the dask configuration --- cellxgene_schema_cli/cellxgene_schema/validate.py | 10 ++-------- cellxgene_schema_cli/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/cellxgene_schema_cli/cellxgene_schema/validate.py b/cellxgene_schema_cli/cellxgene_schema/validate.py index 7da436e2..a08f4ba1 100644 --- a/cellxgene_schema_cli/cellxgene_schema/validate.py +++ b/cellxgene_schema_cli/cellxgene_schema/validate.py @@ -2130,14 +2130,8 @@ def validate( validator = Validator( ignore_labels=ignore_labels, ) - with dask.config.set( - { - "num_workers": n_workers, - "threads_per_worker": 1, - "distributed.worker.memory.limit": "6GB", - "scheduler": "threads", - } - ): + + with dask.config.set({"scheduler": "threads"}): validator.validate_adata(h5ad_path) logger.info(f"Validation complete in {datetime.now() - start} with status is_valid={validator.is_valid}") diff --git a/cellxgene_schema_cli/requirements.txt b/cellxgene_schema_cli/requirements.txt index c9498aa7..10663d52 100644 --- a/cellxgene_schema_cli/requirements.txt +++ b/cellxgene_schema_cli/requirements.txt @@ -2,7 +2,7 @@ anndata==0.11.2 cellxgene-ontology-guide==1.3.0 # update before a schema migration click<9 Cython<4 -dask==2024.12.0 +dask[array]==2024.12.0 numpy<3 pandas>2,<3 PyYAML<7