Skip to content

Commit

Permalink
configurable chunk_size in read_h5ad
Browse files Browse the repository at this point in the history
  • Loading branch information
nayib-jose-gloria committed Dec 23, 2024
1 parent b8c8c29 commit 4e20f61
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions cellxgene_schema_cli/cellxgene_schema/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def getattr_anndata(adata: ad.AnnData, attr: str = None):
return getattr(adata, attr)


def read_backed(f: h5py.File, chunk_size: int = 10_000) -> ad.AnnData:
def read_backed(f: h5py.File, chunk_size: int) -> ad.AnnData:
"""
Read an AnnData object from an h5py.File object, reading in matrices (dense or sparse) as dask arrays. Does not
read full matrices into memory.
Expand Down Expand Up @@ -146,7 +146,7 @@ def callback(func, elem_name: str, elem, iospec):
return adata


def read_h5ad(h5ad_path: Union[str, bytes, os.PathLike]) -> ad.AnnData:
def read_h5ad(h5ad_path: Union[str, bytes, os.PathLike], chunk_size: int = 10_000) -> ad.AnnData:
"""
Reads h5ad into adata
:param Union[str, bytes, os.PathLike] h5ad_path: path to h5ad to read
Expand All @@ -155,7 +155,7 @@ def read_h5ad(h5ad_path: Union[str, bytes, os.PathLike]) -> ad.AnnData:
"""
try:
f = h5py.File(h5ad_path)
adata = read_backed(f)
adata = read_backed(f, chunk_size)

# This code, and AnnData in general, is optimized for row access.
# Running backed, with CSC, is prohibitively slow. Read the entire
Expand Down

0 comments on commit 4e20f61

Please sign in to comment.