Skip to content

Commit

Permalink
Update biobear to allow LazyFrame loading
Browse files Browse the repository at this point in the history
  • Loading branch information
jdcla authored and jdcla committed Jan 22, 2025
1 parent 14592ce commit 417324c
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 9 deletions.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ install_requires =
numpy >= 1.26.4
scipy >= 1.13.0
pandas >= 2.2.2
- biobear >= 0.20.2
+ biobear >= 0.23.7
pyarrow >= 18.0.0
h5max == 0.3.2
fasta-reader == 3.0.2
Expand Down
10 changes: 2 additions & 8 deletions transcript_transformer/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,14 +150,8 @@ def process_ribo_data(
s = f"CREATE EXTERNAL TABLE test STORED AS BAM LOCATION '{path}'"
ctx = bb.connect()
ctx.sql(s)
- exe = ctx.sql("SELECT reference, start, sequence FROM test")
- # Convert the list of RecordBatches to a Table
- table = Table.from_batches(exe.to_arrow_record_batch_reader())
- # Create a Dataset from the Table
- ds = dataset(table)
- # Lazyframe
- lf = pl.scan_pyarrow_dataset(ds)
-
+ s_2 = "SELECT reference, start, sequence FROM test"
+ lf = ctx.sql(s_2).to_polars(lazy=True)
else:
raise TypeError(f"file extension {file_ext} not supported")
new_columns = ["transcript_id", "pos", "read"]
Expand Down

0 comments on commit 417324c

Please sign in to comment.