Skip to content

Commit

Permalink
Merge pull request jeromekelleher#281 from jeromekelleher/recombinant…
Browse files Browse the repository at this point in the history
…-update

Construct a recombinant frankentype and test
  • Loading branch information
jeromekelleher authored Sep 19, 2024
2 parents 23f6b30 + 511b90d commit cdf35a9
Show file tree
Hide file tree
Showing 8 changed files with 1,170 additions and 614 deletions.
643 changes: 610 additions & 33 deletions notebooks/test_ts.ipynb

Large diffs are not rendered by default.

222 changes: 96 additions & 126 deletions sc2ts/inference.py

Large diffs are not rendered by default.

9 changes: 6 additions & 3 deletions sc2ts/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,9 @@ def node_counts(self):

def _preprocess_nodes(self, show_progress):
ts = self.ts
self.nodes_max_descendant_samples = max_descendant_samples(ts)
self.nodes_max_descendant_samples = max_descendant_samples(
ts, show_progress=show_progress
)
self.nodes_date = np.zeros(ts.num_nodes, dtype="datetime64[D]")
self.nodes_num_masked_sites = np.zeros(ts.num_nodes, dtype=np.int32)
self.nodes_metadata = {}
Expand Down Expand Up @@ -367,7 +369,9 @@ def _preprocess_mutations(self, show_progress):
transversions.add((b1, b2))

tree = ts.first()
iterator = tqdm.tqdm(np.arange(N), desc="Classifying mutations")
iterator = tqdm.tqdm(
np.arange(N), desc="Classifying mutations", disable=not show_progress
)
for mut_id in iterator:
tree.seek(self.mutations_position[mut_id])
mutation_node = ts.mutations_node[mut_id]
Expand Down Expand Up @@ -657,7 +661,6 @@ def recombinants_summary(self):
df["breakpoint_interval_right"] = interval_right
df["max_descendant_samples"] = self.nodes_max_descendant_samples[df.node]


df = df.set_index("node")
# The MRCA table will contain duplicate rows here for two-or-more matches.
# We're just hacking for now to get things working well for two-parent recombs
Expand Down
4 changes: 2 additions & 2 deletions sc2ts/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def __init__(self, path):
logger.debug(f"Opened MetadataDb at {path} mode=ro")

@staticmethod
def import_csv(csv_path, db_path):
df = pd.read_csv(csv_path, sep="\t")
def import_csv(csv_path, db_path, sep="\t"):
df = pd.read_csv(csv_path, sep=sep)
db_path = pathlib.Path(db_path)
if db_path.exists():
db_path.unlink()
Expand Down
4 changes: 3 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def fx_ts_map(tmp_path, fx_data_cache, fx_metadata_db, fx_alignment_store):
]
cache_path = fx_data_cache / f"{dates[-1]}.ts"
if not cache_path.exists():
last_ts = sc2ts.initial_ts()
# These sites are masked out in all alignments in the initial data
# anyway; https://github.com/jeromekelleher/sc2ts/issues/282
last_ts = sc2ts.initial_ts([56, 57, 58, 59, 60])
match_db = sc2ts.MatchDb.initialise(tmp_path / "match.db")
for date in dates:
last_ts = sc2ts.extend(
Expand Down
Loading

0 comments on commit cdf35a9

Please sign in to comment.