Skip to content

Commit

Permalink
Fix for custom by_distance, linting, typos
Browse files (browse the repository at this point in the history)
  • Loading branch information
Phlya committed Jan 11, 2024
1 parent 3ec968d commit 0e85507
Show file tree
Hide file tree
Showing 3 changed files with 19,522 additions and 242 deletions.
153 changes: 100 additions & 53 deletions coolpuppy/coolpup.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -114,6 +114,7 @@ def expand2D(intervals, flank, resolution, rescale_flank=None):
)[["start2", "end2"]]
return intervals


def flip_mark_intervals_func(intervals, flipby, flip_negative_strand, extra_func=None):
if flip_negative_strand:
intervals["flip"] = np.where(intervals["strand1"] == "-", True, False)
Expand All @@ -123,15 +124,22 @@ def flip_mark_intervals_func(intervals, flipby, flip_negative_strand, extra_func
intervals = extra_func(intervals)
return intervals


def flip_snip_func(snip, groupby, ignore_group_order, extra_func=None):
if snip["flip"]:
snip["data"] = np.rot90(np.flipud(snip["data"]))
if ignore_group_order:
keys = np.array([*snip])
filt = [f"{group}1" in keys and f"{group}2" in keys for group in [k[:-1] for k in keys]]
filt = [
f"{group}1" in keys and f"{group}2" in keys
for group in [k[:-1] for k in keys]
]
paired_groups = list(set([group[:-1] for group in keys[filt]]))
for group in paired_groups:
snip[f"{group}1"], snip[f"{group}2"] = snip[f"{group}2"], snip[f"{group}1"]
snip[f"{group}1"], snip[f"{group}2"] = (
snip[f"{group}2"],
snip[f"{group}1"],
)
if groupby:
snip["group"] = np.array([snip[col] for col in groupby], dtype=object)
if extra_func is not None:
Expand Down Expand Up @@ -1052,10 +1060,6 @@ def get_data(self, region1, region2=None):
return data.tocsr()

def _stream_snips(self, intervals, region1, region2=None):
mymap = self.make_outmap()
cov_start = np.zeros(mymap.shape[0])
cov_end = np.zeros(mymap.shape[1])

try:
row1 = next(intervals)
except StopIteration:
Expand Down Expand Up @@ -1085,10 +1089,10 @@ def _stream_snips(self, intervals, region1, region2=None):
self.clr.bins()[self.clr_weight_name].fetch(region2_coords).values
)
else:
isnan1 = isnan = np.zeros_like(
isnan1 = np.zeros_like(
self.clr.bins()["start"].fetch(region1_coords).values
).astype(bool)
isnan2 = isnan = np.zeros_like(
isnan2 = np.zeros_like(
self.clr.bins()["start"].fetch(region2_coords).values
).astype(bool)

Expand Down Expand Up @@ -1166,12 +1170,19 @@ def _stream_snips(self, intervals, region1, region2=None):
snip["vertical_stripe"] = np.array(
snip["data"][:, cntr][::-1], dtype=float
)
snip["coordinates"] = ".".join([str(snip[col]) for col in ["chrom1",
"start1",
"end1",
"chrom2",
"start2",
"end2"]])
snip["coordinates"] = ".".join(
[
str(snip[col])
for col in [
"chrom1",
"start1",
"end1",
"chrom2",
"start2",
"end2",
]
]
)
else:
snip["horizontal_stripe"] = []
snip["vertical_stripe"] = []
Expand Down Expand Up @@ -1370,7 +1381,7 @@ def pileupsWithControl(
groupby : list of str, optional
Which attributes of each snip to assign a group to it
ignore_group_order : bool or str or list, optional
When using groupby, reorder so that e.g. group1-group2 and group2-group1 will be
When using groupby, reorder so that e.g. group1-group2 and group2-group1 will be
combined into one and flipped to the correct orientation. If using multiple paired
groupings (e.g. group1-group2 and category1-category2), need to specify which
grouping should be prioritised, e.g. "group" or ["group1", "group2"]. For flip_negative_strand,
Expand Down Expand Up @@ -1424,43 +1435,62 @@ def pileupsWithControl(
flipby = "strand"
if self.ignore_group_order:
if self.local:
raise ValueError("ignore_group_order doesn't make sense for local pileups")
raise ValueError(
"ignore_group_order doesn't make sense for local pileups"
)
elif self.kind == "bedpe":
raise ValueError("ignore_group_order doesn't make sense for bedpe files")
raise ValueError(
"ignore_group_order doesn't make sense for bedpe files"
)
elif groupby:
warnings.warn("flip_negative_strand and ignore_group_order leads to combining strands, not other groups")
warnings.warn(
"flip_negative_strand and ignore_group_order leads to combining strands, not other groups"
)
elif self.ignore_group_order and groupby:
if self.local:
raise ValueError("ignore_group_order doesn't make sense for local pileups")
raise ValueError(
"ignore_group_order doesn't make sense for local pileups"
)
if self.kind == "bedpe":
raise ValueError("ignore_group_order doesn't make sense for bedpe files")
raise ValueError(
"ignore_group_order doesn't make sense for bedpe files"
)
groups = np.array(groupby)
filt=[f"{group}1" in groups and f"{group}2" in groups for group in [g[:-1] for g in groups]]
filt = [
f"{group}1" in groups and f"{group}2" in groups
for group in [g[:-1] for g in groups]
]
groups_filtered = np.sort(groups[filt])
if self.ignore_group_order is True:
flipby = list(set([g[:-1] for g in groups_filtered]))
elif isinstance(self.ignore_group_order, str):
flipby=[self.ignore_group_order]
flipby = [self.ignore_group_order]
elif len(self.ignore_group_order) == 1:
flipby=self.ignore_group_order
flipby = self.ignore_group_order
elif len(self.ignore_group_order) > 1:
flipby = list(set([g[:-1] for g in self.ignore_group_order]))
if len(flipby) == 1 and f"{flipby[0]}1" in groups_filtered:
flipby=flipby[0]
flipby = flipby[0]
else:
raise ValueError("Ambiguous ignore_group_order, please provide str or list of two strings which are in groupby")
raise ValueError(
"Ambiguous ignore_group_order, please provide str or list of two strings which are in groupby"
)
elif self.ignore_group_order and not groupby:
warnings.warn("Need to specify groupby for ignore_group_order")

if self.flip_negative_strand or (self.ignore_group_order and groupby):
modify_2Dintervals_func_final = partial(flip_mark_intervals_func,
flipby=flipby,
flip_negative_strand=self.flip_negative_strand,
extra_func=modify_2Dintervals_func)
postprocess_func_final = partial(flip_snip_func,
groupby=groupby,
ignore_group_order=self.ignore_group_order,
extra_func=postprocess_func)
modify_2Dintervals_func_final = partial(
flip_mark_intervals_func,
flipby=flipby,
flip_negative_strand=self.flip_negative_strand,
extra_func=modify_2Dintervals_func,
)
postprocess_func_final = partial(
flip_snip_func,
groupby=groupby,
ignore_group_order=self.ignore_group_order,
extra_func=postprocess_func,
)
else:
modify_2Dintervals_func_final = modify_2Dintervals_func
postprocess_func_final = postprocess_func
Expand Down Expand Up @@ -1619,14 +1649,16 @@ def pileupsWithControl(

for name, attr in self.__dict__.items():
if name not in exclude_attributes:
if type(attr) == list:
if isinstance(attr, list):
attr = str(attr)
if type(attr) == cooler.api.Cooler:
if isinstance(attr, cooler.api.Cooler):
attr = os.path.abspath(attr.filename)
normalized_roi[name] = attr
return normalized_roi

def pileupsByStrandWithControl(self, nproc=None, groupby=[], ignore_group_order=False):
def pileupsByStrandWithControl(
self, nproc=None, groupby=[], ignore_group_order=False
):
"""Perform by-strand pileups across all chromosomes and applies required
normalization. Simple wrapper around pileupsWithControl.
Assumes the features in CoordCreator file has a "strand" column.
Expand Down Expand Up @@ -1696,9 +1728,11 @@ def pileupsByWindowWithControl(
normalized_pileups = self.pileupsWithControl(
nproc=nproc, postprocess_func=group_by_region
)
normalized_pileups["group2"] = np.where(normalized_pileups["group"] == "all",
normalized_pileups["group"].str.split("l"),
normalized_pileups["group"])
normalized_pileups["group2"] = np.where(
normalized_pileups["group"] == "all",
normalized_pileups["group"].str.split("l"),
normalized_pileups["group"],
)
normalized_pileups = pd.concat(
[
pd.DataFrame(
Expand Down Expand Up @@ -1996,7 +2030,7 @@ def pileup(
features have a column "group", specify ["group1", "group2"].
The default is [].
ignore_group_order : bool or str or list, optional
When using groupby, reorder so that e.g. group1-group2 and group2-group1 will be
When using groupby, reorder so that e.g. group1-group2 and group2-group1 will be
combined into one and flipped to the correct orientation. If using multiple paired
groupings (e.g. group1-group2 and category1-category2), need to specify which
grouping should be prioritised, e.g. "group" or ["group1", "group2"]. For flip_negative_strand,
Expand Down Expand Up @@ -2047,24 +2081,25 @@ def pileup(
if any, all possible annotations from the arguments of this function.
"""
if by_distance is not False:
if by_distance is True or by_distance == "default":
distance_edges = "default"
by_distance = True
elif len(by_distance) > 0:
if local:
raise ValueError(
"Can't do local pileups by distance, please specify only one of those arguments"
)

if isinstance(by_distance, np.ndarray):
try:
distance_edges = [int(i) for i in by_distance]
except Exception as e:
raise ValueError(
"Distance bin edges have to be an iterable of integers or convertable to integers"
) from e
by_distance = True
elif by_distance is True or by_distance == "default":
distance_edges = "default"
by_distance = True
else:
raise ValueError(
"Invalid by_distance value, should be either 'default' or a list of integers"
)
if local:
raise ValueError(
"Can't do local pileups by distance, please specify only one of those arguments"
"Invalid by_distance value, should be either True, 'default' or a list of integers"
)

if not rescale:
Expand Down Expand Up @@ -2198,25 +2233,37 @@ def pileup(
warnings.warn("by-window not compatible with additional groupby")
elif by_strand and by_distance:
pups = PU.pileupsByStrandByDistanceWithControl(
nproc=nproc, distance_edges=distance_edges, groupby=groupby, ignore_group_order=ignore_group_order,
nproc=nproc,
distance_edges=distance_edges,
groupby=groupby,
ignore_group_order=ignore_group_order,
)
pups["by_window"] = False
pups["by_strand"] = True
pups["by_distance"] = True
elif by_strand:
pups = PU.pileupsByStrandWithControl(groupby=groupby, ignore_group_order=ignore_group_order,)
pups = PU.pileupsByStrandWithControl(
groupby=groupby,
ignore_group_order=ignore_group_order,
)
pups["by_window"] = False
pups["by_strand"] = True
pups["by_distance"] = False
elif by_distance:
pups = PU.pileupsByDistanceWithControl(
nproc=nproc, distance_edges=distance_edges, groupby=groupby, ignore_group_order=ignore_group_order,
nproc=nproc,
distance_edges=distance_edges,
groupby=groupby,
ignore_group_order=ignore_group_order,
)
pups["by_window"] = False
pups["by_strand"] = False
pups["by_distance"] = True
else:
pups = PU.pileupsWithControl(groupby=groupby, ignore_group_order=ignore_group_order,)
pups = PU.pileupsWithControl(
groupby=groupby,
ignore_group_order=ignore_group_order,
)
pups["by_window"] = False
pups["by_strand"] = False
pups["by_distance"] = False
Expand Down
Loading

0 comments on commit 0e85507

Please sign in to comment.