Skip to content

Commit

Permalink
Replicates branch (#15)
Browse files Browse the repository at this point in the history
- added option to plot replicates
- added counter to warnings
- added color to points and changed point style for simulation lines
- highlighting of plotted data in the measurement table
- added possibility to open multiple tables at once
- added point symbol to legend
- made own class for lines and errorbars for easier handling and readability (dotted_line)
- made own class for custom table models (table_models)
  • Loading branch information
molnarf authored Mar 8, 2021
1 parent 9caf1ea commit 727d56f
Show file tree
Hide file tree
Showing 11 changed files with 775 additions and 411 deletions.
108 changes: 69 additions & 39 deletions petabvis/bar_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def plot_everything(self):
If a simulation df is given, also generate the correlation plot.
"""
self.plot.clear()
self.overview_df = self.get_bars_df(self.bar_rows)
self.overview_df = self.get_bars_df()

self.generate_plot()

Expand All @@ -71,7 +71,27 @@ def add_bar_rows(self, df):
row = bar_row.BarRow(df, plot_spec, self.condition_df)
self.bar_rows.append(row)

def get_bars_df(self, bar_rows):
def generate_overview_df(self):
    """
    Collect the data of all enabled bar rows in one dataframe.

    Bar rows whose dataset_id is listed in self.disabled_rows are
    left out. If no visualization df is set, or if no bar row is
    enabled, an empty dataframe with the columns y, name,
    is_simulation, dataset_id, sd and sem is returned.

    Returns:
        overview_df: The concatenated data of the enabled bar rows
    """
    empty_overview = pd.DataFrame(
        columns=["y", "name", "is_simulation", "dataset_id", "sd", "sem"])
    if self.visualization_df is None:
        return empty_overview

    enabled_frames = []
    for row in self.bar_rows:
        if row.dataset_id in self.disabled_rows:
            continue
        enabled_frames.append(row.get_data_df())

    # pd.concat raises on an empty list, so fall back to the empty frame
    if not enabled_frames:
        return empty_overview
    return pd.concat(enabled_frames, ignore_index=True)

def get_bars_df(self):
"""
Generate a dataframe containing plotting information
of the individual bars.
Expand All @@ -82,39 +102,37 @@ def get_bars_df(self, bar_rows):
df: A dataframe with information relevant
for plotting a bar (x, y, sd, etc.)
"""
bar_rows = [bar_row for bar_row in bar_rows if
bar_row.dataset_id not in self.disabled_rows]

x = range(len(bar_rows))
tick_pos = range(len(bar_rows))
y = [bar.y_data for bar in bar_rows]
names = [bar.legend_name for bar in bar_rows]
sd = [bar.sd for bar in bar_rows]
sem = [bar.sem for bar in bar_rows]
noise = [bar.provided_noise for bar in bar_rows]
is_simulation = [bar.is_simulation for bar in bar_rows]

df = pd.DataFrame(
list(zip(x, y, names, sd, sem, noise, is_simulation, tick_pos)),
columns=["x", "y", "name", "sd", "sem", "provided_noise",
"is_simulation", "tick_pos"])
df = self.generate_overview_df()

x = list(range(len(df.index)))
tick_pos = list(range(len(df.index)))
for i_name, (name, name_df) in enumerate(df.groupby('name')):
for i_replicate, i_row in enumerate(name_df.index):
x[i_row] = i_name - np.linspace(start=0, stop=self.bar_width, num=len(name_df.index))[i_replicate]
tick_pos[i_row] = i_name - self.bar_width / 2
df = df.assign(x=x, tick_pos=tick_pos)

# Adjust x and tick_pos of the bars when simulation bars are plotted
# such that they are next to each other
if self.simulation_df is not None:
# to keep the order of bars consistent
indexes = np.unique(df["name"], return_index=True)[1]
names = [df["name"][index] for index in sorted(indexes)]
for i, name in enumerate(names):
# set measurement and simulation bars to same x based on name
index = df[df["name"] == name].index
df.loc[index, "x"] = i
df.loc[index, "tick_pos"] = i

# separate measurement and simulation bars
bar_separation_shift = self.bar_width / 2
df.loc[~df["is_simulation"], "x"] -= bar_separation_shift
df.loc[df["is_simulation"], "x"] += bar_separation_shift
x = df["x"].tolist()
tick_pos = df["tick_pos"].tolist()
for i_name, (name, name_df) in enumerate(df.groupby('name')):
num_replicates = len(name_df.index) / 2 # /2 due to simulation
shift_start = self.bar_width / (2 * num_replicates)
for i_replicate, i_row in enumerate(name_df[name_df["is_simulation"]].index):
tick_pos[i_row] = i_name
shift = np.linspace(start=shift_start,
stop=self.bar_width + shift_start,
num=int(num_replicates))[i_replicate]
x[i_row] = i_name + shift
for i_replicate, i_row in enumerate(name_df[~name_df["is_simulation"]].index):
tick_pos[i_row] = i_name
shift = np.linspace(start=shift_start,
stop=self.bar_width + shift_start,
num=int(num_replicates))[i_replicate]
x[i_row] = i_name - shift
df = df.assign(x=x, tick_pos=tick_pos)

return df

Expand All @@ -130,17 +148,28 @@ def generate_plot(self):
self.plot.setLabel("left", self.bar_rows[0].y_label)
self.plot.setLabel("bottom", self.bar_rows[0].x_label)

bar_width = self.bar_width
# adjust the barwidth when plotting replicates
if self.bar_rows[0].plot_type_data == ptc.REPLICATE:
max_num_replicates = max(len(bar.replicates) for bar
in self.bar_rows)
bar_width = self.bar_width / max_num_replicates

# Add bars
simu_rows = self.overview_df["is_simulation"]
bar_item = pg.BarGraphItem(x=self.overview_df[~simu_rows]["x"],
height=self.overview_df[~simu_rows][
"y"], width=self.bar_width)
"y"], width=bar_width,
pen=pg.mkPen("b", width=2),
name="measurement")
self.plot.addItem(bar_item) # measurement bars
bar_item = pg.BarGraphItem(x=self.overview_df[simu_rows]["x"],
brush="w",
height=self.overview_df[simu_rows]["y"],
width=self.bar_width)
self.plot.addItem(bar_item) # simulation bars
if self.simulation_df is not None:
bar_item = pg.BarGraphItem(x=self.overview_df[simu_rows]["x"],
name="simulation",
height=self.overview_df[simu_rows]["y"],
width=bar_width,
pen=pg.mkPen("y", width=2))
self.plot.addItem(bar_item) # simulation bars

# Add error bars
error_length = self.overview_df["sd"]
Expand All @@ -151,7 +180,7 @@ def generate_plot(self):
error = pg.ErrorBarItem(x=self.overview_df["x"],
y=self.overview_df["y"],
top=error_length, bottom=error_length,
beam=0.1)
beam=bar_width/3)
self.plot.addItem(error)

# set tick names to the legend entry of the bars
Expand All @@ -165,7 +194,8 @@ def generate_plot(self):
self.plot.setLogMode(y=True)
if self.plot_rows[0].x_scale == "log":
self.add_warning(
"log not supported, using log10 instead (in " + self.plot_title + ")")
"log not supported, using log10 " +
"instead (in " + self.plot_title + ")")

def add_or_remove_line(self, dataset_id):
"""
Expand Down
44 changes: 44 additions & 0 deletions petabvis/bar_row.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd

from . import row_class
import petab.C as ptc


class BarRow(row_class.RowClass):
Expand Down Expand Up @@ -38,6 +39,17 @@ def get_mean_y_data(self):

return y_data

def get_replicate_y_data(self):
    """
    Return one y-value per replicate.

    For every entry of self.replicates, the mean of the column named
    by get_y_variable_name() is computed and shifted by self.y_offset.

    Returns:
        y_data: List with the offset-corrected mean of each replicate
    """
    # the column is either the measurement or the simulation
    column = self.get_y_variable_name()
    return [np.mean(rep[column]) + self.y_offset
            for rep in self.replicates]

def get_sd(self):
"""
Return the standard deviation of the y-values that should be plotted.
Expand All @@ -50,6 +62,15 @@ def get_sd(self):

return sd

def get_replicate_sd(self):
    """
    Return the standard deviation of the y-values of each replicate.

    Returns:
        sds: List with one standard deviation (as computed by np.std)
            per entry of self.replicates
    """
    column = self.get_y_variable_name()
    return [np.std(rep[column]) for rep in self.replicates]

def get_sem(self):
"""
Return the standard error of the mean of the
Expand All @@ -62,3 +83,26 @@ def get_sem(self):
sem = self.sd / np.sqrt(len(y_values))

return sem

def get_data_df(self):
    """
    Represent the data of this row as a dataframe.

    The dataframe has the columns y, name, is_simulation, dataset_id,
    sd and sem. If the plot type is ptc.REPLICATE, y and sd hold one
    value per replicate; otherwise y holds self.y_data and sd is
    self.sd. The remaining columns are taken from the attributes of
    this row.

    Returns
        df: The dataframe containing the row information.
    """
    if self.plot_type_data == ptc.REPLICATE:
        y_values = self.get_replicate_y_data()
        sd_values = self.get_replicate_sd()
    else:
        y_values, sd_values = [self.y_data], self.sd

    row_data = {
        "y": y_values,
        "name": self.legend_name,
        "is_simulation": self.is_simulation,
        "dataset_id": self.dataset_id,
        "sd": sd_values,
        "sem": self.sem,
    }
    return pd.DataFrame(row_data)
Loading

0 comments on commit 727d56f

Please sign in to comment.