Replicates branch (#15)

- added option to plot replicates
- added counter to warnings
- added color to points and changed point style for simulation lines
- highlighting of plotted data in the measurement table
- added possibility to open multiple tables at once
- added point symbol to legend
- made own class for lines and errorbars for easier handling and readability (dotted_line)
- made own class for custom table models (table_models)
PEtab-dev · Mar 8, 2021 · 727d56f · 727d56f
1 parent 9caf1ea
commit 727d56f
Show file tree

Hide file tree

Showing 11 changed files with 775 additions and 411 deletions.
diff --git a/petabvis/bar_plot.py b/petabvis/bar_plot.py
@@ -50,7 +50,7 @@ def plot_everything(self):
         If a simulation df is given, also generate the correlation plot.
         """
         self.plot.clear()
-        self.overview_df = self.get_bars_df(self.bar_rows)
+        self.overview_df = self.get_bars_df()
 
         self.generate_plot()
 
@@ -71,7 +71,27 @@ def add_bar_rows(self, df):
                     row = bar_row.BarRow(df, plot_spec, self.condition_df)
                     self.bar_rows.append(row)
 
-    def get_bars_df(self, bar_rows):
+    def generate_overview_df(self):
+        """
+        Generate the overview df of all enabled bar rows.
+
+        The dataframes returned by `get_data_df` of every bar row whose
+        dataset_id is not in `self.disabled_rows` are concatenated.
+        If no visualization df is set or no bar row is enabled, an empty
+        dataframe with the columns "y", "name", "is_simulation",
+        "dataset_id", "sd" and "sem" is returned instead.
+
+        Returns:
+            overview_df: A dataframe containing an overview of the
+                enabled bar rows
+        """
+        # empty fallback with the expected columns
+        overview_df = pd.DataFrame(
+            columns=["y", "name", "is_simulation", "dataset_id",
+                     "sd", "sem"])
+        if self.visualization_df is not None:
+            # collect the data of all bar rows that are not disabled
+            dfs = [bar.get_data_df() for bar in
+                   self.bar_rows
+                   if bar.dataset_id not in self.disabled_rows]
+            # pd.concat is only called when there is at least one df
+            if dfs:
+                # ignore_index renumbers the rows consecutively from 0
+                overview_df = pd.concat(dfs, ignore_index=True)
+        return overview_df
+
+    def get_bars_df(self):
         """
         Generate a dataframe containing plotting information
         of the individual bars.
@@ -82,39 +102,37 @@ def get_bars_df(self, bar_rows):
             df: A dataframe with information relevant
                 for plotting a bar (x, y, sd, etc.)
         """
-        bar_rows = [bar_row for bar_row in bar_rows if
-                    bar_row.dataset_id not in self.disabled_rows]
-
-        x = range(len(bar_rows))
-        tick_pos = range(len(bar_rows))
-        y = [bar.y_data for bar in bar_rows]
-        names = [bar.legend_name for bar in bar_rows]
-        sd = [bar.sd for bar in bar_rows]
-        sem = [bar.sem for bar in bar_rows]
-        noise = [bar.provided_noise for bar in bar_rows]
-        is_simulation = [bar.is_simulation for bar in bar_rows]
-
-        df = pd.DataFrame(
-            list(zip(x, y, names, sd, sem, noise, is_simulation, tick_pos)),
-            columns=["x", "y", "name", "sd", "sem", "provided_noise",
-                     "is_simulation", "tick_pos"])
+        df = self.generate_overview_df()
+
+        x = list(range(len(df.index)))
+        tick_pos = list(range(len(df.index)))
+        for i_name, (name, name_df) in enumerate(df.groupby('name')):
+            for i_replicate, i_row in enumerate(name_df.index):
+                x[i_row] = i_name - np.linspace(start=0, stop=self.bar_width, num=len(name_df.index))[i_replicate]
+                tick_pos[i_row] = i_name - self.bar_width / 2
+        df = df.assign(x=x, tick_pos=tick_pos)
 
         # Adjust x and tick_pos of the bars when simulation bars are plotted
         # such that they are next to each other
         if self.simulation_df is not None:
-            # to keep the order of bars consistent
-            indexes = np.unique(df["name"], return_index=True)[1]
-            names = [df["name"][index] for index in sorted(indexes)]
-            for i, name in enumerate(names):
-                # set measurement and simulation bars to same x based on name
-                index = df[df["name"] == name].index
-                df.loc[index, "x"] = i
-                df.loc[index, "tick_pos"] = i
-
-            # separate measurement and simulation bars
-            bar_separation_shift = self.bar_width / 2
-            df.loc[~df["is_simulation"], "x"] -= bar_separation_shift
-            df.loc[df["is_simulation"], "x"] += bar_separation_shift
+            x = df["x"].tolist()
+            tick_pos = df["tick_pos"].tolist()
+            for i_name, (name, name_df) in enumerate(df.groupby('name')):
+                num_replicates = len(name_df.index) / 2  # /2 due to simulation
+                shift_start = self.bar_width / (2 * num_replicates)
+                for i_replicate, i_row in enumerate(name_df[name_df["is_simulation"]].index):
+                    tick_pos[i_row] = i_name
+                    shift = np.linspace(start=shift_start,
+                                        stop=self.bar_width + shift_start,
+                                        num=int(num_replicates))[i_replicate]
+                    x[i_row] = i_name + shift
+                for i_replicate, i_row in enumerate(name_df[~name_df["is_simulation"]].index):
+                    tick_pos[i_row] = i_name
+                    shift = np.linspace(start=shift_start,
+                                        stop=self.bar_width + shift_start,
+                                        num=int(num_replicates))[i_replicate]
+                    x[i_row] = i_name - shift
+            df = df.assign(x=x, tick_pos=tick_pos)
 
         return df
 
@@ -130,17 +148,28 @@ def generate_plot(self):
             self.plot.setLabel("left", self.bar_rows[0].y_label)
             self.plot.setLabel("bottom", self.bar_rows[0].x_label)
 
+            bar_width = self.bar_width
+            # adjust the barwidth when plotting replicates
+            if self.bar_rows[0].plot_type_data == ptc.REPLICATE:
+                max_num_replicates = max(len(bar.replicates) for bar
+                                         in self.bar_rows)
+                bar_width = self.bar_width / max_num_replicates
+
             # Add bars
             simu_rows = self.overview_df["is_simulation"]
             bar_item = pg.BarGraphItem(x=self.overview_df[~simu_rows]["x"],
                                        height=self.overview_df[~simu_rows][
-                                           "y"], width=self.bar_width)
+                                        "y"], width=bar_width,
+                                       pen=pg.mkPen("b", width=2),
+                                       name="measurement")
             self.plot.addItem(bar_item)  # measurement bars
-            bar_item = pg.BarGraphItem(x=self.overview_df[simu_rows]["x"],
-                                       brush="w",
-                                       height=self.overview_df[simu_rows]["y"],
-                                       width=self.bar_width)
-            self.plot.addItem(bar_item)  # simulation bars
+            if self.simulation_df is not None:
+                bar_item = pg.BarGraphItem(x=self.overview_df[simu_rows]["x"],
+                                           name="simulation",
+                                           height=self.overview_df[simu_rows]["y"],
+                                           width=bar_width,
+                                           pen=pg.mkPen("y", width=2))
+                self.plot.addItem(bar_item)  # simulation bars
 
             # Add error bars
             error_length = self.overview_df["sd"]
@@ -151,7 +180,7 @@ def generate_plot(self):
             error = pg.ErrorBarItem(x=self.overview_df["x"],
                                     y=self.overview_df["y"],
                                     top=error_length, bottom=error_length,
-                                    beam=0.1)
+                                    beam=bar_width/3)
             self.plot.addItem(error)
 
             # set tick names to the legend entry of the bars
@@ -165,7 +194,8 @@ def generate_plot(self):
                 self.plot.setLogMode(y=True)
                 if self.plot_rows[0].x_scale == "log":
                     self.add_warning(
-                        "log not supported, using log10 instead (in " + self.plot_title + ")")
+                        "log not supported, using log10 " +
+                        "instead (in " + self.plot_title + ")")
 
     def add_or_remove_line(self, dataset_id):
         """

diff --git a/petabvis/bar_row.py b/petabvis/bar_row.py
@@ -2,6 +2,7 @@
 import pandas as pd
 
 from . import row_class
+import petab.C as ptc
 
 
 class BarRow(row_class.RowClass):
@@ -38,6 +39,17 @@ def get_mean_y_data(self):
 
         return y_data
 
+    def get_replicate_y_data(self):
+        """
+        Return the mean y-value of each replicate.
+
+        For every entry of `self.replicates`, the mean of the column
+        given by `get_y_variable_name` is computed and shifted by
+        `self.y_offset`.
+
+        Returns:
+            y_data: A list with one value per replicate
+        """
+        y_data = []
+        # variable is either measurement or simulation
+        variable = self.get_y_variable_name()
+        for replicate in self.replicates:
+            y_values = np.mean(replicate[variable])
+            # apply the y-offset to the mean of the replicate
+            y_values = y_values + self.y_offset
+            y_data.append(y_values)
+
+        return y_data
+
     def get_sd(self):
         """
         Return the standard deviation of the y-values that should be plotted.
@@ -50,6 +62,15 @@ def get_sd(self):
 
         return sd
 
+    def get_replicate_sd(self):
+        """
+        Return the standard deviation of the y-values of each replicate.
+
+        For every entry of `self.replicates`, `np.std` (called with its
+        default arguments) is applied to the column given by
+        `get_y_variable_name`. No y-offset is applied here.
+
+        Returns:
+            sds: A list with one standard deviation per replicate
+        """
+        sds = []
+        # name of the column holding the y-values
+        variable = self.get_y_variable_name()
+        for replicate in self.replicates:
+            y_values = replicate[variable]
+            sd = np.std(y_values)
+            sds.append(sd)
+        return sds
+
     def get_sem(self):
         """
         Return the standard error of the mean of the
@@ -62,3 +83,26 @@ def get_sem(self):
         sem = self.sd / np.sqrt(len(y_values))
 
         return sem
+
+    def get_data_df(self):
+        """
+        Represent the data of this row as a dataframe.
+        Contains the columns "y", "name", "is_simulation",
+        "dataset_id", "sd" and "sem".
+        Note: If the plot type is ptc.REPLICATE, "y" and "sd" hold the
+        values of `get_replicate_y_data` and `get_replicate_sd`
+        (one entry per replicate). Otherwise "y" is the single-entry
+        list [self.y_data] and "sd" is self.sd.
+
+        Returns:
+            df: The dataframe containing the row information.
+        """
+        if self.plot_type_data == ptc.REPLICATE:
+            # per-replicate values
+            y = self.get_replicate_y_data()
+            sd = self.get_replicate_sd()
+        else:
+            # values precomputed on the row itself
+            y = [self.y_data]
+            sd = self.sd
+        # legend name, simulation flag, dataset id and sem are taken
+        # from the row attributes in both cases
+        df = pd.DataFrame(
+            {"y": y, "name": self.legend_name,
+             "is_simulation": self.is_simulation,
+             "dataset_id": self.dataset_id,
+             "sd": sd, "sem": self.sem})
+        return df