LLNL · michaelmckinsey1 · Jan 22, 2025 · Jan 22, 2025 · Jan 23, 2025 · Jan 23, 2025
diff --git a/thicket/ensemble.py b/thicket/ensemble.py
@@ -157,7 +157,9 @@ def _handle_metadata():
                     thickets_cp[i].metadata.reset_index(drop=True, inplace=True)
             if metadata_key is None:
                 for i in range(len(thickets_cp)):
-                    thickets_cp[i].metadata.index.set_names("profile", inplace=True)
+                    thickets_cp[i].metadata.index.set_names(
+                        thickets_cp[i].profile_idx_name, inplace=True
+                    )
             else:
                 for i in range(len(thickets_cp)):
                     if metadata_key != inner_idx:
@@ -228,7 +230,9 @@ def _handle_perfdata():
                         "new_profiles", append=True, inplace=True
                     )
                     thickets_cp[i].dataframe.index.rename(
-                        "profile", level="new_profiles", inplace=True
+                        thickets_cp[i].profile_idx_name,
+                        level="new_profiles",
+                        inplace=True,
                     )
             else:  # Change second-level index to be from metadata's "metadata_key" column
                 for i in range(len(thickets_cp)):

diff --git a/thicket/groupby.py b/thicket/groupby.py
@@ -97,7 +97,7 @@ def _agg_rows(col_series):
             tk_c.dataframe.reset_index()
             .drop(list(tk_c.dataframe.columns) + ["node"], axis=1)
             .drop_duplicates()
-            .set_index("profile")
+            .set_index(tk_c.profile_idx_name)
         )
         if (
             len(new_profile_label_mapping_df.columns) > 1
@@ -120,11 +120,11 @@ def _agg_rows(col_series):
         tk_c.profile_mapping = new_profile_mapping
         # Aggregate metadata
         tk_c.metadata = tk_c.metadata.reset_index()
-        tk_c.metadata["profile"] = tk_c.metadata["profile"].map(
+        tk_c.metadata[tk_c.profile_idx_name] = tk_c.metadata[tk_c.profile_idx_name].map(
             new_profile_label_mapping
         )
-        tk_c.metadata = tk_c.metadata.set_index("profile")
-        tk_c.metadata = tk_c.metadata.groupby("profile").agg(_agg_rows)
+        tk_c.metadata = tk_c.metadata.set_index(tk_c.profile_idx_name)
+        tk_c.metadata = tk_c.metadata.groupby(tk_c.profile_idx_name).agg(_agg_rows)
 
         def _compute_agg_df(col_names, functions, _tk, _agg_cols, _perf_indices):
             agg_df = _tk.dataframe[_agg_cols].groupby(_perf_indices).agg(functions[0])

diff --git a/thicket/stats/calc_boxplot_statistics.py b/thicket/stats/calc_boxplot_statistics.py
@@ -86,7 +86,9 @@ def calc_boxplot_statistics(thicket, columns=[], quartiles=[0.25, 0.5, 0.75], **
                 for i in range(0, len(values)):
                     if values[i] > upper_fence or values[i] < lower_fence:
                         profile.append(
-                            thicket.dataframe.loc[node].reset_index()["profile"][i]
+                            thicket.dataframe.loc[node].reset_index()[
+                                thicket.profile_idx_name
+                            ][i]
                         )
                     else:
                         continue
@@ -146,7 +148,9 @@ def calc_boxplot_statistics(thicket, columns=[], quartiles=[0.25, 0.5, 0.75], **
                 for i in range(0, len(values)):
                     if values[i] > upper_fence or values[i] < lower_fence:
                         profile.append(
-                            thicket.dataframe[idx].loc[node].reset_index()["profile"][i]
+                            thicket.dataframe[idx]
+                            .loc[node]
+                            .reset_index()[thicket.profile_idx_name][i]
                         )
                     else:
                         continue

diff --git a/thicket/tests/data/example-json/user_ensemble.json b/thicket/tests/data/example-json/user_ensemble.json
diff --git a/thicket/tests/test_copy.py b/thicket/tests/test_copy.py
@@ -60,7 +60,7 @@ def test_copy(rajaperf_seq_O3_1M_cali, intersection, fill_perfdata):
 
     # Shallow copy of data
     node = other.dataframe.index.get_level_values("node")[0]
-    profile = other.dataframe.index.get_level_values("profile")[0]
+    profile = other.dataframe.index.get_level_values(other.profile_idx_name)[0]
     other.dataframe.loc[(node, profile), "nid"] = -1
     assert (
         other.dataframe.loc[(node, profile), "nid"]

diff --git a/thicket/tests/test_filter_profile.py b/thicket/tests/test_filter_profile.py
@@ -20,7 +20,7 @@ def test_filter_profile(rajaperf_cali_1trial):
         tk_filt.profile,
         tk_filt.profile_mapping.keys(),
         tk_filt.metadata.index,
-        tk_filt.dataframe.index.get_level_values("profile"),
+        tk_filt.dataframe.index.get_level_values(tk_filt.profile_idx_name),
     ]:
         assert all([prof not in component for prof in rm_profs])
         assert all([prof in component for prof in keep_profs])
diff --git a/thicket/tests/test_from_statsframes.py b/thicket/tests/test_from_statsframes.py
@@ -30,7 +30,7 @@ def test_single_trial(mpi_scaling_cali, intersection, fill_perfdata):
     tk = th.Thicket.from_statsframes(th_list, disable_tqdm=True)
 
     # Check level values
-    assert set(tk.dataframe.index.get_level_values("profile")) == {
+    assert set(tk.dataframe.index.get_level_values(tk.profile_idx_name)) == {
         0,
         1,
         2,

diff --git a/thicket/tests/test_groupby.py b/thicket/tests/test_groupby.py
@@ -24,7 +24,7 @@ def check_groupby(th, columns_values):
         th_list = list(th.groupby(column).values())
 
         for thicket in th_list:
-            check_identity(th, thicket, "default_metric")
+            check_identity(th, thicket, ["default_metric", "profile_idx_name"])
 
         # inspect all unique values in the use case
         for itr, uni_val in enumerate(unique_values):

diff --git a/thicket/tests/test_query.py b/thicket/tests/test_query.py
@@ -48,7 +48,7 @@ def check_query(th, hnids, query):
         th_df_profiles.unique().to_list()
     )
 
-    check_identity(th, filt_th, "default_metric")
+    check_identity(th, filt_th, ["default_metric", "profile_idx_name"])
 
 
 def test_query(rajaperf_cuda_block128_1M_cali, intersection, fill_perfdata):

diff --git a/thicket/tests/test_query_stats.py b/thicket/tests/test_query_stats.py
@@ -58,7 +58,7 @@ def check_query(th_x, hnids, query):
     assert all([n in pd.unique(filt_th_df_nodes) for n in sframe_nodes])
     assert sorted(pd.unique(filt_th_df_nodes)) == sorted(pd.unique(sframe_nodes))
 
-    check_identity(th_x, filt_th, "default_metric")
+    check_identity(th_x, filt_th, ["default_metric", "profile_idx_name"])
 
 
 def test_query_stats(rajaperf_cuda_block128_1M_cali, intersection, fill_perfdata):

diff --git a/thicket/tests/test_thicket.py b/thicket/tests/test_thicket.py
@@ -111,7 +111,7 @@ def test_metadata_columns_to_perfdata(
     assert "variant" not in tkc2.metadata
 
     # Check error raise for join_key
-    tkc2.dataframe = tkc2.dataframe.reset_index(level="profile", drop=True)
+    tkc2.dataframe = tkc2.dataframe.reset_index(level=tkc2.profile_idx_name, drop=True)
     with pytest.raises(KeyError, match="'profile' must be present"):
         tkc2.metadata_columns_to_perfdata("tuning", overwrite=True)
 
@@ -198,8 +198,8 @@ def test_thicketize_graphframe(rajaperf_seq_O3_1M_cali):
     assert ht1.graph == th1.graph
 
     # Check dataframes are equivalent when profile level is dropped
-    th1.dataframe.reset_index(level="profile", inplace=True)
-    th1.dataframe.drop("profile", axis=1, inplace=True)
+    th1.dataframe.reset_index(level=th1.profile_idx_name, inplace=True)
+    th1.dataframe.drop(th1.profile_idx_name, axis=1, inplace=True)
     assert ht1.dataframe.equals(th1.dataframe)
 
 
@@ -217,3 +217,16 @@ def test_unique_metadata_base_cuda(
     assert res["systype_build"] == ["blueos_3_ppc64le_ib_p9"]
     assert res["variant"] == ["Base_CUDA"]
     assert res["tuning"] == ["block_128"]
+
+
+def test_different_profile_idx_name():
+    th = Thicket(  # built directly from an empty graph and an empty dataframe
+        graph=ht.graph.Graph(roots=[]),
+        dataframe=pd.DataFrame(
+            index=pd.MultiIndex(
+                names=["node", "profile2"], levels=[[], []], codes=[[], []]
+            )
+        ),
+        profile_idx_name="profile2",  # matches the second index level name above
+    )
+    assert th.profile_idx_name == "profile2"  # constructor keeps the custom name
diff --git a/thicket/tests/utils.py b/thicket/tests/utils.py
@@ -19,8 +19,8 @@ def check_identity(
             if equal:
                 assert (
                     obj1.__dict__[key] is obj2.__dict__[key]
-                ), "{} should have the same identy".format(key)
+                ), "{} should have the same identity".format(key)
             else:
                 assert (
                     obj1.__dict__[key] is not obj2.__dict__[key]
-                ), "{} should not have the same identy".format(key)
+                ), "{} should not have the same identity".format(key)
diff --git a/thicket/thicket.py b/thicket/thicket.py
@@ -63,6 +63,7 @@ def __init__(
         metadata={},
         performance_cols=None,
         profile=None,
+        profile_idx_name="profile",
         profile_mapping=None,
         statsframe=None,
         statsframe_ops_cache=None,
@@ -81,13 +82,15 @@ def __init__(
             performance_cols (list): list of numeric columns within the performance
                 dataframe
             profile (list): list of hashed profile strings
+            profile_idx_name (str): name of the profile index in the dataframe
             profile_mapping (dict): mapping of hashed profile strings to original strings
             statsframe (DataFrame): pandas DataFrame indexed by Nodes from the graph
         """
         super().__init__(
             graph, dataframe, exc_metrics, inc_metrics, default_metric, metadata
         )
         self.profile = profile
+        self.profile_idx_name = profile_idx_name
         self.profile_mapping = profile_mapping
         if statsframe is None:
             self.statsframe = GraphFrame(
@@ -202,12 +205,12 @@ def thicketize_graphframe(gf, prf):
             temp_meta = {}
             temp_meta[hash_arg] = th.metadata
             th.metadata = pd.DataFrame.from_dict(temp_meta, orient="index")
-            th.metadata.index.set_names("profile", inplace=True)
+            th.metadata.index.set_names(th.profile_idx_name, inplace=True)
 
             # Add profile to dataframe index
-            th.dataframe["profile"] = hash_arg
+            th.dataframe[th.profile_idx_name] = hash_arg
             index_names = list(th.dataframe.index.names)
-            index_names.insert(1, "profile")
+            index_names.insert(1, th.profile_idx_name)
             th.dataframe.reset_index(inplace=True)
             th.dataframe.set_index(index_names, inplace=True)
 
@@ -590,15 +593,16 @@ def from_json(json_thicket):
             dataframe=gf.dataframe,
             exc_metrics=thicket_dict["exclusive_metrics"],
             inc_metrics=thicket_dict["inclusive_metrics"],
-            profile=thicket_dict["profile"],
+            profile=thicket_dict[thicket_dict["profile_idx_name"]],
+            profile_idx_name=thicket_dict["profile_idx_name"],
             profile_mapping=thicket_dict["profile_mapping"],
         )
 
         if "metadata" in thicket_dict:
             mf = pd.DataFrame(thicket_dict["metadata"])
-            mf.set_index(mf["profile"], inplace=True)
-            if "profile" in mf.columns:
-                mf = mf.drop(columns=["profile"])
+            mf.set_index(mf[th.profile_idx_name], inplace=True)
+            if th.profile_idx_name in mf.columns:
+                mf = mf.drop(columns=[th.profile_idx_name])
             th.metadata = mf
 
         # catch condition where there are no stats
@@ -687,10 +691,10 @@ def _rep_agg_func(col):
             agg_data = pd.DataFrame.from_records(rep_data).agg(_rep_agg_func)
             # Add node and profile
             agg_data["node"] = node_profile[0]
-            agg_data["profile"] = node_profile[1]
+            agg_data[self.profile_idx_name] = node_profile[1]
             # Append to main df
             ncu_df = pd.concat([ncu_df, pd.DataFrame([agg_data])], ignore_index=True)
-        ncu_df = ncu_df.set_index(["node", "profile"])
+        ncu_df = ncu_df.set_index(["node", self.profile_idx_name])
 
         # Apply chosen metrics
         if chosen_metrics:
@@ -715,16 +719,19 @@ def _rep_agg_func(col):
         )
 
     def metadata_columns_to_perfdata(
-        self, metadata_columns, overwrite=False, drop=False, join_key="profile"
+        self, metadata_columns, overwrite=False, drop=False, join_key=None
     ):
         """Add columns from the metadata table to the performance data table. Joins on join_key, an index or column that is present in both tables.
 
         Arguments:
             metadata_columns (list or str): List of the columns from the metadata table
             overwrite (bool): Determines overriding behavior in performance data table
             drop (bool): Whether to drop the columns from the metadata table afterwards
-            join_key (str): Name of the index/column to join on if not 'profile'
+            join_key (str): Name of the index/column to join on. Defaults to self.profile_idx_name when None
         """
+        if join_key is None:
+            join_key = self.profile_idx_name
+
         # Raise error if join_key is not present in both tables
         if not (
             join_key in self.dataframe.reset_index()
@@ -1145,7 +1152,7 @@ def from_statsframes(tk_list, metadata_key=None, disable_tqdm=False):
         # Pre-check of data structures
         for tk in tk_list:
             verify_thicket_structures(
-                tk.dataframe, index=["node", "profile"]
+                tk.dataframe, index=["node", tk.profile_idx_name]
             )  # Required for deepcopy operation
             verify_thicket_structures(
                 tk.statsframe.dataframe, index=["node"]
@@ -1256,7 +1263,8 @@ def to_json(self, ensemble=True, metadata=True, stats=True):
 
         jsonified_thicket["inclusive_metrics"] = self.inc_metrics
         jsonified_thicket["exclusive_metrics"] = self.exc_metrics
-        jsonified_thicket["profile"] = self.profile
+        jsonified_thicket[self.profile_idx_name] = self.profile
+        jsonified_thicket["profile_idx_name"] = self.profile_idx_name
         jsonified_thicket["profile_mapping"] = self.profile_mapping
 
         return json.dumps(jsonified_thicket)
@@ -1572,7 +1580,9 @@ def groupby(self, by):
             # table
             profile_id = df.index.values.tolist()
             sub_thicket.dataframe = sub_thicket.dataframe[
-                sub_thicket.dataframe.index.get_level_values("profile").isin(profile_id)
+                sub_thicket.dataframe.index.get_level_values(
+                    self.profile_idx_name
+                ).isin(profile_id)
             ]
 
             # clear the aggregated statistics table for current unique group
@@ -1629,7 +1639,7 @@ def filter_stats(self, filter_function):
     def move_metrics_to_statsframe(self, metric_columns, profile=None, override=False):
         if not isinstance(metric_columns, (list, tuple)):
             raise TypeError("'metric_columns' must be a list or tuple")
-        profile_list = self.dataframe.index.unique(level="profile").tolist()
+        profile_list = self.dataframe.index.unique(level=self.profile_idx_name).tolist()
         if profile is None and len(profile_list) != 1:
             raise ValueError(
                 "Cannot move a metric to statsframe when there are multiple profiles. Set the 'profile' argument to the profile you want to move"
@@ -1638,10 +1648,12 @@ def move_metrics_to_statsframe(self, metric_columns, profile=None, override=Fals
             raise ValueError("Invalid profile: {}".format(profile))
         df_for_profile = None
         if profile is None:
-            df_for_profile = self.dataframe.reset_index(level="profile", drop=True)
+            df_for_profile = self.dataframe.reset_index(
+                level=self.profile_idx_name, drop=True
+            )
         else:
             df_for_profile = self.dataframe.xs(
-                profile, level="profile", drop_level=True
+                profile, level=self.profile_idx_name, drop_level=True
             )
         new_statsframe_df = self.statsframe.dataframe.copy(deep=True)
         for c in metric_columns: