Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add profile_idx_name as member to Thicket class #235

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions thicket/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,9 @@ def _handle_metadata():
thickets_cp[i].metadata.reset_index(drop=True, inplace=True)
if metadata_key is None:
for i in range(len(thickets_cp)):
thickets_cp[i].metadata.index.set_names("profile", inplace=True)
thickets_cp[i].metadata.index.set_names(
thickets_cp[i].profile_idx_name, inplace=True
)
else:
for i in range(len(thickets_cp)):
if metadata_key != inner_idx:
Expand Down Expand Up @@ -228,7 +230,9 @@ def _handle_perfdata():
"new_profiles", append=True, inplace=True
)
thickets_cp[i].dataframe.index.rename(
"profile", level="new_profiles", inplace=True
thickets_cp[i].profile_idx_name,
level="new_profiles",
inplace=True,
)
else: # Change second-level index to be from metadata's "metadata_key" column
for i in range(len(thickets_cp)):
Expand Down
8 changes: 4 additions & 4 deletions thicket/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def _agg_rows(col_series):
tk_c.dataframe.reset_index()
.drop(list(tk_c.dataframe.columns) + ["node"], axis=1)
.drop_duplicates()
.set_index("profile")
.set_index(tk_c.profile_idx_name)
)
if (
len(new_profile_label_mapping_df.columns) > 1
Expand All @@ -120,11 +120,11 @@ def _agg_rows(col_series):
tk_c.profile_mapping = new_profile_mapping
# Aggregate metadata
tk_c.metadata = tk_c.metadata.reset_index()
tk_c.metadata["profile"] = tk_c.metadata["profile"].map(
tk_c.metadata[tk_c.profile_idx_name] = tk_c.metadata[tk_c.profile_idx_name].map(
new_profile_label_mapping
)
tk_c.metadata = tk_c.metadata.set_index("profile")
tk_c.metadata = tk_c.metadata.groupby("profile").agg(_agg_rows)
tk_c.metadata = tk_c.metadata.set_index(tk_c.profile_idx_name)
tk_c.metadata = tk_c.metadata.groupby(tk_c.profile_idx_name).agg(_agg_rows)

def _compute_agg_df(col_names, functions, _tk, _agg_cols, _perf_indices):
agg_df = _tk.dataframe[_agg_cols].groupby(_perf_indices).agg(functions[0])
Expand Down
8 changes: 6 additions & 2 deletions thicket/stats/calc_boxplot_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ def calc_boxplot_statistics(thicket, columns=[], quartiles=[0.25, 0.5, 0.75], **
for i in range(0, len(values)):
if values[i] > upper_fence or values[i] < lower_fence:
profile.append(
thicket.dataframe.loc[node].reset_index()["profile"][i]
thicket.dataframe.loc[node].reset_index()[
thicket.profile_idx_name
][i]
)
else:
continue
Expand Down Expand Up @@ -146,7 +148,9 @@ def calc_boxplot_statistics(thicket, columns=[], quartiles=[0.25, 0.5, 0.75], **
for i in range(0, len(values)):
if values[i] > upper_fence or values[i] < lower_fence:
profile.append(
thicket.dataframe[idx].loc[node].reset_index()["profile"][i]
thicket.dataframe[idx]
.loc[node]
.reset_index()[thicket.profile_idx_name][i]
)
else:
continue
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/data/example-json/user_ensemble.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion thicket/tests/test_copy.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_copy(rajaperf_seq_O3_1M_cali, intersection, fill_perfdata):

# Shallow copy of data
node = other.dataframe.index.get_level_values("node")[0]
profile = other.dataframe.index.get_level_values("profile")[0]
profile = other.dataframe.index.get_level_values(other.profile_idx_name)[0]
other.dataframe.loc[(node, profile), "nid"] = -1
assert (
other.dataframe.loc[(node, profile), "nid"]
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_filter_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_filter_profile(rajaperf_cali_1trial):
tk_filt.profile,
tk_filt.profile_mapping.keys(),
tk_filt.metadata.index,
tk_filt.dataframe.index.get_level_values("profile"),
tk_filt.dataframe.index.get_level_values(tk_filt.profile_idx_name),
]:
assert all([prof not in component for prof in rm_profs])
assert all([prof in component for prof in keep_profs])
2 changes: 1 addition & 1 deletion thicket/tests/test_from_statsframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_single_trial(mpi_scaling_cali, intersection, fill_perfdata):
tk = th.Thicket.from_statsframes(th_list, disable_tqdm=True)

# Check level values
assert set(tk.dataframe.index.get_level_values("profile")) == {
assert set(tk.dataframe.index.get_level_values(tk.profile_idx_name)) == {
0,
1,
2,
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def check_groupby(th, columns_values):
th_list = list(th.groupby(column).values())

for thicket in th_list:
check_identity(th, thicket, "default_metric")
check_identity(th, thicket, ["default_metric", "profile_idx_name"])

# inspect all unique values in the use case
for itr, uni_val in enumerate(unique_values):
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def check_query(th, hnids, query):
th_df_profiles.unique().to_list()
)

check_identity(th, filt_th, "default_metric")
check_identity(th, filt_th, ["default_metric", "profile_idx_name"])


def test_query(rajaperf_cuda_block128_1M_cali, intersection, fill_perfdata):
Expand Down
2 changes: 1 addition & 1 deletion thicket/tests/test_query_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def check_query(th_x, hnids, query):
assert all([n in pd.unique(filt_th_df_nodes) for n in sframe_nodes])
assert sorted(pd.unique(filt_th_df_nodes)) == sorted(pd.unique(sframe_nodes))

check_identity(th_x, filt_th, "default_metric")
check_identity(th_x, filt_th, ["default_metric", "profile_idx_name"])


def test_query_stats(rajaperf_cuda_block128_1M_cali, intersection, fill_perfdata):
Expand Down
19 changes: 16 additions & 3 deletions thicket/tests/test_thicket.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_metadata_columns_to_perfdata(
assert "variant" not in tkc2.metadata

# Check error raise for join_key
tkc2.dataframe = tkc2.dataframe.reset_index(level="profile", drop=True)
tkc2.dataframe = tkc2.dataframe.reset_index(level=tkc2.profile_idx_name, drop=True)
with pytest.raises(KeyError, match="'profile' must be present"):
tkc2.metadata_columns_to_perfdata("tuning", overwrite=True)

Expand Down Expand Up @@ -198,8 +198,8 @@ def test_thicketize_graphframe(rajaperf_seq_O3_1M_cali):
assert ht1.graph == th1.graph

# Check dataframes are equivalent when profile level is dropped
th1.dataframe.reset_index(level="profile", inplace=True)
th1.dataframe.drop("profile", axis=1, inplace=True)
th1.dataframe.reset_index(level=th1.profile_idx_name, inplace=True)
th1.dataframe.drop(th1.profile_idx_name, axis=1, inplace=True)
assert ht1.dataframe.equals(th1.dataframe)


Expand All @@ -217,3 +217,16 @@ def test_unique_metadata_base_cuda(
assert res["systype_build"] == ["blueos_3_ppc64le_ib_p9"]
assert res["variant"] == ["Base_CUDA"]
assert res["tuning"] == ["block_128"]


def test_different_profile_idx_name():
    """Check that a Thicket keeps a non-default ``profile_idx_name``.

    Builds a Thicket from an empty graph and an empty dataframe whose
    second index level is named ``"profile2"``, passes that name to the
    constructor, and verifies the attribute stores it.
    """
    custom_name = "profile2"

    empty_graph = ht.graph.Graph(roots=[])
    # Two-level index with no entries; only the level names matter here.
    empty_index = pd.MultiIndex(
        names=["node", custom_name],
        levels=[[], []],
        codes=[[], []],
    )
    empty_frame = pd.DataFrame(index=empty_index)

    tk = Thicket(
        graph=empty_graph,
        dataframe=empty_frame,
        profile_idx_name=custom_name,
    )

    assert tk.profile_idx_name == "profile2"
4 changes: 2 additions & 2 deletions thicket/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ def check_identity(
if equal:
assert (
obj1.__dict__[key] is obj2.__dict__[key]
), "{} should have the same identy".format(key)
), "{} should have the same identity".format(key)
else:
assert (
obj1.__dict__[key] is not obj2.__dict__[key]
), "{} should not have the same identy".format(key)
), "{} should not have the same identity".format(key)
46 changes: 29 additions & 17 deletions thicket/thicket.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def __init__(
metadata={},
performance_cols=None,
profile=None,
profile_idx_name="profile",
profile_mapping=None,
statsframe=None,
statsframe_ops_cache=None,
Expand All @@ -81,13 +82,15 @@ def __init__(
performance_cols (list): list of numeric columns within the performance
dataframe
profile (list): list of hashed profile strings
profile_idx_name (str): name of the profile index in the dataframe
profile_mapping (dict): mapping of hashed profile strings to original strings
statsframe (DataFrame): pandas DataFrame indexed by Nodes from the graph
"""
super().__init__(
graph, dataframe, exc_metrics, inc_metrics, default_metric, metadata
)
self.profile = profile
self.profile_idx_name = profile_idx_name
self.profile_mapping = profile_mapping
if statsframe is None:
self.statsframe = GraphFrame(
Expand Down Expand Up @@ -202,12 +205,12 @@ def thicketize_graphframe(gf, prf):
temp_meta = {}
temp_meta[hash_arg] = th.metadata
th.metadata = pd.DataFrame.from_dict(temp_meta, orient="index")
th.metadata.index.set_names("profile", inplace=True)
th.metadata.index.set_names(th.profile_idx_name, inplace=True)

# Add profile to dataframe index
th.dataframe["profile"] = hash_arg
th.dataframe[th.profile_idx_name] = hash_arg
index_names = list(th.dataframe.index.names)
index_names.insert(1, "profile")
index_names.insert(1, th.profile_idx_name)
th.dataframe.reset_index(inplace=True)
th.dataframe.set_index(index_names, inplace=True)

Expand Down Expand Up @@ -590,15 +593,16 @@ def from_json(json_thicket):
dataframe=gf.dataframe,
exc_metrics=thicket_dict["exclusive_metrics"],
inc_metrics=thicket_dict["inclusive_metrics"],
profile=thicket_dict["profile"],
profile=thicket_dict[thicket_dict["profile_idx_name"]],
profile_idx_name=thicket_dict["profile_idx_name"],
profile_mapping=thicket_dict["profile_mapping"],
)

if "metadata" in thicket_dict:
mf = pd.DataFrame(thicket_dict["metadata"])
mf.set_index(mf["profile"], inplace=True)
if "profile" in mf.columns:
mf = mf.drop(columns=["profile"])
mf.set_index(mf[th.profile_idx_name], inplace=True)
if th.profile_idx_name in mf.columns:
mf = mf.drop(columns=[th.profile_idx_name])
th.metadata = mf

# catch condition where there are no stats
Expand Down Expand Up @@ -687,10 +691,10 @@ def _rep_agg_func(col):
agg_data = pd.DataFrame.from_records(rep_data).agg(_rep_agg_func)
# Add node and profile
agg_data["node"] = node_profile[0]
agg_data["profile"] = node_profile[1]
agg_data[self.profile_idx_name] = node_profile[1]
# Append to main df
ncu_df = pd.concat([ncu_df, pd.DataFrame([agg_data])], ignore_index=True)
ncu_df = ncu_df.set_index(["node", "profile"])
ncu_df = ncu_df.set_index(["node", self.profile_idx_name])

# Apply chosen metrics
if chosen_metrics:
Expand All @@ -715,16 +719,19 @@ def _rep_agg_func(col):
)

def metadata_columns_to_perfdata(
self, metadata_columns, overwrite=False, drop=False, join_key="profile"
self, metadata_columns, overwrite=False, drop=False, join_key=None
):
"""Add columns from the metadata table to the performance data table. Joins on join_key, an index or column that is present in both tables.

Arguments:
metadata_columns (list or str): List of the columns from the metadata table
overwrite (bool): Determines overriding behavior in performance data table
drop (bool): Whether to drop the columns from the metadata table afterwards
join_key (str): Name of the index/column to join on if not 'profile'
join_key (str): Name of the index/column to join on if not self.profile_idx_name
"""
if join_key is None:
join_key = self.profile_idx_name

# Raise error if join_key is not present in both tables
if not (
join_key in self.dataframe.reset_index()
Expand Down Expand Up @@ -1145,7 +1152,7 @@ def from_statsframes(tk_list, metadata_key=None, disable_tqdm=False):
# Pre-check of data structures
for tk in tk_list:
verify_thicket_structures(
tk.dataframe, index=["node", "profile"]
tk.dataframe, index=["node", tk.profile_idx_name]
) # Required for deepcopy operation
verify_thicket_structures(
tk.statsframe.dataframe, index=["node"]
Expand Down Expand Up @@ -1256,7 +1263,8 @@ def to_json(self, ensemble=True, metadata=True, stats=True):

jsonified_thicket["inclusive_metrics"] = self.inc_metrics
jsonified_thicket["exclusive_metrics"] = self.exc_metrics
jsonified_thicket["profile"] = self.profile
jsonified_thicket[self.profile_idx_name] = self.profile
jsonified_thicket["profile_idx_name"] = self.profile_idx_name
jsonified_thicket["profile_mapping"] = self.profile_mapping

return json.dumps(jsonified_thicket)
Expand Down Expand Up @@ -1572,7 +1580,9 @@ def groupby(self, by):
# table
profile_id = df.index.values.tolist()
sub_thicket.dataframe = sub_thicket.dataframe[
sub_thicket.dataframe.index.get_level_values("profile").isin(profile_id)
sub_thicket.dataframe.index.get_level_values(
self.profile_idx_name
).isin(profile_id)
]

# clear the aggregated statistics table for current unique group
Expand Down Expand Up @@ -1629,7 +1639,7 @@ def filter_stats(self, filter_function):
def move_metrics_to_statsframe(self, metric_columns, profile=None, override=False):
if not isinstance(metric_columns, (list, tuple)):
raise TypeError("'metric_columns' must be a list or tuple")
profile_list = self.dataframe.index.unique(level="profile").tolist()
profile_list = self.dataframe.index.unique(level=self.profile_idx_name).tolist()
if profile is None and len(profile_list) != 1:
raise ValueError(
"Cannot move a metric to statsframe when there are multiple profiles. Set the 'profile' argument to the profile you want to move"
Expand All @@ -1638,10 +1648,12 @@ def move_metrics_to_statsframe(self, metric_columns, profile=None, override=Fals
raise ValueError("Invalid profile: {}".format(profile))
df_for_profile = None
if profile is None:
df_for_profile = self.dataframe.reset_index(level="profile", drop=True)
df_for_profile = self.dataframe.reset_index(
level=self.profile_idx_name, drop=True
)
else:
df_for_profile = self.dataframe.xs(
profile, level="profile", drop_level=True
profile, level=self.profile_idx_name, drop_level=True
)
new_statsframe_df = self.statsframe.dataframe.copy(deep=True)
for c in metric_columns:
Expand Down
Loading