From f62113b30e2b94b1dbae358ced6d913892889ac3 Mon Sep 17 00:00:00 2001 From: Mohammad Amin Date: Tue, 5 Mar 2024 15:41:47 +0330 Subject: [PATCH] feat: Added actions matrix creation! note: We still need to update assess_engagement, which is in the core_analyzer library. --- .../analysis/analytics_interactions_script.py | 2 +- .../compute_interaction_matrix_discord.py | 71 ++++- .../analysis/compute_member_activity.py | 11 +- .../utils/compute_interaction_mtx_utils.py | 21 +- .../test_process_non_reaction_heatmaps.py | 246 ++++++++++++++++++ 5 files changed, 327 insertions(+), 24 deletions(-) create mode 100644 tests/unit/test_process_non_reaction_heatmaps.py diff --git a/discord_analyzer/analysis/analytics_interactions_script.py b/discord_analyzer/analysis/analytics_interactions_script.py index 837b870..a5ca076 100644 --- a/discord_analyzer/analysis/analytics_interactions_script.py +++ b/discord_analyzer/analysis/analytics_interactions_script.py @@ -67,7 +67,7 @@ def per_account_interactions( # flatten the list samples_flattened = list(itertools.chain(*samples)) - for i, sample in enumerate(samples_flattened): + for _, sample in enumerate(samples_flattened): account_name = sample[0]["account"] interaction_count = sample[0]["count"] diff --git a/discord_analyzer/analysis/compute_interaction_matrix_discord.py b/discord_analyzer/analysis/compute_interaction_matrix_discord.py index b2337b5..da0b08d 100644 --- a/discord_analyzer/analysis/compute_interaction_matrix_discord.py +++ b/discord_analyzer/analysis/compute_interaction_matrix_discord.py @@ -4,8 +4,9 @@ # compute_interaction_matrix_discord.py # # Author Ene SS Rawa / Tjitse van der Molen - -from discord_analyzer.analysis.utils.activity import Activity +from typing import Any +import copy +from tc_core_analyzer_lib.utils.activity import DiscordActivity from discord_analyzer.DB_operations.mongodb_access import DB_access from discord_analyzer.DB_operations.mongodb_query import MongodbQuery from numpy import ndarray @@ 
-21,7 +22,13 @@ def compute_interaction_matrix_discord( dates: list[str], channels: list[str], db_access: DB_access, - activities: list[str] = [Activity.Mention, Activity.Reply, Activity.Reaction], + activities: list[str] = [ + DiscordActivity.Mention, + DiscordActivity.Reply, + DiscordActivity.Reaction, + DiscordActivity.Lone_msg, + DiscordActivity.Thread_msg, + ], ) -> dict[str, ndarray]: """ Computes interaction matrix from discord data @@ -34,7 +41,7 @@ def compute_interaction_matrix_discord( db_access - obj : database access object activities - list[Activity] : the list of activities to generate the matrix for - default is to include all 3 `Activity` types + default is to include all activity types minimum length is 1 Output: @@ -45,8 +52,7 @@ def compute_interaction_matrix_discord( """ feature_projection = { - "thr_messages": 0, - "lone_messages": 0, + "channelId": 0, "replier": 0, "replied": 0, "mentioner": 0, @@ -77,15 +83,66 @@ def compute_interaction_matrix_discord( db_results = list(cursor) per_acc_query_result = prepare_per_account(db_results=db_results) + per_acc_interaction = process_non_reactions(per_acc_query_result) # And now compute the interactions per account_name (`acc`) int_mat = {} # computing `int_mat` per activity for activity in activities: int_mat[activity] = generate_interaction_matrix( - per_acc_interactions=per_acc_query_result, + per_acc_interactions=per_acc_interaction, acc_names=acc_names, activities=[activity], ) return int_mat + + +def process_non_reactions( + heatmaps_data_per_acc: dict[str, list[dict[str, Any]]], + skip_fields: list[str] = [ + "reacted_per_acc", + "mentioner_per_acc", + "replied_per_acc", + "account_name", + "date", + ], +) -> dict[str, list[dict[str, Any]]]: + """ + process the non-interactions heatmap data to be like interaction + we will make it self interactions + + Parameters + ----------- + heatmaps_data_per_acc : dict[str, list[dict[str, Any]]] + heatmaps data per account + the keys are accounts + and the 
values are the list of heatmaps documents related to them + skip_fields : list[str] + the part of heatmaps document that we don't need to make them like interaction + can be interactions itself and account_name, and date + + Returns + -------- + heatmaps_interactions_per_acc : dict[str, list[dict[str, Any]]] + the same as before but we have changed the non interaction ones to self interaction + """ + heatmaps_interactions_per_acc = copy.deepcopy(heatmaps_data_per_acc) + + for account in heatmaps_interactions_per_acc.keys(): + # for each heatmaps document + for document in heatmaps_interactions_per_acc[account]: + activities = document.keys() + actions = set(activities) - set(skip_fields) + + for action in actions: + action_count = sum(document[action]) + if action_count: + document[action] = [ + [{"account": account, "count": sum(document[action])}] + ] + else: + # action count was zero + document[action] = [] + + return heatmaps_interactions_per_acc diff --git a/discord_analyzer/analysis/compute_member_activity.py b/discord_analyzer/analysis/compute_member_activity.py index b1a0dc4..d214c58 100644 --- a/discord_analyzer/analysis/compute_member_activity.py +++ b/discord_analyzer/analysis/compute_member_activity.py @@ -214,12 +214,13 @@ def compute_member_activity( last_start = time_diff - relativedelta(days=window_param["period_size"] - 1) # # # ACTUAL ANALYSIS # # # - assess_engagment = EngagementAssessment( activities=[ DiscordActivity.Mention, DiscordActivity.Reply, DiscordActivity.Reaction, + DiscordActivity.Lone_msg, + DiscordActivity.Mention, ], activities_ignore_0_axis=[DiscordActivity.Mention], activities_ignore_1_axis=[], @@ -288,10 +289,10 @@ def compute_member_activity( acc_names, date_list_w_str, channels, db_access ) - # for each int_mat type - for key in list(int_mat.keys()): - # remove interactions with self - int_mat[key][np.diag_indices_from(int_mat[key])] = 0 + # # for each int_mat type + # for key in list(int_mat.keys()): + # # remove 
interactions with self + # int_mat[key][np.diag_indices_from(int_mat[key])] = 0 # assess engagement (graph_out, *activity_dict) = assess_engagment.compute( diff --git a/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py b/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py index f3d8636..d77ef10 100644 --- a/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py +++ b/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py @@ -5,7 +5,7 @@ from discord_analyzer.analysis.analytics_interactions_script import ( per_account_interactions, ) -from discord_analyzer.analysis.utils.activity import Activity +from tc_core_analyzer_lib.utils.activity import DiscordActivity def prepare_per_account(db_results: list) -> dict[str, list[dict]]: @@ -29,13 +29,9 @@ def prepare_per_account(db_results: list) -> dict[str, list[dict]]: # a dictionary for results of each account for db_record in db_results: - # if the data for a specific account was not created before, create one as list acc_name = db_record["account_name"] - if acc_name not in per_acc_query_result.keys(): - per_acc_query_result[acc_name] = [db_record] - # else, append - else: - per_acc_query_result[acc_name].append(db_record) + per_acc_query_result.setdefault(acc_name, []) + per_acc_query_result[acc_name].append(db_record) return per_acc_query_result @@ -66,7 +62,6 @@ def generate_interaction_matrix( an array of integer values each row and column are representative of account interactions """ - int_matrix = np.zeros((len(acc_names), len(acc_names)), dtype=np.uint16) for acc in per_acc_interactions.keys(): @@ -117,12 +112,16 @@ def prepare_interaction_field_names(activities: list[str]) -> list[str]: """ field_names = [] for activity in activities: - if activity == Activity.Mention: + if activity == DiscordActivity.Mention: field_names.append("mentioner_per_acc") - elif activity == Activity.Reply: + elif activity == DiscordActivity.Reply: field_names.append("replied_per_acc") - 
elif activity == Activity.Reaction: + elif activity == DiscordActivity.Reaction: field_names.append("reacted_per_acc") + elif activity == DiscordActivity.Thread_msg: + field_names.append("thr_messages") + elif activity == DiscordActivity.Lone_msg: + field_names.append("lone_messages") else: logging.warning("prepare_interaction_field_names: Wrong activity given!") diff --git a/tests/unit/test_process_non_reaction_heatmaps.py b/tests/unit/test_process_non_reaction_heatmaps.py new file mode 100644 index 0000000..853d62b --- /dev/null +++ b/tests/unit/test_process_non_reaction_heatmaps.py @@ -0,0 +1,246 @@ +from unittest import TestCase + +from discord_analyzer.analysis.compute_interaction_matrix_discord import ( + process_non_reactions, +) +import numpy as np + + +class TestProcessNonReactions(TestCase): + def test_empty_inputs(self): + intput_data = {} + results = process_non_reactions(heatmaps_data_per_acc=intput_data) + self.assertEqual(results, {}) + + def test_single_account_no_action(self): + # 24 hours + zeros_vector = np.zeros(24) + input_data = { + "acc1": [ + { + "lone_messages": zeros_vector, + "thr_messages": zeros_vector, + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ] + } + results = process_non_reactions(input_data) + + expected_results = { + "acc1": [ + { + "lone_messages": [], + "thr_messages": [], + # others same as before + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ] + } + self.assertEqual(results, expected_results) + + def test_single_account_with_action(self): + lone_messages = np.zeros(24) + # 3 channel messages at hour 6 + lone_messages[5] = 3 + + thr_messages = np.zeros(24) + thr_messages[1] = 1 + + input_data = { + "acc1": [ + { + "lone_messages": lone_messages, + "thr_messages": thr_messages, + "reacted_per_acc": [ + 
[{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ] + } + results = process_non_reactions(input_data) + expected_results = { + "acc1": [ + { + "lone_messages": [[{"account": "acc1", "count": 3}]], + "thr_messages": [[{"account": "acc1", "count": 1}]], + # others same as before + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ] + } + self.assertEqual(results, expected_results) + + def test_multiple_account_with_action(self): + user1_lone_messages = np.zeros(24) + # 3 channel messages from hour 6 to 7 + user1_lone_messages[5] = 3 + + user1_thr_messages = np.zeros(24) + user1_thr_messages[1] = 1 + + user2_thr_messages = np.zeros(24) + user2_thr_messages[7] = 5 + user2_thr_messages[20] = 2 + + input_data = { + "acc1": [ + { + "lone_messages": user1_lone_messages, + "thr_messages": user1_thr_messages, + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": {}, + "date": "2024-01-01", + } + ], + "acc2": [ + { + "lone_messages": np.zeros(24), + "thr_messages": user2_thr_messages, + "reacted_per_acc": [ + [{"account": "acc5", "count": 3}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ], + } + results = process_non_reactions(input_data) + + expected_results = { + "acc1": [ + { + "lone_messages": [[{"account": "acc1", "count": 3}]], + "thr_messages": [[{"account": "acc1", "count": 1}]], + # others same as before + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": {}, + "date": "2024-01-01", + } + ], + "acc2": [ + { + "lone_messages": [], + "thr_messages": [[{"account": "acc2", "count": 7}]], + # others same as before + "reacted_per_acc": [ + [{"account": "acc5", "count": 3}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ], 
+ } + self.assertEqual(results, expected_results) + + def test_multiple_account_multiple_documents_with_action(self): + user1_lone_messages = np.zeros(24) + # 3 channel messages from hour 6 to 7 + user1_lone_messages[5] = 3 + + user1_thr_messages = np.zeros(24) + user1_thr_messages[1] = 1 + + user2_thr_messages = np.zeros(24) + user2_thr_messages[7] = 5 + user2_thr_messages[20] = 2 + + input_data = { + "acc1": [ + { + "lone_messages": user1_lone_messages, + "thr_messages": user1_thr_messages, + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": {}, + "date": "2024-01-01", + }, + { + "lone_messages": np.zeros(24), + "thr_messages": user1_lone_messages, + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": {}, + "date": "2024-01-02", + }, + ], + "acc2": [ + { + "lone_messages": np.zeros(24), + "thr_messages": user2_thr_messages, + "reacted_per_acc": [ + [{"account": "acc5", "count": 3}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ], + } + results = process_non_reactions(input_data) + + expected_results = { + "acc1": [ + { + "lone_messages": [[{"account": "acc1", "count": 3}]], + "thr_messages": [[{"account": "acc1", "count": 1}]], + # others same as before + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": {}, + "date": "2024-01-01", + }, + { + "lone_messages": [], + "thr_messages": [[{"account": "acc1", "count": 3}]], + # others same as before + "reacted_per_acc": [ + [{"account": "acc2", "count": 1}], + [{"account": "acc3", "count": 5}], + ], + "replied_per_acc": {}, + "date": "2024-01-02", + }, + ], + "acc2": [ + { + "lone_messages": [], + "thr_messages": [[{"account": "acc2", "count": 7}]], + # others same as before + "reacted_per_acc": [ + [{"account": "acc5", "count": 3}], + ], + "replied_per_acc": [], + "date": "2024-01-01", + } + ], + 
} + self.assertEqual(results, expected_results)