From f62113b30e2b94b1dbae358ced6d913892889ac3 Mon Sep 17 00:00:00 2001
From: Mohammad Amin <dadgaramin96@gmail.com>
Date: Tue, 5 Mar 2024 15:41:47 +0330
Subject: [PATCH] feat: Added actions matrix creation! note: We still need to
 update assess_engagement, and it is in the core_analyzer library.

---
 .../analysis/analytics_interactions_script.py |   2 +-
 .../compute_interaction_matrix_discord.py     |  71 ++++-
 .../analysis/compute_member_activity.py       |  11 +-
 .../utils/compute_interaction_mtx_utils.py    |  21 +-
 .../test_process_non_reaction_heatmaps.py     | 246 ++++++++++++++++++
 5 files changed, 327 insertions(+), 24 deletions(-)
 create mode 100644 tests/unit/test_process_non_reaction_heatmaps.py

diff --git a/discord_analyzer/analysis/analytics_interactions_script.py b/discord_analyzer/analysis/analytics_interactions_script.py
index 837b870..a5ca076 100644
--- a/discord_analyzer/analysis/analytics_interactions_script.py
+++ b/discord_analyzer/analysis/analytics_interactions_script.py
@@ -67,7 +67,7 @@ def per_account_interactions(
         # flatten the list
         samples_flattened = list(itertools.chain(*samples))
 
-        for i, sample in enumerate(samples_flattened):
+        for _, sample in enumerate(samples_flattened):
             account_name = sample[0]["account"]
             interaction_count = sample[0]["count"]
 
diff --git a/discord_analyzer/analysis/compute_interaction_matrix_discord.py b/discord_analyzer/analysis/compute_interaction_matrix_discord.py
index b2337b5..da0b08d 100644
--- a/discord_analyzer/analysis/compute_interaction_matrix_discord.py
+++ b/discord_analyzer/analysis/compute_interaction_matrix_discord.py
@@ -4,8 +4,9 @@
 #  compute_interaction_matrix_discord.py
 #
 #  Author Ene SS Rawa / Tjitse van der Molen
-
-from discord_analyzer.analysis.utils.activity import Activity
+from typing import Any
+import copy
+from tc_core_analyzer_lib.utils.activity import DiscordActivity
 from discord_analyzer.DB_operations.mongodb_access import DB_access
 from discord_analyzer.DB_operations.mongodb_query import MongodbQuery
 from numpy import ndarray
@@ -21,7 +22,13 @@ def compute_interaction_matrix_discord(
     dates: list[str],
     channels: list[str],
     db_access: DB_access,
-    activities: list[str] = [Activity.Mention, Activity.Reply, Activity.Reaction],
+    activities: list[str] = [
+        DiscordActivity.Mention,
+        DiscordActivity.Reply,
+        DiscordActivity.Reaction,
+        DiscordActivity.Lone_msg,
+        DiscordActivity.Thread_msg,
+    ],
 ) -> dict[str, ndarray]:
     """
     Computes interaction matrix from discord data
@@ -34,7 +41,7 @@ def compute_interaction_matrix_discord(
     db_access - obj : database access object
     activities - list[Activity] :
         the list of activities to generate the matrix for
-        default is to include all 3 `Activity` types
+        default is to include all activity types
         minimum length is 1
 
     Output:
@@ -45,8 +52,7 @@ def compute_interaction_matrix_discord(
     """
 
     feature_projection = {
-        "thr_messages": 0,
-        "lone_messages": 0,
+        "channelId": 0,
         "replier": 0,
         "replied": 0,
         "mentioner": 0,
@@ -77,15 +83,66 @@ def compute_interaction_matrix_discord(
     db_results = list(cursor)
 
     per_acc_query_result = prepare_per_account(db_results=db_results)
+    per_acc_interaction = process_non_reactions(per_acc_query_result)
 
     # And now compute the interactions per account_name (`acc`)
     int_mat = {}
     # computing `int_mat` per activity
     for activity in activities:
         int_mat[activity] = generate_interaction_matrix(
-            per_acc_interactions=per_acc_query_result,
+            per_acc_interactions=per_acc_interaction,
             acc_names=acc_names,
             activities=[activity],
         )
 
     return int_mat
+
+
+def process_non_reactions(
+    heatmaps_data_per_acc: dict[str, list[dict[str, Any]]],
+    skip_fields: list[str] = [
+        "reacted_per_acc",
+        "mentioner_per_acc",
+        "replied_per_acc",
+        "account_name",
+        "date",
+    ],
+) -> dict[str, list[dict[str, Any]]]:
+    """
+    Turn the action count vectors of heatmaps documents into self-interactions.
+
+    Every field not listed in `skip_fields` is summed; a non-zero total becomes
+    `[[{"account": <account>, "count": <total>}]]` and a zero total becomes `[]`.
+    The input is deep-copied first, so the caller's data is left untouched.
+
+    Parameters
+    -----------
+    heatmaps_data_per_acc : dict[str, list[dict[str, Any]]]
+        heatmaps data per account; the keys are account names
+        and the values are the lists of heatmaps documents related to them
+    skip_fields : list[str]
+        document fields to leave as they are: the interaction fields
+        themselves plus `account_name` and `date`; this list is only read
+
+    Returns
+    --------
+    heatmaps_interactions_per_acc : dict[str, list[dict[str, Any]]]
+        a copy of the input with the non-skipped fields as self-interactions
+    """
+    heatmaps_interactions_per_acc = copy.deepcopy(heatmaps_data_per_acc)
+    # loop-invariant: build the lookup set once instead of once per document
+    skipped = set(skip_fields)
+
+    for account, documents in heatmaps_interactions_per_acc.items():
+        for document in documents:
+            # snapshot the keys so the document can be rewritten while looping
+            for action in set(document) - skipped:
+                # sum once and reuse it for both the check and the stored count
+                action_count = sum(document[action])
+                if action_count:
+                    document[action] = [[{"account": account, "count": action_count}]]
+                else:
+                    # no activity: an empty list means "no interactions"
+                    document[action] = []
+
+    return heatmaps_interactions_per_acc
diff --git a/discord_analyzer/analysis/compute_member_activity.py b/discord_analyzer/analysis/compute_member_activity.py
index b1a0dc4..d214c58 100644
--- a/discord_analyzer/analysis/compute_member_activity.py
+++ b/discord_analyzer/analysis/compute_member_activity.py
@@ -214,12 +214,13 @@ def compute_member_activity(
         last_start = time_diff - relativedelta(days=window_param["period_size"] - 1)
 
         # # # ACTUAL ANALYSIS # # #
-
         assess_engagment = EngagementAssessment(
             activities=[
                 DiscordActivity.Mention,
                 DiscordActivity.Reply,
                 DiscordActivity.Reaction,
+                DiscordActivity.Lone_msg,
+                DiscordActivity.Mention,
             ],
             activities_ignore_0_axis=[DiscordActivity.Mention],
             activities_ignore_1_axis=[],
@@ -288,10 +289,10 @@ def compute_member_activity(
                     acc_names, date_list_w_str, channels, db_access
                 )
 
-                # for each int_mat type
-                for key in list(int_mat.keys()):
-                    # remove interactions with self
-                    int_mat[key][np.diag_indices_from(int_mat[key])] = 0
+                # # for each int_mat type
+                # for key in list(int_mat.keys()):
+                #     # remove interactions with self
+                #     int_mat[key][np.diag_indices_from(int_mat[key])] = 0
 
                 # assess engagement
                 (graph_out, *activity_dict) = assess_engagment.compute(
diff --git a/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py b/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py
index f3d8636..d77ef10 100644
--- a/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py
+++ b/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py
@@ -5,7 +5,7 @@
 from discord_analyzer.analysis.analytics_interactions_script import (
     per_account_interactions,
 )
-from discord_analyzer.analysis.utils.activity import Activity
+from tc_core_analyzer_lib.utils.activity import DiscordActivity
 
 
 def prepare_per_account(db_results: list) -> dict[str, list[dict]]:
@@ -29,13 +29,9 @@ def prepare_per_account(db_results: list) -> dict[str, list[dict]]:
 
     # a dictionary for results of each account
     for db_record in db_results:
-        # if the data for a specific account was not created before, create one as list
         acc_name = db_record["account_name"]
-        if acc_name not in per_acc_query_result.keys():
-            per_acc_query_result[acc_name] = [db_record]
-        # else, append
-        else:
-            per_acc_query_result[acc_name].append(db_record)
+        per_acc_query_result.setdefault(acc_name, [])
+        per_acc_query_result[acc_name].append(db_record)
 
     return per_acc_query_result
 
@@ -66,7 +62,6 @@ def generate_interaction_matrix(
         an array of integer values
         each row and column are representative of account interactions
     """
-
     int_matrix = np.zeros((len(acc_names), len(acc_names)), dtype=np.uint16)
 
     for acc in per_acc_interactions.keys():
@@ -117,12 +112,16 @@ def prepare_interaction_field_names(activities: list[str]) -> list[str]:
     """
     field_names = []
     for activity in activities:
-        if activity == Activity.Mention:
+        if activity == DiscordActivity.Mention:
             field_names.append("mentioner_per_acc")
-        elif activity == Activity.Reply:
+        elif activity == DiscordActivity.Reply:
             field_names.append("replied_per_acc")
-        elif activity == Activity.Reaction:
+        elif activity == DiscordActivity.Reaction:
             field_names.append("reacted_per_acc")
+        elif activity == DiscordActivity.Thread_msg:
+            field_names.append("thr_messages")
+        elif activity == DiscordActivity.Lone_msg:
+            field_names.append("lone_messages")
         else:
             logging.warning("prepare_interaction_field_names: Wrong activity given!")
 
diff --git a/tests/unit/test_process_non_reaction_heatmaps.py b/tests/unit/test_process_non_reaction_heatmaps.py
new file mode 100644
index 0000000..853d62b
--- /dev/null
+++ b/tests/unit/test_process_non_reaction_heatmaps.py
@@ -0,0 +1,246 @@
+from unittest import TestCase
+
+from discord_analyzer.analysis.compute_interaction_matrix_discord import (
+    process_non_reactions,
+)
+import numpy as np
+
+
+class TestProcessNonReactions(TestCase):  # unit tests for process_non_reactions
+    def test_empty_inputs(self):  # no accounts in, no accounts out
+        intput_data = {}
+        results = process_non_reactions(heatmaps_data_per_acc=intput_data)
+        self.assertEqual(results, {})
+
+    def test_single_account_no_action(self):  # all-zero vectors become []
+        # one slot per hour of the day, all zero (no activity)
+        zeros_vector = np.zeros(24)
+        input_data = {
+            "acc1": [
+                {
+                    "lone_messages": zeros_vector,
+                    "thr_messages": zeros_vector,
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ]
+        }
+        results = process_non_reactions(input_data)
+
+        expected_results = {
+            "acc1": [
+                {
+                    "lone_messages": [],
+                    "thr_messages": [],
+                    # skipped fields pass through unchanged
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ]
+        }
+        self.assertEqual(results, expected_results)
+
+    def test_single_account_with_action(self):  # non-zero vectors become self-interactions
+        lone_messages = np.zeros(24)
+        # 3 channel messages in the sixth hourly slot (index 5)
+        lone_messages[5] = 3
+
+        thr_messages = np.zeros(24)
+        thr_messages[1] = 1
+
+        input_data = {
+            "acc1": [
+                {
+                    "lone_messages": lone_messages,
+                    "thr_messages": thr_messages,
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ]
+        }
+        results = process_non_reactions(input_data)
+        expected_results = {
+            "acc1": [
+                {
+                    "lone_messages": [[{"account": "acc1", "count": 3}]],
+                    "thr_messages": [[{"account": "acc1", "count": 1}]],
+                    # skipped fields pass through unchanged
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ]
+        }
+        self.assertEqual(results, expected_results)
+
+    def test_multiple_account_with_action(self):  # each account maps to itself
+        user1_lone_messages = np.zeros(24)
+        # 3 channel messages in the sixth hourly slot (index 5)
+        user1_lone_messages[5] = 3
+
+        user1_thr_messages = np.zeros(24)
+        user1_thr_messages[1] = 1
+
+        user2_thr_messages = np.zeros(24)
+        user2_thr_messages[7] = 5
+        user2_thr_messages[20] = 2
+
+        input_data = {
+            "acc1": [
+                {
+                    "lone_messages": user1_lone_messages,
+                    "thr_messages": user1_thr_messages,
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": {},
+                    "date": "2024-01-01",
+                }
+            ],
+            "acc2": [
+                {
+                    "lone_messages": np.zeros(24),
+                    "thr_messages": user2_thr_messages,
+                    "reacted_per_acc": [
+                        [{"account": "acc5", "count": 3}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ],
+        }
+        results = process_non_reactions(input_data)
+
+        expected_results = {
+            "acc1": [
+                {
+                    "lone_messages": [[{"account": "acc1", "count": 3}]],
+                    "thr_messages": [[{"account": "acc1", "count": 1}]],
+                    # skipped fields pass through unchanged
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": {},
+                    "date": "2024-01-01",
+                }
+            ],
+            "acc2": [
+                {
+                    "lone_messages": [],
+                    "thr_messages": [[{"account": "acc2", "count": 7}]],  # 5 + 2
+                    # skipped fields pass through unchanged
+                    "reacted_per_acc": [
+                        [{"account": "acc5", "count": 3}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ],
+        }
+        self.assertEqual(results, expected_results)
+
+    def test_multiple_account_multiple_documents_with_action(self):
+        user1_lone_messages = np.zeros(24)
+        # 3 channel messages in the sixth hourly slot (index 5)
+        user1_lone_messages[5] = 3
+
+        user1_thr_messages = np.zeros(24)
+        user1_thr_messages[1] = 1
+
+        user2_thr_messages = np.zeros(24)
+        user2_thr_messages[7] = 5
+        user2_thr_messages[20] = 2
+
+        input_data = {
+            "acc1": [
+                {
+                    "lone_messages": user1_lone_messages,
+                    "thr_messages": user1_thr_messages,
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": {},
+                    "date": "2024-01-01",
+                },
+                {
+                    "lone_messages": np.zeros(24),
+                    "thr_messages": user1_lone_messages,  # reuses the lone vector, sum is 3
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": {},
+                    "date": "2024-01-02",
+                },
+            ],
+            "acc2": [
+                {
+                    "lone_messages": np.zeros(24),
+                    "thr_messages": user2_thr_messages,
+                    "reacted_per_acc": [
+                        [{"account": "acc5", "count": 3}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ],
+        }
+        results = process_non_reactions(input_data)
+
+        expected_results = {
+            "acc1": [
+                {
+                    "lone_messages": [[{"account": "acc1", "count": 3}]],
+                    "thr_messages": [[{"account": "acc1", "count": 1}]],
+                    # skipped fields pass through unchanged
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": {},
+                    "date": "2024-01-01",
+                },
+                {
+                    "lone_messages": [],
+                    "thr_messages": [[{"account": "acc1", "count": 3}]],
+                    # skipped fields pass through unchanged
+                    "reacted_per_acc": [
+                        [{"account": "acc2", "count": 1}],
+                        [{"account": "acc3", "count": 5}],
+                    ],
+                    "replied_per_acc": {},
+                    "date": "2024-01-02",
+                },
+            ],
+            "acc2": [
+                {
+                    "lone_messages": [],
+                    "thr_messages": [[{"account": "acc2", "count": 7}]],  # 5 + 2
+                    # skipped fields pass through unchanged
+                    "reacted_per_acc": [
+                        [{"account": "acc5", "count": 3}],
+                    ],
+                    "replied_per_acc": [],
+                    "date": "2024-01-01",
+                }
+            ],
+        }
+        self.assertEqual(results, expected_results)