diff --git a/.github/workflows/production.yml b/.github/workflows/production.yml
index a1be27b..de44b12 100644
--- a/.github/workflows/production.yml
+++ b/.github/workflows/production.yml
@@ -9,4 +9,29 @@ jobs:
   ci:
     uses: TogetherCrew/operations/.github/workflows/ci.yml@main
     secrets:
-      CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
\ No newline at end of file
+      CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }}
+  package_publish:
+    needs: ci
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.10"]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip setuptools wheel twine
+      - name: Build package
+        run: |
+          python setup.py sdist bdist_wheel
+      - name: Publish package to PyPI
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+        run: |
+          python -m twine upload dist/*
\ No newline at end of file
diff --git a/analyzer_init.py b/analyzer_init.py
deleted file mode 100644
index 9cfdcaf..0000000
--- a/analyzer_init.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from typing import Any
-
-from discord_analyzer import RnDaoAnalyzer
-from utils.credentials import get_mongo_credentials
-
-
-class AnalyzerInit:
-    """
-    initialize the analyzer with its configs
-    """
-
-    # TODO: update to platform_id as input
-    def __init__(self, guild_id: str) -> None:
-        self.guild_id = guild_id
-
-    def get_analyzer(self) -> RnDaoAnalyzer:
-        """
-        Returns:
-        ---------
-        analyzer : RnDaoAnalyzer
-        """
-        analyzer = RnDaoAnalyzer(self.guild_id)
-
-        # credentials
-        mongo_creds = get_mongo_credentials()
-
-        analyzer.set_mongo_database_info(
-            mongo_db_host=mongo_creds["host"],
-            mongo_db_password=mongo_creds["password"],
-            mongo_db_port=mongo_creds["port"],
-            mongo_db_user=mongo_creds["user"],
-        )
-        analyzer.database_connect()
-
-        return analyzer
-
-    def _get_mongo_connection(self, mongo_creds: dict[str, Any]):
-        user = mongo_creds["user"]
-        password = mongo_creds["password"]
-        host = mongo_creds["host"]
-        port = mongo_creds["port"]
-
-        connection = f"mongodb://{user}:{password}@{host}:{port}"
-
-        return connection
diff --git a/discord_analyzer/DB_operations/mongodb_query.py b/discord_analyzer/DB_operations/mongodb_query.py
deleted file mode 100644
index 549efcf..0000000
--- a/discord_analyzer/DB_operations/mongodb_query.py
+++ /dev/null
@@ -1,139 +0,0 @@
-class MongodbQuery:
-    def __init__(self) -> None:
-        """
-        create different queries to query the database
-        """
-        pass
-
-    def create_query_filter_account_channel_dates(
-        self,
-        acc_names,
-        channels,
-        dates,
-        variable_aggregation_type="and",
-        date_key="date",
-        channel_key="channelId",
-        account_key="account_name",
-    ):
-        """
-        A query to filter the database on account_name,
-        and/or channel_names, and/or dates.
-        the aggregation of variables (`account_name`, `channels`, and `dates`)
-        can be set to `and` or `or`
-
-        Parameters:
-        ------------
-        acc_names : list of string
-            each string is an account name that needs to be included.
-            The minimum length of this list is 1
-        channels : list of string
-            each string is a channel identifier for
-            the channels that need to be included.
-            The minimum length of this list is 1
-        dates : list of datetime
-            each datetime object is a date that needs to be included.
-            The minimum length of this list is 1
-            should be in type of `%Y-%m-%d` which is the exact database format
-        variable_aggregation_type : string
-            values can be [`and`, `or`], the aggregation type between the variables
-            (variables are `acc_names`, `channels`, and `dates`)
-            `or` represents the or between the queries of acc_name, channels, dates
-            `and` represents the and between the queries of acc_name, channels, dates
-            default value is `and`
-        value_aggregation_type : string
-            values can be [`and`, `or`], the aggregation type between the
-            values of each variable
-            `or` represents the `or` operation between the values of input arrays
-            `and` represents the `and` operation between the values of input arrays
-            default value is `or`
-        date_key : string
-            the name of the field of date in database
-            default is `date`
-        channel_key : string
-            the id of the field of channel name in database
-            default is `channelId`
-        account_key : string
-            the name of the field account name in the database
-            default is `account_name`
-        Returns:
-        ----------
-        query : dictionary
-            the query to get access
-        """
-        # creating the query
-        query = {
-            "$"
-            + variable_aggregation_type: [
-                {account_key: {"$in": acc_names}},
-                {channel_key: {"$in": channels}},
-                {date_key: {"$in": dates}},
-            ]
-        }
-
-        return query
-
-    def create_query_channel(self, channels_name):
-        """
-        create a dictionary of query to get channel_id using channel_name
-        Parameters:
-        -------------
-        channel_name : list
-            a list of channel names to retrieve their id
-
-        Returns:
-        ---------
-        query : dictionary
-            the query to retrieve the channel ids
-        """
-        query_channelId = {"channel": {"$in": channels_name}}
-
-        return query_channelId
-
-    def create_query_threads(
-        self, channels_id, dates, channelsId_key="channelId", date_key="date"
-    ) -> dict:
-        """
-        create a dictionary of query to query the DB,
-        getting the messages for specific channels and dates
-        Parameters:
-        ------------
-        channels_id : list
-            list of strings, each string is a channel
-            identifier for the channels that need to be included.
-            The minimum length of this list is 1
-        dates : list
-            list of datetime objects, each datetime
-            object is a date that needs to be included.
- The minimum length of this list is 1 - channelsId_key : string - the field name corresponding to chnnel id in database - default value is `channelId` - date_key : string - the field name corresponding to date in database - default value is `date` - - Returns: - --------- - query : dictionary - a dictionary that query the database - """ - # Array inputs checking - if len(channels_id) < 1: - raise ValueError("channels_id array is empty!") - if len(dates) < 1: - raise ValueError("dates array is empty!") - - datetime_query = [] - for date in dates: - datetime_query.append({date_key: {"$regex": date}}) - - query = { - "$and": [ - {channelsId_key: {"$in": channels_id}}, - {"$or": datetime_query}, - # do not return the messages with no thread - {"thread": {"$ne": "None"}}, - ] - } - - return query diff --git a/discord_analyzer/DB_operations/network_graph.py b/discord_analyzer/DB_operations/network_graph.py deleted file mode 100644 index 466d653..0000000 --- a/discord_analyzer/DB_operations/network_graph.py +++ /dev/null @@ -1,300 +0,0 @@ -# Store and Rietrive the network graph from neo4j db - -import datetime - -import networkx -from tc_neo4j_lib import Query - - -def make_neo4j_networkx_query_dict( - networkx_graphs: dict[datetime.datetime, networkx.classes.graph.Graph], - guildId: str, - community_id: str, -) -> list[Query]: - """ - make a list of queries to store networkx graphs into the neo4j - - Parameters: - ------------- - networkx_graphs : dictionary of networkx.classes.graph.Graph - or networkx.classes.digraph.DiGraph - the dictinoary keys is the date of graph and the values - are the actual networkx graphs - guildId : str - the guild that the members belong to - community_id : str - the community id to save the data for - - Returns: - ----------- - queries_list : list[Query] - list of string queries to store data into neo4j - """ - # extract the graphs and their corresponding interaction dates - graph_list, graph_dates = list(networkx_graphs.values()), list( - networkx_graphs.keys() - ) - - # make a list of queries for each date to save - # the Useraccount and INTERACTED relation between them - queries_list = make_graph_list_query( - networkx_graphs=graph_list, - networkx_dates=graph_dates, - guildId=guildId, - community_id=community_id, - toGuildRelation="IS_MEMBER", - ) - - return queries_list - - -def make_graph_list_query( - networkx_graphs: networkx.classes.graph.Graph, - networkx_dates: list[datetime.datetime], - guildId: str, - community_id: str, - toGuildRelation: str = "IS_MEMBER", -) -> list[Query]: - """ - Make a list of queries for each graph to save their results - - Parameters: - ------------- - networkx_graphs : list of networkx.classes.graph.Graph - or networkx.classes.digraph.DiGraph - the list of graph created from user interactions - networkx_dates : list of dates - the dates for each graph - guildId : str - the guild that the members belong to - default is `None` meaning that it wouldn't be belonged to any guild - community_id : str - the community id to save the data for - toGuildRelation : str - the relationship label that connect the users to guilds - default value is `IS_MEMBER` - - Returns: - --------- - final_queries : list[Query] - list of strings, each is a query for an interaction graph to be created - """ - final_queries: list[Query] = [] - - for graph, date in zip(networkx_graphs, networkx_dates): - nodes_dict = graph.nodes.data() - edges_dict = graph.edges.data() - - node_queries, query_relations = create_network_query( - nodes_dict, - edges_dict, - 
date, - guildId=guildId, - toGuildRelation=toGuildRelation, - ) - community_query = create_community_node_query(community_id, guildId) - - final_queries.extend(node_queries) - final_queries.extend(query_relations) - final_queries.append(community_query) - - return final_queries - - -def create_community_node_query( - community_id: str, - guild_id: str, - community_node: str = "Community", -) -> Query: - """ - create the community node - - Parameters - ------------ - community_id : str - the community id to create its node - guild_id : str - the guild node to attach to community - - Returns - --------- - query : Query - the query to run on neo4j to create community node - """ - date_now_timestamp = get_timestamp() - - query_str = f""" - MERGE (g:Guild {{guildId: $guild_id}}) - ON CREATE SET g.createdAt = $date_now - WITH g - MERGE (c:{community_node} {{id: $community_id}}) - ON CREATE SET c.createdAt = $date_now - WITH g, c - MERGE (g) -[r:IS_WITHIN]-> (c) - ON CREATE SET r.createdAt = $date_now - """ - - parameters = { - "guild_id": guild_id, - "date_now": int(date_now_timestamp), - "community_id": community_id, - } - query = Query(query_str, parameters) - - return query - - -def create_network_query( - nodes_dict: networkx.classes.reportviews.NodeDataView, - edge_dict: networkx.classes.reportviews.EdgeDataView, - graph_date: datetime.datetime, - guildId: str, - nodes_type: str = "DiscordAccount", - rel_type: str = "INTERACTED_WITH", - toGuildRelation: str = "IS_MEMBER", -) -> tuple[list[Query], list[Query]]: - """ - make string query to save the accounts with their - account_name and relationships with their relation from **a graph**. - The query to add the nodes and edges is using `MERGE` operator - of Neo4j db since it won't create duplicate nodes and edges - if the relation and the account was saved before - - Parameters: - ------------- - nodes_dict : NodeDataView - the nodes of a Networkx graph - edge_dict : EdgeDataView - the edges of a Networkx graph - graph_date : datetime - the date of the interaction in as a python datetime object - nodes_type : str - the type of nodes to be saved - default is `Account` - rel_type : str - the type of relationship to create - default is `INTERACTED` - - Returns: - ---------- - node_queries : list[Query] - the list of MERGE queries for creating all nodes - rel_queries : list[Query] - the list of MERGE queries for creating all relationships - """ - # getting the timestamp `date` - graph_date_timestamp = get_timestamp(graph_date) - date_now_timestamp = get_timestamp() - - # initializiation of queries - rel_queries: list[Query] = [] - node_queries: list[Query] = [] - - for node in nodes_dict: - node_str_query = "" - # retrieving node data - # user number - node_num = node[0] - # user account name - node_acc_name = node[1]["acc_name"] - # creating the query - node_str_query += ( - f"MERGE (a{node_num}:{nodes_type} {{userId: $node_acc_name}}) " - ) - node_str_query += f"""ON CREATE SET a{node_num}.createdAt = - $date_now_timestamp - """ - - # relationship query between users and guilds - if guildId is not None: - # creating the guilds if they weren't created before - node_str_query += f"""MERGE (g:Guild {{guildId: '{guildId}'}}) - ON CREATE SET g.createdAt = $date_now_timestamp - """ - - node_str_query += f""" - MERGE (a{node_num}) - -[rel_guild{node_num}:{toGuildRelation}]-> (g) - ON CREATE SET - rel_guild{node_num}.createdAt = $date_now_timestamp - """ - - parameters = { - "node_acc_name": node_acc_name, - "date_now_timestamp": 
int(date_now_timestamp), - } - query_str = node_str_query + ";" - - node_queries.append(Query(query_str, parameters)) - - for idx, edge in enumerate(edge_dict): - rel_str_query = "" - - # retrieving edge data - - # relationship from user number - starting_acc_num = edge[0] - # relationship to user number - ending_acc_num = edge[1] - - starting_node_acc_name = nodes_dict[starting_acc_num]["acc_name"] - ending_node_acc_name = nodes_dict[ending_acc_num]["acc_name"] - - # the interaction count between them - interaction_count = edge[2]["weight"] - - rel_str_query += f"""MATCH (a{starting_acc_num}:{nodes_type} - {{userId: $starting_node_acc_name}}) - MATCH (a{ending_acc_num}:{nodes_type} - {{userId: $ending_node_acc_name}}) - MERGE - (a{starting_acc_num}) -[rel{idx}:{rel_type} - {{ - date: $date, - weight: $weight, - guildId: $guild_id - }} - ]-> (a{ending_acc_num}) - """ - query_str = rel_str_query + ";" - parameters = { - "starting_node_acc_name": starting_node_acc_name, - "ending_node_acc_name": ending_node_acc_name, - "date": int(graph_date_timestamp), - "weight": int(interaction_count), - "guild_id": guildId, - } - rel_queries.append(Query(query_str, parameters)) - - return node_queries, rel_queries - - -def get_timestamp(time: datetime.datetime | None = None) -> float: - """ - get the timestamp of the given time or just now - - Parameters - ------------ - time : datetime.datetime - the time to get its timestamp - default is `None` meaning to send the time of now - - Returns - -------- - timestamp : float - the timestamp of the time multiplied to 1000 - """ - using_time: datetime.datetime - if time is not None: - using_time = time - else: - using_time = datetime.datetime.now() - - timestamp = ( - using_time.replace( - hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc - ).timestamp() - * 1000 - ) - - return timestamp diff --git a/discord_analyzer/__init__.py b/discord_analyzer/__init__.py deleted file mode 100644 index 3c8cd39..0000000 --- a/discord_analyzer/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env python3 -# flake8: noqa -from .rn_analyzer import RnDaoAnalyzer diff --git a/discord_analyzer/analysis/activity_hourly.py b/discord_analyzer/analysis/activity_hourly.py deleted file mode 100644 index b231925..0000000 --- a/discord_analyzer/analysis/activity_hourly.py +++ /dev/null @@ -1,657 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# activity_hourly.py -# -# Author Ene SS Rawa / Tjitse van der Molen - - -# # # # # import libraries # # # # # - -import json - -import numpy as np - - -def parse_reaction(s): - result = [] - for subitem in s: - items = subitem.split(",") - parsed_items = [] - for item in items: - parsed_items.append(item) - result.append(parsed_items) - return result - - -# # # # # main function # # # # # - - -def activity_hourly( - json_file, out_file_name=None, acc_names=[], mess_substring=None, emoji_types=None -): - """ - Counts activity per hour from json_file and stores in out_file_name - - Input: - json_file - [JSON]: list of JSON objects with message data - out_file_name - str: path and filename where output is stored - acc_names - [str]: account names for which activity should be - counted separately (default = []) - mess_substring - [str]: only messages containing at least one - substring in this list are considered. all messages are - considered if set to None (default = None) - emoji_types - [str]: only emojis in this list are considered. 
all - emojis are considered if set to None (default = None) - - Output: - warning_count - [int]: list of counts for the different possible - warnings that could be raised by the script: - 1st entry: number of messages sent by an author not listed in - acc_names - 2nd entry: number of times that a duplicate DayActivity object - is encounterd. if this happens, the first object in the list - is used. - 3rd entry: number of times a message author mentions themselves - in the message. these mentions are not counted - 4rd entry: number of times a message author emoji reacts to - their own message. these reactions are not counted - 5th entry: number of times an emoji sender is not in acc_names - 6th entry: number of times a mentioned account is not in - acc_names - 7th entry: number of times an account that is replied to is not - in acc_names - - Notes: - The results are saved as JSON objects based on out_file_name - """ - - # initiate array with zeros for counting error occurences - warning_count = [0] * 7 - - # initiate empty result array for DayActivity objects all_day_activity_obj = [] - - # add remainder category to acc_names - acc_names.append("remainder") - all_day_activity_obj = [] - # for each message - for mess in json_file: - # # # check for specific message content # # # - - # if message contains specified substring (or None are specified) - if (mess_substring is None) or ( - any([ss in mess["message_content"] for ss in mess_substring]) - ): - # # # extract data # # # - - # obtain message date, channel and author and reply author - mess_date = mess["datetime"].strftime("%Y-%m-%d") - mess_hour = int(mess["datetime"].strftime("%H")) - mess_chan = mess["channel"] - mess_auth = mess["author"] - rep_auth = mess["replied_user"] - - reactions = parse_reaction(mess["reactions"]) - - try: - # obtain index of author in acc_names - auth_i = acc_names.index(mess_auth) - except Exception as exp: - # if author is not in acc_names, - # raise warning and add counts to remainder - print( - f"WARNING: author name {mess_auth} not found in acc_names", - f"Exception: {exp}", - ) - warning_count[0] += 1 - auth_i = -1 - - if rep_auth is not None: - try: - # obtain index of reply author in acc_names - rep_i = acc_names.index(rep_auth) - except Exception as exp: - # if author is not in acc_names, raise warning - # and add counts to remainder - print( - f"WARNING: author name {rep_auth} not found in acc_names", - f"Exception: {exp}", - ) - warning_count[6] += 1 - rep_i = -1 - else: - rep_i = None - - # # # obtain object index in object list # # # - - # see if an object exists with corresponding date and channel - (all_day_activity_obj, obj_list_i, warning_count) = get_obj_list_i( - all_day_activity_obj, mess_date, mess_chan, acc_names, warning_count - ) - - # # # count activity per hour # # # - - # count reactions - (n_reac, reacting_accs, warning_count) = count_reactions( - reactions, emoji_types, mess_auth, warning_count - ) - - # if there are any reacting accounts - if len(reacting_accs) > 0: - # for each reacting account - for r_a in reacting_accs: - # add reacting accounts - all_day_activity_obj[obj_list_i].reacted_per_acc[auth_i].append(r_a) - - # add n_reac to hour of message that received the emoji - all_day_activity_obj[obj_list_i].reacted[auth_i, mess_hour] += int(n_reac) - - # count raised warnings - warning_count[4] += count_from_list( - reacting_accs, - acc_names, - all_day_activity_obj[obj_list_i].reacter, - mess_hour, - ) - - # count mentions - (n_men, n_rep_men, mentioned_accs, warning_count) = 
count_mentions( - mess["user_mentions"], rep_auth, mess_auth, warning_count - ) - - # if there are any mentioned accounts - if len(mentioned_accs) > 0: - # for each mentioned account - for m_a in mentioned_accs: - # add mentioned accounts - all_day_activity_obj[obj_list_i].mentioner_per_acc[auth_i].append( - m_a - ) - - # if message was not sent in thread - if mess["threadId"] is None: - # if message is default message - if mess["mess_type"] == 0: - # add 1 to hour of message - all_day_activity_obj[obj_list_i].lone_messages[ - auth_i, mess_hour - ] += int(1) - - # add n_men to hour for message sender - all_day_activity_obj[obj_list_i].mentioner[ - auth_i, mess_hour - ] += int(n_men) - - # count raised warnings - warning_count[5] += count_from_list( - mentioned_accs, - acc_names, - all_day_activity_obj[obj_list_i].mentioned, - mess_hour, - ) - - # if message is reply - elif mess["mess_type"] == 19: - # store account name that replied - # for author of message that was replied to - all_day_activity_obj[obj_list_i].replied_per_acc[rep_i].append( - mess_auth - ) - - # add 1 to hour of message for replier - all_day_activity_obj[obj_list_i].replier[auth_i, mess_hour] += 1 - - # add 1 to hour of message for replied - all_day_activity_obj[obj_list_i].replied[rep_i, mess_hour] += 1 - - # add n_men to hour for message sender - all_day_activity_obj[obj_list_i].mentioner[ - auth_i, mess_hour - ] += int(n_men) - - # count raised warnings - warning_count[5] += count_from_list( - mentioned_accs, - acc_names, - all_day_activity_obj[obj_list_i].mentioned, - mess_hour, - ) - - # add n_rep_men to hour of message - all_day_activity_obj[obj_list_i].rep_mentioner[ - auth_i, mess_hour - ] += int(n_rep_men) - all_day_activity_obj[obj_list_i].rep_mentioned[ - rep_i, mess_hour - ] += int(n_rep_men) - - # if reply is to unknown account - # and this account got mentioned in the reply - if n_rep_men > 0 and rep_i == -1: - print( - "WARNING: acc name {} not found in acc_names".format( - rep_auth - ) - ) - warning_count[5] += 1 - - # if message was sent in thread - else: - # if message is default message - if mess["mess_type"] == 0: - # add 1 to hour of message - all_day_activity_obj[obj_list_i].thr_messages[ - auth_i, mess_hour - ] += int(1) - # add n_men to hour for message sender - all_day_activity_obj[obj_list_i].mentioner[ - auth_i, mess_hour - ] += int(n_men) - - # count raised warnings - warning_count[5] += count_from_list( - mentioned_accs, - acc_names, - all_day_activity_obj[obj_list_i].mentioned, - mess_hour, - ) - # if message is reply - elif mess["mess_type"] == 19: - # store account name that replied - # for author of message that was replied to - all_day_activity_obj[obj_list_i].replied_per_acc[rep_i].append( - mess_auth - ) - - # add 1 to hour of message for replier - all_day_activity_obj[obj_list_i].replier[auth_i, mess_hour] += 1 - - # add 1 to hour of message for replied - all_day_activity_obj[obj_list_i].replied[rep_i, mess_hour] += int(1) - - # add n_men to hour for message sender - all_day_activity_obj[obj_list_i].mentioner[ - auth_i, mess_hour - ] += int(n_men) - - # count raised warnings - warning_count[5] += count_from_list( - mentioned_accs, - acc_names, - all_day_activity_obj[obj_list_i].mentioned, - mess_hour, - ) - - # add n_rep_men to hour of message - all_day_activity_obj[obj_list_i].rep_mentioner[ - auth_i, mess_hour - ] += int(n_rep_men) - all_day_activity_obj[obj_list_i].rep_mentioned[ - rep_i, mess_hour - ] += int(n_rep_men) - - # if reply is to unknown account - # and this account got 
mentioned in the reply - if n_rep_men > 0 and rep_i == -1: - print( - "WARNING: acc name {} not found in acc_names".format( - rep_auth - ) - ) - warning_count[5] += 1 - - # # # store results # # # - # json_out_file = store_results_json([i.asdict() for i in \ - # all_day_activity_obj], out_file_name) - return (warning_count, [i.asdict() for i in all_day_activity_obj]) - - -# # # # # classes # # # # # - - -class DayActivity: - # define constructor - def __init__( - self, - date, - channel, - lone_messages, - thr_messages, - replier, - replied, - mentioner, - mentioned, - rep_mentioner, - rep_mentioned, - reacter, - reacted, - reacted_per_acc, - mentioner_per_acc, - replied_per_acc, - acc_names, - ): - self.date = date # date of object - self.channel = channel # channel id of object - # number of lone messages per hour per account - self.lone_messages = lone_messages - # number of thread messages per hour per account - self.thr_messages = thr_messages - self.replier = replier # number of replies sent per hour per account - # number of replies received per hour per account - self.replied = replied - self.mentioner = mentioner # number of mentions sent per hour per account - # number of mentions received per hour per account - self.mentioned = mentioned - # number of reply mentions sent per hour per account - self.rep_mentioner = rep_mentioner - # number of reply mentions received per hour per account - self.rep_mentioned = rep_mentioned - # number of reactions sent per hour per account - self.reacter = reacter - # number of reactions received per hour per account - self.reacted = reacted - # list of account names from which reactions - # are received per account (duplicates = multiple reactions) - self.reacted_per_acc = reacted_per_acc - # list of account names that are mentioned by - # account per account (duplicates = multiple mentions) - self.mentioner_per_acc = mentioner_per_acc - # list of account names from which replies are - # received per account (duplicates = multiple replies) - self.replied_per_acc = replied_per_acc - # account names (corresponds to row index of activity types) - self.acc_names = acc_names - - # # # functions # # # - - # turn object into dictionary - - def asdict(self): - return { - "date": self.date, - "channel": self.channel, - "lone_messages": self.lone_messages.tolist(), - "thr_messages": self.thr_messages.tolist(), - "replier": self.replier.tolist(), - "replied": self.replied.tolist(), - "mentioner": self.mentioner.tolist(), - "mentioned": self.mentioned.tolist(), - "rep_mentioner": self.rep_mentioner.tolist(), - "rep_mentioned": self.rep_mentioned.tolist(), - "reacter": self.reacter.tolist(), - "reacted": self.reacted.tolist(), - "reacted_per_acc": self.reacted_per_acc, - "mentioner_per_acc": self.mentioner_per_acc, - "replied_per_acc": self.replied_per_acc, - "acc_names": self.acc_names, - } - - -# # # # # functions # # # # # - - -def get_obj_list_i( - all_day_activity_obj, mess_date, mess_chan, acc_names, warning_count -): - """ - Assesses index of DayActivity object - - Input: - all_day_activity_obj - [obj]: list of DayActivity objects - mess_date - str: date in which message was sent yyyy-mm-dd - mess_chan - str: name of channel in which message was sent - num_rows - int: number of rows for count arrays in DayActivity - - Output: - all_day_activity_obj - [obj]: updated list of DayActivity objects - obj_list_i - int: index of DayActivity object in - all_day_activity_obj that corresponds to the message - - Notes: - if no corresponding DayActivity object is 
found in - all_day_activity_obj, a new DayActivity object is appended - """ - - # check if DayActivity object corresponding to mess_date and mess_chan exists - obj_overlap = [ - all( - [ - getattr(obj, "date", "Attribute does not exist")[0] == mess_date, - getattr(obj, "channel", "Attribute does not exist")[0] == mess_chan, - ] - ) - for obj in all_day_activity_obj - ] - - # if there is no object for the channel date combination - if not any(obj_overlap): - # create DayActivity object and add it to the list - all_day_activity_obj.append( - DayActivity( - [mess_date], - [mess_chan], - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - np.zeros((len(acc_names), 24), dtype=np.int16), - [[] for _ in range(len(acc_names))], - [[] for _ in range(len(acc_names))], - [[] for _ in range(len(acc_names))], - acc_names, - ) - ) - - # set list index for message - # TODO: Why it was -1? - obj_list_i = int(-1) - - else: - # set list index for message - obj_list_i = int(obj_overlap.index(True)) - - # see if object only occurs once and raise error if more than once - if sum(obj_overlap) > 1: - msg = "WARNING: duplicate DayActivity " - msg += "object, first entry in list is used" - print(msg) - warning_count[1] += 1 - - return all_day_activity_obj, obj_list_i, warning_count - - -# # # - - -def count_mentions(mess_mentions, replied_user, mess_auth, warning_count): - """ - Counts number of user mentions in a message - - Input: - mess_mentions - [str]: all user account names that are mentioned in - the message - replied_user - str: account name of author who is replied to if - message type is reply - mess_auth - str: message author - - Output: - n_men - int: number of mentions in message - n_rep_men - int: number of times the author of the message that is - replied to is mentioned in the message - reacting_accs - [str]: all account names that were mentioned - - Notes: - authors mentioning themselves are not counted - """ - - # set number of interactions to 0 - n_men = 0 - n_rep_men = 0 - mentioned_accs = [] - - # for each mentioned account - for mentioned in mess_mentions: - if mentioned is not None and len(mentioned) > 0: - # if mentioned account is the same as message author - if mentioned == mess_auth: - # print error and skip - msg = f"WARNING: {mess_auth} mentioned themselves. " - msg += "This is not counted" - print(msg) - warning_count[2] += 1 - - else: - # if mentioned account is not the account that was replied to - if mentioned != replied_user: - # add 1 to number of mentions - n_men += 1 - - # add mentioned account to mentioned_accs - mentioned_accs.append(mentioned) - - else: - # add 1 to number of replied mentions - n_rep_men = 1 - - return n_men, n_rep_men, mentioned_accs, warning_count - - -# # # - - -def count_reactions(mess_reactions, emoji_types, mess_auth, warning_count): - """ - Counts number of reactions to a message - - Input: - mess_reactions - [[str]]: list with a list for each emoji type, - containing the accounts that reacted with this emoji and the - emoji type (last entry of lists within list) - emoji_types - [str] or None: list of emoji types to be considered. 
- All emojis are considered when None - mess_auth - str: message author - warning_count - [int]: list with counts for warning types - - Output: - n_reac - int: number of emoji reactions to post - reacting_accs - [str]: all account names that sent an emoji (if - account sent >1 emoji, account name will be listed >1) - warning_count - [int]: upated list with counts for warning types - - notes: - emojis reacted by the author of the message are not counted but lead - to a warning instead - """ - # set number of reactions to 0 - n_reac = 0 - - # make empty list for all accounts that sent an emoji - reacting_accs = [] - - # for every emoji type - for emoji_type in mess_reactions: - # if reacting account is in acc_names and - # reacted emoji is part of emoji_types if defined - if emoji_types is None or emoji_type[-1] in emoji_types: - # for each account that reacted with this emoji - for reactor in emoji_type[:-1]: - # if the message author posted the emoji - if reactor == mess_auth: - # print error and skip - msg = f"WARNING: {mess_auth} reacted to themselves." - msg += " This is not counted" - print(msg) - warning_count[3] += 1 - - # if the reactor is not empty - elif len(reactor) > 0: - # add 1 to number of reactions - n_reac += 1 - - # store name of reactor - reacting_accs.append(reactor) - - return n_reac, reacting_accs, warning_count - - -# # # - - -def count_from_list(acc_list, acc_names, to_count, mess_hour): - """ - Adds counts per hour to accounts from list - - Input: - acc_list - [str]: all account names that should be counted (the - account is counted for each time it is in the list, allowing for - duplicates) - acc_names - [str]: account names for which activity should be - counted separately - to_count - [[int]]: activity type to be counted - mess_hour - int: hour at which message with activity was sent - - Output: - warning_count - int: number of times warning was raised - - Notes: - counts are added to DayActivity object under the to_count variable - """ - - # initiate warning count at 0 - warning_count = 0 - - # for each account - for acc in acc_list: - try: - # obtain index of account name in acc_names - acc_i = acc_names.index(acc) - - except Exception as exp: - # if acc is not in acc_names, raise warning and add count to remainder - msg = f"WARNING: acc name {acc} not found in acc_names" - msg += f", Exception: {exp}" - print(msg) - warning_count += 1 - acc_i = -1 - - # add 1 to hour of message for acc - to_count[acc_i, mess_hour] += int(1) - - return warning_count - - -# # # - - -def store_results_json(save_dict, file_name, print_out=False): - """ - Stores dictionary or list of dictionaries as JSON file - - Input: - save_dict - {}, [{}]: dictionary or list of dictionaries to be saved - file_name - str: name (including path) to where data is saved - print_out - bool: whether message should be printed confirming that - the data is saved - - Output: - out_file - JSON: JSON file with content from save_dict - - Notes: - JSON file is also saved in location specified by file_name - """ - - # initiate output file - with open(file_name, "w") as f: - # store results - json.dump(save_dict, f) - - # # save and close output file - # out_file.close() - - if print_out: - print("data saved at: " + file_name) diff --git a/discord_analyzer/analysis/analytics_interactions_script.py b/discord_analyzer/analysis/analytics_interactions_script.py deleted file mode 100644 index a5ca076..0000000 --- a/discord_analyzer/analysis/analytics_interactions_script.py +++ /dev/null @@ -1,271 +0,0 @@ -import 
itertools -from datetime import datetime -from warnings import warn - -from numpy import zeros - - -def sum_interactions_features(cursor_list, dict_keys): - """ - sum the interactions per hour - Parameters: - ------------ - cursor_list : list - the db cursor returned and converted as list - dict_keys : list - the list of dictionary keys, representing the features in database - - Returns: - ---------- - summed_counts_per_hour : dictionary - the dictionary of each feature having summed - the counts per hour, the dictionary of features is returned - """ - - summed_counts_per_hour = {} - for key in dict_keys: - summed_counts_per_hour[key] = zeros(24) - - for key in dict_keys: - # the array of hours 0:23 - for data in cursor_list: - summed_counts_per_hour[key] += data[key] - - return summed_counts_per_hour - - -def per_account_interactions( - cursor_list, - dict_keys=["replier_accounts", "reacter_accounts", "mentioner_accounts"], -): - """ - get per account interactions as `mentioner_accounts`, - `reacter_accounts`, and `replier_accounts` (summing) - Parameters: - ------------ - cursor_list : list - the db cursor returned and converted as list - dict_keys : list - the list of dictionary keys, representing the features in database - - Returns: - ---------- - summed_per_account_interactions : dictionary - the dictionary of each feature having summed the counts per hour, - the dictionary of features is returned - """ - - data_processed = {} - all_interaction_accounts = {} - - # for each interaction - for k in dict_keys: - temp_dict = {} - # get the data of a key in a map - samples = list(map(lambda data_dict: data_dict[k], cursor_list)) - - # flatten the list - samples_flattened = list(itertools.chain(*samples)) - - for _, sample in enumerate(samples_flattened): - account_name = sample[0]["account"] - interaction_count = sample[0]["count"] - - if account_name not in temp_dict.keys(): - temp_dict[account_name] = interaction_count - else: - temp_dict[account_name] += interaction_count - - if account_name not in all_interaction_accounts.keys(): - all_interaction_accounts[account_name] = interaction_count - else: - all_interaction_accounts[account_name] += interaction_count - - data_processed[k] = refine_dictionary(temp_dict) - - data_processed["all_interaction_accounts"] = refine_dictionary( - all_interaction_accounts - ) - - summed_per_account_interactions = data_processed - - return summed_per_account_interactions - - -def refine_dictionary(interaction_dict): - """ - refine dictionary and add the account id to the dictionary - - Parameters: - ------------ - interaction_dict : dict - a dictionary like {'user1': 5, 'user2: 4} - keys are usernames and values are the count of each user interaction - - Returns: - ---------- - refined_dict : nested dictionary - the input refined like this - { - '0': { 'user1': 5 }, - '1': { 'user2': 4 } - } - """ - - refined_dict = {} - for idx, data_acc in enumerate(interaction_dict.keys()): - refined_dict[f"{idx}"] = { - "account": data_acc, - "count": interaction_dict[data_acc], - } - - return refined_dict - - -def filter_channel_name_id( - cursor_list, channel_name_key="channelName", channel_id_key="channelId" -): - """ - filter the cursor list retrieved from DB for channels and their ids - - Parameters: - ------------- - cursor_list : list of dictionaries - the retreived values of DB - channel_name_key : string - the name of channel_name field in DB - default is `channel` - channel_id_key : string - the name of channel_id field in DB - default is `channelId` - 
Returns: - ---------- - channels_id_dict : dictionary - a dictionary with keys as channel_id and values as channel_name - """ - channels_id_dict = {} - for ch_id_dict in cursor_list: - # the keys in dict are channel id - chId = ch_id_dict[channel_id_key] - # and the values of dict are the channel name - channels_id_dict[chId] = ch_id_dict[channel_name_key] - - return channels_id_dict - - -def filter_channel_thread( - cursor_list, - # channels_id, - # channels_id_name, - thread_name_key="threadName", - author_key="author", - message_content_key="content", - date_key="createdDate", -): - """ - create a dictionary of channels and threads for messages, - sorted by time ascending - - Note: The cursor_list `MUST` be sorted ascending. - - Parameters: - ------------ - cursor_list : list of dictionaries - the list of values in DB containing a thread and messages of authors - # channels_id : list - # a list of channels id - # minimum length of the list is 1 - # channels_id_name : dict - # the dictionary containing {`channelId`: `channel_name`} - thread_name_key : string - the name of the thread field in DB - author_key : string - the name of the author field in DB - message_content_key : string - the name of the message content field in DB - date_key : str - the key to check whether the data is descending or not - - Returns: - ---------- - channel_thread_dict : {str:{str:{str:str}}} - a dictionary having keys of channel names, - and per thread messages as dictionary - # An example of output can be like this: - { - “CHANNEL_NAME1” : - { - “THREAD_NAME1” : - { - “1:@user1”: “Example message 1”, - “2:@user2”: “Example message 2”, - … - }, - “THREAD_NAME2” : - {More example messages in same format}, …}, - “CHANNEL_NAME2” : - {More thread dictionaries with example messages in same format}, …}, - More channel dictionaries with thread dictionaries - with example messages in same format, - … - } - """ - # check the input is descending - date_check = datetime(1961, 1, 1) - for data in cursor_list: - msg_date = datetime.strptime(data[date_key], "%Y-%m-%d %H:%M:%S") - if msg_date >= date_check: - date_check = msg_date - continue - else: - warn("Messages is not ascending ordered!") - - # First we're filtering the records via their channel name - channels_dict = {} - # create an empty array of each channel - # for chId in channels_id: - for record in cursor_list: - ch = record["channelName"] - if ch not in channels_dict: - channels_dict[ch] = [record] - else: - channels_dict[ch].append(record) - - # filtering through the channel name field in dictionary - # for record in cursor_list: - # # chId = record["channelId"] - # # ch = channels_id_name[chId] - # channels_dict[ch].append(record) - - # and the adding the filtering of thread id - channel_thread_dict = {} - - # filtering threads - for ch in channels_dict.keys(): - channel_thread_dict[ch] = {} - # initialize the index - idx = 1 - for record in channels_dict[ch]: - # get the thread name - thread = record[thread_name_key] - - # if the thread wasn't available in dict - # then make a dictionary for that - if thread not in channel_thread_dict[ch].keys(): - # reset the idx for each thread - idx = 1 - # creating the first message - channel_thread_dict[ch][thread] = { - f"{idx}:{record[author_key]}": record[message_content_key] - } - - # if the thread was created before - # then add the author content data to the dictionary - else: - # increase the index for the next messages in thread - idx += 1 - channel_thread_dict[ch][thread][f"{idx}:{record[author_key]}"] = 
record[ - message_content_key - ] - - return channel_thread_dict diff --git a/discord_analyzer/analyzer/analyzer_heatmaps.py b/discord_analyzer/analyzer/analyzer_heatmaps.py deleted file mode 100644 index 76de1c8..0000000 --- a/discord_analyzer/analyzer/analyzer_heatmaps.py +++ /dev/null @@ -1,187 +0,0 @@ -import logging -from collections import Counter -from datetime import datetime, timedelta - -from discord_analyzer.analysis.activity_hourly import activity_hourly -from discord_analyzer.analyzer.heatmaps_utils import ( - get_bot_id, - get_userids, - getNumberOfActions, - store_counts_dict, -) -from discord_analyzer.DB_operations.mongo_neo4j_ops import MongoNeo4jDB -from discord_analyzer.models.GuildsRnDaoModel import GuildsRnDaoModel -from discord_analyzer.models.HeatMapModel import HeatMapModel -from discord_analyzer.models.RawInfoModel import RawInfoModel - - -class Heatmaps: - def __init__(self, DB_connections: MongoNeo4jDB, testing: bool) -> None: - self.DB_connections = DB_connections - self.testing = testing - - def analysis_heatmap(self, guildId: str, from_start: bool = False): - """ - Based on the rawdata creates and stores the heatmap data - - Parameters: - ------------- - guildId : str - the guild id to analyze data for - from_start : bool - do the analytics from scrach or not - if True, if wouldn't pay attention to the existing data in heatmaps - and will do the analysis from the first date - - - Returns: - --------- - heatmaps_results : list of dictionary - the list of data analyzed - also the return could be None if no database for guild - or no raw info data was available - """ - # activity_hourly() - guild_msg = f"GUILDID: {guildId}:" - - client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client - - if guildId not in client.list_database_names(): - logging.error(f"{guild_msg} Database {guildId} doesn't exist") - logging.error( - f"{guild_msg} Existing databases: {client.list_database_names()}" - ) # flake8: noqa - logging.info(f"{guild_msg} Continuing") - return None - - # Collections involved in analysis - # guild parameter is the name of the database - rawinfo_c = RawInfoModel(client[guildId]) - heatmap_c = HeatMapModel(client[guildId]) - guild_rndao_c = GuildsRnDaoModel(client["Core"]) - - # Testing if there are entries in the rawinfo collection - if rawinfo_c.count() == 0: - msg = f"{guild_msg} No entries in the collection" - msg += "'rawinfos' in {guildId} databse" - logging.warning(msg) - return None - - if not heatmap_c.collection_exists(): - raise Exception( - f"{guild_msg} Collection '{heatmap_c.collection_name}' does not exist" - ) - if not rawinfo_c.collection_exists(): - raise Exception( - f"{guild_msg} Collection '{rawinfo_c.collection_name}' does not exist" - ) - - last_date = heatmap_c.get_last_date() - - if last_date is None or from_start: - # If no heatmap was created, than tha last date is the first - # rawdata entry - # last_date = rawinfo_c.get_first_date() - last_date = guild_rndao_c.get_guild_period(guildId) - if last_date is None: - msg = f"{guild_msg} Collection" - msg += f"'{rawinfo_c.collection_name}' does not exist" - raise Exception(msg) - # last_date.replace(tzinfo=timezone.utc) - else: - last_date = last_date + timedelta(days=1) - - # initialize the data array - heatmaps_results = [] - - # getting the id of bots - bot_ids = get_bot_id( - db_mongo_client=self.DB_connections.mongoOps.mongo_db_access.db_mongo_client, - guildId=guildId, - ) - - while last_date.date() < datetime.now().date(): - entries = 
rawinfo_c.get_day_entries(last_date, "ANALYZER HEATMAPS: ") - if len(entries) == 0: - # analyze next day - last_date = last_date + timedelta(days=1) - continue - - prepared_list = [] - account_list = get_userids( - db_mongo_client=self.DB_connections.mongoOps.mongo_db_access.db_mongo_client, - guildId=guildId, - ) - - for entry in entries: - if "replied_user" not in entry: - reply = "" - else: - reply = entry["replied_user"] - - # eliminating bots - if entry["author"] not in bot_ids: - prepared_list.append( - { - # .strftime('%Y-%m-%d %H:%M'), - "datetime": entry["createdDate"], - "channel": entry["channelId"], - "author": entry["author"], - "replied_user": reply, - "user_mentions": entry["user_mentions"], - "reactions": entry["reactions"], - "threadId": entry["threadId"], - "mess_type": entry["type"], - } - ) - if entry["author"] not in account_list: - account_list.append(entry["author"]) - - if entry["user_mentions"] is not None: - for account in entry["user_mentions"]: - if account not in account_list and account not in bot_ids: - account_list.append(account) - - activity = activity_hourly(prepared_list, acc_names=account_list) - # # activity[0] - # heatmap = activity[1][0] - # Parsing the activity_hourly into the dictionary - results = self._post_process_data(activity[1], len(account_list)) - heatmaps_results.extend(results) - - # analyze next day - last_date = last_date + timedelta(days=1) - - return heatmaps_results - - def _post_process_data(self, heatmap_data, accounts_len): - results = [] - for heatmap in heatmap_data: - for i in range(accounts_len): - heatmap_dict = {} - heatmap_dict["date"] = heatmap["date"][0] - heatmap_dict["channelId"] = heatmap["channel"][0] - heatmap_dict["thr_messages"] = heatmap["thr_messages"][i] - heatmap_dict["lone_messages"] = heatmap["lone_messages"][i] - heatmap_dict["replier"] = heatmap["replier"][i] - heatmap_dict["replied"] = heatmap["replied"][i] - heatmap_dict["mentioner"] = heatmap["mentioner"][i] - heatmap_dict["mentioned"] = heatmap["mentioned"][i] - heatmap_dict["reacter"] = heatmap["reacter"][i] - heatmap_dict["reacted"] = heatmap["reacted"][i] - heatmap_dict["reacted_per_acc"] = store_counts_dict( - dict(Counter(heatmap["reacted_per_acc"][i])) - ) - heatmap_dict["mentioner_per_acc"] = store_counts_dict( - dict(Counter(heatmap["mentioner_per_acc"][i])) - ) - heatmap_dict["replied_per_acc"] = store_counts_dict( - dict(Counter(heatmap["replied_per_acc"][i])) - ) - heatmap_dict["account_name"] = heatmap["acc_names"][i] - sum_ac = getNumberOfActions(heatmap_dict) - - if not self.testing and sum_ac > 0: - results.append(heatmap_dict) - - return results diff --git a/discord_analyzer/analyzer/analyzer_memberactivities.py b/discord_analyzer/analyzer/analyzer_memberactivities.py deleted file mode 100644 index 7480ca5..0000000 --- a/discord_analyzer/analyzer/analyzer_memberactivities.py +++ /dev/null @@ -1,146 +0,0 @@ -import logging -from datetime import datetime, timedelta - -from discord_analyzer.analysis.compute_member_activity import compute_member_activity -from discord_analyzer.analyzer.memberactivity_utils import MemberActivityUtils -from discord_analyzer.DB_operations.mongo_neo4j_ops import MongoNeo4jDB -from discord_analyzer.models.MemberActivityModel import MemberActivityModel -from discord_analyzer.models.RawInfoModel import RawInfoModel - - -class MemberActivities: - def __init__(self, DB_connections: MongoNeo4jDB) -> None: - self.DB_connections = DB_connections - - self.utils = MemberActivityUtils(DB_connections) - - def 
analysis_member_activity(self, guildId, mongo_connection_str, from_start=False): - """ - Based on the rawdata creates and stores the member activity data - - Parameters: - ------------- - guildId : str - the guild id to analyze data for - from_start : bool - do the analytics from scrach or not - if True, if wouldn't pay attention to the existing data in memberactivities - and will do the analysis from the first date - - Returns: - --------- - memberactivity_results : list of dictionary - the list of data analyzed - also the return could be None if no database for guild - or no raw info data was available - memberactivity_networkx_results : list of networkx objects - the list of data analyzed in networkx format - also the return could be None if no database for guild - or no raw info data was available - """ - guild_msg = f"GUILDID: {guildId}:" - - client = self.DB_connections.mongoOps.mongo_db_access.db_mongo_client - - # check current guild is exist - if guildId not in client.list_database_names(): - logging.error(f"{guild_msg} Database {guildId} doesn't exist") - logging.error(f"{guild_msg} No such databse!") - logging.info(f"{guild_msg} Continuing") - return (None, None) - - member_activity_c = MemberActivityModel(client[guildId]) - rawinfo_c = RawInfoModel(client[guildId]) - - # Testing if there are entries in the rawinfo collection - if rawinfo_c.count() == 0: - logging.warning( - f"No entries in the collection 'rawinfos' in {guildId} databse" - ) - return (None, None) - - # get current guild_info - guild_info = self.utils.get_one_guild(guildId) - - channels, window, action = ( - guild_info["metadata"]["selectedChannels"], - guild_info["metadata"]["window"], - guild_info["metadata"]["action"], - ) - period = guild_info["metadata"]["period"] - - # get date range to be analyzed - today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - - logging.info(f"{guild_msg} memberactivities Analysis started!") - - # initialize - load_past_data = False - - # if we had data from past to use - if member_activity_c.count() != 0: - load_past_data = True - - load_past_data = load_past_data and not from_start - - # first_date = rawinfo_c.get_first_date().replace( - # hour=0, minute=0, second=0, microsecond=0 - # ) - - first_date = period - if first_date is None: - logging.error(f"No guild: {guildId} available in platforms.core!") - return None, None - - last_date = today - timedelta(days=1) - - date_range = [first_date, last_date] - - if load_past_data: - # num_days_to_load = ( - # max([CON_T_THR, VITAL_T_THR, STILL_T_THR, PAUSED_T_THR])+1 - # ) * WINDOW_D - num_days_to_load = ( - max( - [ - action["CON_T_THR"], - action["VITAL_T_THR"], - action["STILL_T_THR"], - action["PAUSED_T_THR"], - ] - ) - + 1 - ) * window["period_size"] - date_range[0] = date_range[1] - timedelta(days=num_days_to_load) - - # if the date range goes back more than the "7 days `period` forward" - if date_range[0] < period + timedelta(days=window["period_size"]): - date_range[0] = period + timedelta(days=window["period_size"]) - - # get all users during date_range - all_users = self.utils.get_all_users(guildId) - # change format like 23/03/27 - date_range = [dt.strftime("%y/%m/%d") for dt in date_range] - - networkx_objects, activities = compute_member_activity( - guildId, - mongo_connection_str, - channels, - all_users, - date_range, - window, - action, - load_past_data=load_past_data, - ) - - if not from_start: - # first date of storing the data - first_storing_date = member_activity_c.get_last_date() - 
activities = self.utils.refine_memberactivities_data( - activities, first_storing_date - ) - - memberactivity_results = activities - memberactivity_networkx_results = networkx_objects - - return memberactivity_results, memberactivity_networkx_results diff --git a/discord_analyzer/analyzer/heatmaps_utils.py b/discord_analyzer/analyzer/heatmaps_utils.py deleted file mode 100644 index 7a9c578..0000000 --- a/discord_analyzer/analyzer/heatmaps_utils.py +++ /dev/null @@ -1,108 +0,0 @@ -from discord_analyzer.schemas.accounts import AccountCounts -from pymongo import MongoClient - - -def store_counts_dict(counts_dict): - # make empty result array - obj_array = [] - - # for each account - for acc in counts_dict.keys(): - # make dict and store in array - obj_array.append(AccountCounts(acc, counts_dict[acc]).asdict()) - - return obj_array - - -def getNumberOfActions(heatmap): - """get number of actions""" - sum_ac = 0 - fields = [ - "thr_messages", - "lone_messages", - "replier", - "replied", - "mentioned", - "mentioner", - "reacter", - "reacted", - ] - for field in fields: - for i in range(24): - sum_ac += heatmap[field][i] - return sum_ac - - -def get_bot_id( - db_mongo_client: MongoClient, - guildId: str, - collection_name: str = "guildmembers", - id_field_name: str = "discordId", -) -> list[str]: - """ - get the bot id from guildmembers collection - - Parameters: - ------------ - db_mongo_client : MongoClient - the access to database - guildId : str - the guildId to connect to - collection_name : str - the collection name to use - default is "guildmembers" - id_field_name : str - the fieldId that the account id is saved - default is "discordId" - - Returns: - --------- - bots : list[str] - the list of bot ids - """ - cursor = db_mongo_client[guildId][collection_name].find( - {"isBot": True}, {"_id": 0, id_field_name: 1} - ) - bots = list(cursor) - - bot_ids = [] - if bots != []: - bot_ids = list(map(lambda x: x[id_field_name], bots)) - - return bot_ids - - -def get_userids( - db_mongo_client: MongoClient, - guildId: str, - collection_name: str = "guildmembers", - id_field_name: str = "discordId", -) -> list[str]: - """ - get user ids that are not bot - - Parameters: - ------------ - db_mongo_client : MongoClient - the access to database - guildId : str - the guildId to connect to - collection_name : str - the collection name to use - default is "guildmembers" - id_field_name : str - the fieldId that the account id is saved - default is "discordId" - - Returns: - --------- - user_ids : list[str] - the list of bot ids - """ - cursor = db_mongo_client[guildId][collection_name].find( - {"isBot": False}, {"_id": 0, id_field_name: 1} - ) - users = list(cursor) - user_ids = list(map(lambda user: user[id_field_name], users)) - - return user_ids diff --git a/discord_analyzer/analyzer/utils/analyzer_db_manager.py b/discord_analyzer/analyzer/utils/analyzer_db_manager.py deleted file mode 100644 index 190d3d7..0000000 --- a/discord_analyzer/analyzer/utils/analyzer_db_manager.py +++ /dev/null @@ -1,40 +0,0 @@ -from discord_analyzer.DB_operations.mongo_neo4j_ops import MongoNeo4jDB - - -class AnalyzerDBManager: - def __init__(self): - """ - base class for the analyzer - """ - self.connection_str = None - - def set_mongo_database_info( - self, - mongo_db_user: str, - mongo_db_password: str, - mongo_db_host: str, - mongo_db_port: str, - ): - """ - MongoDB Database information setter - """ - self.mongo_user = mongo_db_user - self.mongo_pass = mongo_db_password - self.mongo_host = mongo_db_host - self.mongo_port = 
mongo_db_port - - self.connection_str = f"mongodb://{self.mongo_user}:{self.mongo_pass}@{self.mongo_host}:{self.mongo_port}" - - def database_connect(self): - """ - Connect to the database - """ - """ Connection String will be modified once the url is provided""" - - self.DB_connections = MongoNeo4jDB(testing=False) - self.DB_connections.set_mongo_db_ops( - mongo_user=self.mongo_user, - mongo_pass=self.mongo_pass, - mongo_host=self.mongo_host, - mongo_port=self.mongo_port, - ) diff --git a/discord_analyzer/analyzer/utils/guild.py b/discord_analyzer/analyzer/utils/guild.py deleted file mode 100644 index 5220a9d..0000000 --- a/discord_analyzer/analyzer/utils/guild.py +++ /dev/null @@ -1,58 +0,0 @@ -from utils.mongo import MongoSingleton - - -class Guild: - # TODO: Update to `Platform` and add `platform_id` in future - def __init__(self, guild_id: str) -> None: - self.guild_id = guild_id - self.client = MongoSingleton.get_instance().get_client() - - def check_existance(self) -> bool: - """ - check for existance of a Guild - - Returns - ---------- - exists : bool - if the Guild exist or not - """ - platform = self.client["Core"]["platforms"].find_one( - {"metadata.id": self.guild_id}, - {"_id": 1}, - ) - exists: bool - if platform is None: - exists = False - else: - exists = True - - return exists - - def update_isin_progress(self): - """ - update isInProgress field of platforms collection - """ - self.client["Core"]["platforms"].update_one( - {"metadata.id": self.guild_id}, {"$set": {"metadata.isInProgress": False}} - ) - - def get_community_id(self) -> str: - """ - get the community id of a guild - - Returns - -------- - community_id : str - the community that the Guild is related to - """ - platform = self.client["Core"]["platforms"].find_one( - {"metadata.id": self.guild_id}, {"community": 1} - ) - if platform is None: - raise ValueError( - f"No platform is available for the given guild: {self.guild_id}" - ) - - community_id = str(platform["community"]) - - return community_id diff --git a/discord_analyzer/connector/__init__.py b/discord_analyzer/connector/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/discord_analyzer/models/__init__.py b/discord_analyzer/models/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/discord_analyzer/rn_analyzer.py b/discord_analyzer/rn_analyzer.py deleted file mode 100644 index 82a6827..0000000 --- a/discord_analyzer/rn_analyzer.py +++ /dev/null @@ -1,162 +0,0 @@ -import logging - -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps -from discord_analyzer.analyzer.analyzer_memberactivities import MemberActivities -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics -from discord_analyzer.analyzer.utils.analyzer_db_manager import AnalyzerDBManager -from discord_analyzer.analyzer.utils.guild import Guild - - -class RnDaoAnalyzer(AnalyzerDBManager): - """ - RnDaoAnalyzer - class that handles database connections and data analysis - """ - - def __init__(self, guild_id: str, testing=False): - """ - Class initiation function - """ - """ Testing, prevents from data upload""" - logging.basicConfig() - logging.getLogger().setLevel(logging.INFO) - - self.testing = testing - self.neo4j_analytics = Neo4JAnalytics() - self.guild_object = Guild(guild_id) - self.guild_id = guild_id - self.community_id = self.guild_object.get_community_id() - - def run_once(self): - """Run analysis once (Wrapper)""" - # check if the guild was available - # if not, will raise an error - self.check_guild() - - 
logging.info(f"Creating heatmaps for guild: {self.guild_id}") - - heatmaps_analysis = Heatmaps(self.DB_connections, self.testing) - heatmaps_data = heatmaps_analysis.analysis_heatmap(self.guild_id) - - # storing heatmaps since memberactivities use them - analytics_data = {} - analytics_data["heatmaps"] = heatmaps_data - analytics_data["memberactivities"] = (None, None) - - self.DB_connections.store_analytics_data( - analytics_data=analytics_data, - guild_id=self.guild_id, - community_id=self.community_id, - remove_memberactivities=False, - remove_heatmaps=False, - ) - - memberactivities_analysis = MemberActivities(self.DB_connections) - ( - member_activities_data, - member_acitivities_networkx_data, - ) = memberactivities_analysis.analysis_member_activity( - self.guild_id, self.connection_str - ) - - analytics_data = {} - # storing whole data into a dictinoary - analytics_data["heatmaps"] = None - analytics_data["memberactivities"] = ( - member_activities_data, - member_acitivities_networkx_data, - ) - - self.DB_connections.store_analytics_data( - analytics_data=analytics_data, - guild_id=self.guild_id, - community_id=self.community_id, - remove_heatmaps=False, - remove_memberactivities=False, - ) - - self.neo4j_analytics.compute_metrics(guildId=self.guild_id, from_start=False) - - self.guild_object.update_isin_progress() - - def recompute_analytics(self): - """ - recompute the memberactivities (and heatmaps in case needed) - for a new selection of channels - - - - first it would update the channel selection in Core.Platform - - - Second the memebracitivites collection - of the input guildId would become empty - - - Third we would have the analytics running again on the - new channel selection (analytics would be inserted in memebractivities) - - - Returns: - --------- - `None` - """ - # check if the guild was available - # if not, will raise an error - self.check_guild() - - heatmaps_analysis = Heatmaps(self.DB_connections, self.testing) - - logging.info(f"Analyzing the Heatmaps data for guild: {self.guild_id}!") - heatmaps_data = heatmaps_analysis.analysis_heatmap( - guildId=self.guild_id, from_start=True - ) - - # storing heatmaps since memberactivities use them - analytics_data = {} - analytics_data["heatmaps"] = heatmaps_data - analytics_data["memberactivities"] = (None, None) - - self.DB_connections.store_analytics_data( - analytics_data=analytics_data, - guild_id=self.guild_id, - community_id=self.community_id, - remove_memberactivities=False, - remove_heatmaps=True, - ) - - # run the member_activity analyze - logging.info(f"Analyzing the MemberActivities data for guild: {self.guild_id}!") - memberactivity_analysis = MemberActivities(self.DB_connections) - ( - member_activities_data, - member_acitivities_networkx_data, - ) = memberactivity_analysis.analysis_member_activity( - self.guild_id, self.connection_str, from_start=True - ) - - # storing whole data into a dictinoary - analytics_data = {} - # storing whole data into a dictinoary - analytics_data["heatmaps"] = None - analytics_data["memberactivities"] = ( - member_activities_data, - member_acitivities_networkx_data, - ) - - logging.info(f"Storing analytics data for guild: {self.guild_id}!") - self.DB_connections.store_analytics_data( - analytics_data=analytics_data, - guild_id=self.guild_id, - community_id=self.community_id, - remove_memberactivities=True, - remove_heatmaps=False, - ) - - self.neo4j_analytics.compute_metrics(guildId=self.guild_id, from_start=True) - self.guild_object.update_isin_progress() - - def 
check_guild(self): - """ - check if the guild is available - """ - exist = self.guild_object.check_existance() - if exist is False: - raise ValueError(f"Guild with guildId: {self.guild_id} doesn't exist!") diff --git a/discord_analyzer/schemas/__init__.py b/discord_analyzer/schemas/__init__.py deleted file mode 100644 index e5a0d9b..0000000 --- a/discord_analyzer/schemas/__init__.py +++ /dev/null @@ -1 +0,0 @@ -#!/usr/bin/env python3 diff --git a/discord_analyzer/schemas/accounts.py b/discord_analyzer/schemas/accounts.py deleted file mode 100644 index 5e2c416..0000000 --- a/discord_analyzer/schemas/accounts.py +++ /dev/null @@ -1,13 +0,0 @@ -class AccountCounts: - """ - Class for storing number of interactions per account - """ - - # define constructor - def __init__(self, account, counts): - self.account = account # account name - self.counts = counts # number of interactions - - # convert as dict - def asdict(self): - return ({"account": self.account, "count": self.counts},) diff --git a/discord_utils.py b/discord_utils.py deleted file mode 100644 index 7c655f8..0000000 --- a/discord_utils.py +++ /dev/null @@ -1,114 +0,0 @@ -import logging - -from analyzer_init import AnalyzerInit -from automation.automation_workflow import AutomationWorkflow -from tc_messageBroker.rabbit_mq.saga.saga_base import get_saga -from utils.credentials import get_mongo_credentials -from utils.get_guild_utils import get_guild_community_ids -from utils.rabbitmq import RabbitMQSingleton -from utils.transactions_ordering import sort_transactions - - -def analyzer_recompute(sagaId: str): - saga = get_saga_instance(sagaId=sagaId) - if saga is None: - logging.warn( - f"Warn: Saga not found!, stopping the recompute for sagaId: {sagaId}" - ) - else: - platform_id = saga.data["platformId"] - guildId = get_guild_community_ids(platform_id) - - logging.info("Initializing the analyzer") - analyzer_init = AnalyzerInit(guildId) - analyzer = analyzer_init.get_analyzer() - logging.info("Analyzer initialized") - - def recompute_wrapper(**kwargs): - logging.info("recompute wrapper") - analyzer.recompute_analytics() - - def publish_wrapper(**kwargs): - pass - - logging.info("Calling the saga.next()") - saga.next( - publish_method=publish_wrapper, - call_function=recompute_wrapper, - ) - - return sagaId - - -def analyzer_run_once(sagaId: str): - saga = get_saga_instance(sagaId=sagaId) - if saga is None: - logging.warn(f"Saga not found!, stopping the run_once for sagaId: {sagaId}") - else: - platform_id = saga.data["platformId"] - guildId = get_guild_community_ids(platform_id) - - analyzer_init = AnalyzerInit(guildId) - analyzer = analyzer_init.get_analyzer() - - def run_once_wrapper(**kwargs): - analyzer.run_once() - - def publish_wrapper(**kwargs): - pass - - saga.next( - publish_method=publish_wrapper, - call_function=run_once_wrapper, - ) - return sagaId - - -def get_saga_instance(sagaId: str): - mongo_creds = get_mongo_credentials() - - saga = get_saga( - sagaId=sagaId, - connection_url=mongo_creds["connection_str"], - db_name="Saga", - collection="sagas", - ) - if saga is None: - raise ValueError(f"Saga with sagaId: {sagaId} not found!") - - return saga - - -def publish_on_success(connection, result, *args, **kwargs): - try: - sagaId = args[0] - logging.info(f"SAGAID: {sagaId}: ON_SUCCESS callback!") - - saga = get_saga_instance(sagaId=sagaId) - rabbitmq = RabbitMQSingleton.get_instance().get_client() - - transactions = saga.choreography.transactions - - (transactions_ordered, tx_not_started_count) = 
sort_transactions(transactions) - - platform_id = saga.data["platformId"] - guildId = get_guild_community_ids(platform_id) - - msg = f"GUILDID: {guildId}: " - if tx_not_started_count != 0: - tx = transactions_ordered[0] - - logging.info(f"{msg}Publishing for {tx.queue}") - - rabbitmq.connect(tx.queue) - rabbitmq.publish( - queue_name=tx.queue, - event=tx.event, - content={"uuid": sagaId, "data": saga.data}, - ) - - automation_workflow = AutomationWorkflow() - automation_workflow.start(guild_id=guildId) - - except Exception as exp: - logging.info(f"Exception occured in job on_success callback: {exp}") diff --git a/docker-compose.test.yml b/docker-compose.test.yml index 3bd3a37..6c70063 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -42,6 +42,7 @@ services: condition: service_healthy mongo: image: "mongo:6.0.8" + restart: always attach: false environment: - MONGO_INITDB_ROOT_USERNAME=root @@ -54,6 +55,7 @@ services: start_period: 40s neo4j: image: "neo4j:5.9.0" + restart: always attach: false environment: - NEO4J_AUTH=neo4j/password @@ -67,6 +69,7 @@ services: start_period: 40s rabbitmq: image: "rabbitmq:3-management-alpine" + restart: always environment: - RABBITMQ_DEFAULT_USER=root - RABBITMQ_DEFAULT_PASS=pass diff --git a/requirements.txt b/requirements.txt index 4389aba..b778b21 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,11 +2,9 @@ attrs==22.2.0 dnspython==2.2.1 exceptiongroup==1.1.0 iniconfig==2.0.0 -numpy==1.24.1 packaging==23.0 pluggy==1.0.0 pymongo==4.3.3 -pytest==7.2.0 python-dateutil==2.8.2 pytz==2022.7.1 python-dotenv>=1.0.0 @@ -14,8 +12,6 @@ six==1.16.0 tomli==2.0.1 networkx==3.1 requests==2.29.0 -pytest-cov==4.0.0 -coverage==7.2.5 python-dateutil==2.8.2 tqdm tc-messageBroker==1.6.7 @@ -23,6 +19,6 @@ sentry-sdk rq redis tc-core-analyzer-lib==1.3.1 -tc-neo4j-lib==2.0.0 +tc-neo4j-lib==2.0.1 pybars3 backoff==2.2.1 diff --git a/server.py b/server.py deleted file mode 100644 index 830e01f..0000000 --- a/server.py +++ /dev/null @@ -1,74 +0,0 @@ -""" -start the project using rabbitMQ -""" - -import functools -import logging -from typing import Any - -import backoff -from discord_utils import analyzer_recompute, analyzer_run_once, publish_on_success -from pika.exceptions import AMQPConnectionError, ConnectionClosedByBroker -from rq import Queue as RQ_Queue -from tc_messageBroker.rabbit_mq.event import Event -from tc_messageBroker.rabbit_mq.queue import Queue -from utils.rabbitmq import RabbitMQSingleton -from utils.redis import RedisSingleton -from utils.sentryio_service import set_up_sentryio - - -@backoff.on_exception( - wait_gen=backoff.expo, - exception=(ConnectionClosedByBroker, ConnectionError, AMQPConnectionError), - # waiting for 3 hours - max_time=60 * 60 * 3, -) -def analyzer(): - # sentryio service - set_up_sentryio() - rabbit_mq = RabbitMQSingleton.get_instance().get_client() - redis = RedisSingleton.get_instance().get_client() - - # 24 hours equal to 86400 seconds - rq_queue = RQ_Queue(connection=redis, default_timeout=86400) - - analyzer_recompute = functools.partial(recompute_wrapper, redis_queue=rq_queue) - analyzer_run_once = functools.partial(run_once_wrapper, redis_queue=rq_queue) - - rabbit_mq.connect(Queue.DISCORD_ANALYZER, heartbeat=60) - - rabbit_mq.on_event(Event.DISCORD_ANALYZER.RUN, analyzer_recompute) - rabbit_mq.on_event(Event.DISCORD_ANALYZER.RUN_ONCE, analyzer_run_once) - - if rabbit_mq.channel is None: - raise ConnectionError("Couldn't connect to rmq server!") - else: - logging.info("Started Consuming!") - 
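The removed server.py and discord_utils.py split work between a blocking RabbitMQ consumer and Redis-backed RQ workers: the broker callback only enqueues a job, so a day-long analysis never stalls the consuming channel. A condensed sketch of that handoff, with the job body as a placeholder:

import logging
from redis import Redis
from rq import Queue

def analyzer_job(sagaId: str) -> str:
    # placeholder for analyzer_recompute / analyzer_run_once
    logging.info(f"SAGAID:{sagaId} running analysis")
    return sagaId

redis = Redis()  # hypothetical local instance
rq_queue = Queue(connection=redis, default_timeout=86400)  # 24 hours

def on_broker_event(body: dict):
    # the consumer callback returns immediately after enqueueing
    rq_queue.enqueue(analyzer_job, sagaId=body["content"]["uuid"])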
rabbit_mq.channel.start_consuming() - - -def recompute_wrapper(body: dict[str, Any], redis_queue: RQ_Queue): - sagaId = body["content"]["uuid"] - logging.info(f"SAGAID:{sagaId} recompute job Adding to queue") - - redis_queue.enqueue( - analyzer_recompute, - sagaId=sagaId, - on_success=publish_on_success, - ) - - -def run_once_wrapper(body: dict[str, Any], redis_queue: RQ_Queue): - sagaId = body["content"]["uuid"] - logging.info(f"SAGAID:{sagaId} run_once job Adding to queue") - redis_queue.enqueue( - analyzer_run_once, - sagaId=sagaId, - on_success=publish_on_success, - ) - - -if __name__ == "__main__": - logging.basicConfig() - logging.getLogger().setLevel(logging.INFO) - analyzer() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f773acf --- /dev/null +++ b/setup.py @@ -0,0 +1,17 @@ +from setuptools import find_packages, setup + +with open("requirements.txt") as f: + requirements = f.read().splitlines() + + +setup( + name="tc-analyzer-lib", + version="1.0.0", + author="Mohammad Amin Dadgar, TogetherCrew", + maintainer="Mohammad Amin Dadgar", + maintainer_email="dadgaramin96@gmail.com", + packages=find_packages(), + description="A platform agnostic analyzer, computing the TogetherCrew dashboard metrics.", + long_description=open("README.md").read(), + install_requires=requirements, +) diff --git a/automation/__init__.py b/tc_analyzer_lib/DB_operations/__init__.py similarity index 100% rename from automation/__init__.py rename to tc_analyzer_lib/DB_operations/__init__.py diff --git a/discord_analyzer/DB_operations/mongo_neo4j_ops.py b/tc_analyzer_lib/DB_operations/mongo_neo4j_ops.py similarity index 78% rename from discord_analyzer/DB_operations/mongo_neo4j_ops.py rename to tc_analyzer_lib/DB_operations/mongo_neo4j_ops.py index 35f53ab..02a0598 100644 --- a/discord_analyzer/DB_operations/mongo_neo4j_ops.py +++ b/tc_analyzer_lib/DB_operations/mongo_neo4j_ops.py @@ -1,7 +1,8 @@ import logging -from discord_analyzer.DB_operations.mongodb_interaction import MongoDBOps -from discord_analyzer.DB_operations.network_graph import make_neo4j_networkx_query_dict +from tc_analyzer_lib.DB_operations.mongodb_interaction import MongoDBOps +from tc_analyzer_lib.DB_operations.network_graph import NetworkGraph +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps, Query @@ -15,16 +16,11 @@ def __init__(self, testing=False): self.mongoOps = None self.testing = testing - def set_mongo_db_ops( - self, mongo_user: str, mongo_pass: str, mongo_host: str, mongo_port: str - ): + def set_mongo_db_ops(self): """ setup the MongoDBOps class with the parameters needed - """ - self.mongoOps = MongoDBOps( - user=mongo_user, password=mongo_pass, host=mongo_host, port=mongo_port - ) + self.mongoOps = MongoDBOps() self.mongoOps.set_mongo_db_access() try: info = self.mongoOps.mongo_db_access.db_mongo_client.server_info() @@ -35,8 +31,8 @@ def set_mongo_db_ops( def store_analytics_data( self, analytics_data: dict, - guild_id: str, - community_id: str, + platform_id: str, + graph_schema: GraphSchema, remove_memberactivities: bool = False, remove_heatmaps: bool = False, ): @@ -51,10 +47,10 @@ def store_analytics_data( values of the heatmaps is a list of dictinoaries and memberactivities is a tuple of memberactivities dictionary list and memebractivities networkx object dictionary list - guild_id: str + platform_id: str what the data is related to - community_id : str - the community id to save the data for + graph_schema : GraphSchema + the schema for graph to be saved 
remove_memberactivities : bool remove the whole memberactivity data and insert default is `False` which means don't delete the existing data @@ -74,7 +70,7 @@ def store_analytics_data( if not self.testing: # mongodb transactions self.mongoOps._do_analytics_write_transaction( - guildId=guild_id, + platform_id=platform_id, delete_heatmaps=remove_heatmaps, delete_member_acitivities=remove_memberactivities, acitivties_list=memberactivities_data, @@ -86,14 +82,12 @@ def store_analytics_data( memberactivities_networkx_data is not None and memberactivities_networkx_data != [] ): - queries_list = make_neo4j_networkx_query_dict( + network_graph = NetworkGraph(graph_schema, platform_id) + queries_list = network_graph.make_neo4j_networkx_query_dict( networkx_graphs=memberactivities_networkx_data, - guildId=guild_id, - community_id=community_id, ) - print(queries_list[0]) self.run_operations_transaction( - guildId=guild_id, + platform_id=platform_id, queries_list=queries_list, remove_memberactivities=remove_memberactivities, ) @@ -101,14 +95,14 @@ def store_analytics_data( logging.warning("Testing mode enabled! Not saving any data") def run_operations_transaction( - self, guildId: str, queries_list: list[Query], remove_memberactivities: bool + self, platform_id: str, queries_list: list[Query], remove_memberactivities: bool ) -> None: """ do the deletion and insertion operations inside a transaction Parameters: ------------ - guildId : str + platform_id : str the guild id that the users are connected to it which we're going to delete the relations of it queries_list : list @@ -117,15 +111,15 @@ def run_operations_transaction( remove_memberactivities : bool if True, remove the old data specified in that guild """ - self.guild_msg = f"GUILDID: {guildId}:" + self.guild_msg = f"platform_id: {platform_id}:" transaction_queries: list[Query] = [] if remove_memberactivities: logging.info( - f"{self.guild_msg} Neo4J GuildId accounts relation will be removed!" + f"{self.guild_msg} Neo4J platform_id accounts relation will be removed!" 
) delete_relationship_query = self._create_guild_rel_deletion_query( - guildId=guildId + platform_id=platform_id ) transaction_queries.append(delete_relationship_query) @@ -134,7 +128,7 @@ def run_operations_transaction( self.neo4j_ops.run_queries_in_batch(transaction_queries, message=self.guild_msg) def _create_guild_rel_deletion_query( - self, guildId: str, relation_name: str = "INTERACTED_WITH" + self, platform_id: str, relation_name: str = "INTERACTED_WITH" ) -> Query: """ create a query to delete the relationships @@ -142,7 +136,7 @@ def _create_guild_rel_deletion_query( Parameters: ------------- - guildId : str + platform_id : str the guild id that the users are connected to it relation_name : str the relation we want to delete @@ -155,12 +149,12 @@ def _create_guild_rel_deletion_query( query_str = f""" MATCH (:DiscordAccount) - -[r:{relation_name} {{guildId: '{guildId}'}}]-(:DiscordAccount) + -[r:{relation_name} {{platformId: '{platform_id}'}}]-(:DiscordAccount) DETACH DELETE r""" parameters = { "relation_name": relation_name, - "guild_id": guildId, + "platform_id": platform_id, } query = Query( diff --git a/discord_analyzer/DB_operations/mongodb_access.py b/tc_analyzer_lib/DB_operations/mongodb_access.py similarity index 87% rename from discord_analyzer/DB_operations/mongodb_access.py rename to tc_analyzer_lib/DB_operations/mongodb_access.py index 402006b..4d1b440 100644 --- a/discord_analyzer/DB_operations/mongodb_access.py +++ b/tc_analyzer_lib/DB_operations/mongodb_access.py @@ -1,8 +1,8 @@ -from pymongo import MongoClient +from tc_analyzer_lib.utils.mongo import MongoSingleton class DB_access: - def __init__(self, db_name, connection_string) -> None: + def __init__(self, db_name) -> None: """ set-up the MongoDB database access @@ -22,7 +22,7 @@ def __init__(self, db_name, connection_string) -> None: the connection string used to connect to MongoDB """ - client = self._get_mongoClient(connection_string) + client = MongoSingleton.get_instance().get_client() self.db_name = db_name # if db_name is None: # self.db_client = None @@ -31,25 +31,6 @@ def __init__(self, db_name, connection_string) -> None: self.db_mongo_client = client - def _get_mongoClient(self, connection_string): - """ - get the database instance - - Parameters: - ------------ - connection_string : string - the url of connection - Returns: - --------- - client : MongoClient - the mongodb client access - """ - client = MongoClient( - connection_string, serverSelectionTimeoutMS=10000, connectTimeoutMS=200000 - ) - - return client - def _db_call(self, calling_function, query, feature_projection=None, sorting=None): """ call the function on database, it could be whether aggragation or find diff --git a/discord_analyzer/DB_operations/mongodb_interaction.py b/tc_analyzer_lib/DB_operations/mongodb_interaction.py similarity index 78% rename from discord_analyzer/DB_operations/mongodb_interaction.py rename to tc_analyzer_lib/DB_operations/mongodb_interaction.py index c288040..9ef5cb4 100644 --- a/discord_analyzer/DB_operations/mongodb_interaction.py +++ b/tc_analyzer_lib/DB_operations/mongodb_interaction.py @@ -1,39 +1,33 @@ import logging -from discord_analyzer.DB_operations.mongodb_access import DB_access from pymongo.read_concern import ReadConcern from pymongo.write_concern import WriteConcern +from tc_analyzer_lib.DB_operations.mongodb_access import DB_access class MongoDBOps: - def __init__(self, user, password, host, port): + def __init__(self): """ mongoDB database operations """ - self.connection_str = 
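store_analytics_data now takes a platform_id plus a GraphSchema instead of Discord-specific ids, so the same storage path can serve any platform. A sketch of a call under the new signature; the GraphSchema constructor fields are assumptions inferred from the attributes this diff reads off it (user_label, platform_label, member_relation, interacted_with_rel, interacted_in_rel):

from tc_analyzer_lib.DB_operations.mongo_neo4j_ops import MongoNeo4jDB
from tc_analyzer_lib.schemas import GraphSchema

schema = GraphSchema(  # field names assumed from usage elsewhere in this diff
    user_label="DiscordMember",
    platform_label="DiscordPlatform",
    member_relation="IS_MEMBER",  # assumed relation name
    interacted_with_rel="INTERACTED_WITH",
    interacted_in_rel="INTERACTED_IN",
)
db = MongoNeo4jDB(testing=True)  # testing=True logs instead of writing
db.set_mongo_db_ops()
db.store_analytics_data(
    analytics_data={"heatmaps": None, "memberactivities": (None, None)},
    platform_id="demo_platform_id",  # hypothetical
    graph_schema=schema,
)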
f"mongodb://{user}:{password}@{host}:{port}" self.DB_access = DB_access - self.guild_msg = "" - # logging.basicConfig() - # logging.getLogger().setLevel(logging.INFO) - def set_mongo_db_access(self, guildId=None): + def set_mongo_db_access(self, platform_id=None): """ set a database access to a specific guild - if guildId was `None` then the mongo_db_access just + if platform_id was `None` then the mongo_db_access just have the `db_mongo_client` to use but if wasn't then mongo_db_access would also have db_client which is connected to a guild """ - self.mongo_db_access = self.DB_access( - db_name=guildId, connection_string=self.connection_str - ) - self.guild_msg = f"GUILDID: {guildId}:" + self.mongo_db_access = self.DB_access(db_name=platform_id) + self.guild_msg = f"PLATFORMID: {platform_id}:" def _do_analytics_write_transaction( self, - guildId, + platform_id, delete_heatmaps, delete_member_acitivities, acitivties_list, @@ -59,7 +53,7 @@ def _do_analytics_write_transaction( def callback_wrapper(session): self._session_custom_transaction( session, - guildId, + platform_id, delete_heatmaps, delete_member_acitivities, acitivties_list, @@ -77,7 +71,7 @@ def callback_wrapper(session): def _session_custom_transaction( self, session, - guildId, + platform_id, delete_heatmaps, delete_member_acitivities, memberactiivties_list, @@ -90,22 +84,24 @@ def _session_custom_transaction( also insertion of activities_list and heatmaps_list after """ - self.guild_msg = f"GUILDID: {guildId}:" + self.guild_msg = f"PLATFORMID: {platform_id}:" if delete_heatmaps: logging.info(f"{self.guild_msg} Removing Heatmaps data!") - self.empty_collection(session=session, guildId=guildId, activity="heatmaps") + self.empty_collection( + session=session, platform_id=platform_id, activity="heatmaps" + ) if delete_member_acitivities: logging.info(f"{self.guild_msg} Removing MemberActivities MongoDB data!") self.empty_collection( - session=session, guildId=guildId, activity="memberactivities" + session=session, platform_id=platform_id, activity="memberactivities" ) if memberactiivties_list is not None and memberactiivties_list != []: self.insert_into_memberactivities_batches( session=session, acitivities_list=memberactiivties_list, - guildId=guildId, + platform_id=platform_id, batch_size=batch_size, ) @@ -113,12 +109,12 @@ def _session_custom_transaction( self.insert_into_heatmaps_batches( session=session, heatmaps_list=heatmaps_list, - guildId=guildId, + platform_id=platform_id, batch_size=batch_size, ) def insert_into_memberactivities_batches( - self, session, acitivities_list, guildId, batch_size=1000 + self, session, acitivities_list, platform_id, batch_size=1000 ): """ insert data into memberactivities collection of mongoDB in batches @@ -130,10 +126,10 @@ def insert_into_memberactivities_batches( batch_size : int the count of data in batches default is 1000 - guildId : str - the guildId to insert data to it + platform_id : str + the platform_id to insert data to it """ - memberactivities_collection = session.client[guildId].memberactivities + memberactivities_collection = session.client[platform_id].memberactivities self._batch_insertion( collection=memberactivities_collection, data=acitivities_list, @@ -142,7 +138,7 @@ def insert_into_memberactivities_batches( ) def insert_into_heatmaps_batches( - self, session, heatmaps_list, guildId, batch_size=1000 + self, session, heatmaps_list, platform_id, batch_size=1000 ): """ insert data into heatmaps collection of mongoDB in batches @@ -154,10 +150,10 @@ def 
insert_into_heatmaps_batches( batch_size : int the count of data in batches default is 1000 - guildId : str - the guildId to insert data to it + platform_id : str + the platform_id to insert data to it """ - heatmaps_collection = session.client[guildId].heatmaps + heatmaps_collection = session.client[platform_id].heatmaps self._batch_insertion( heatmaps_collection, @@ -188,7 +184,7 @@ def _batch_insertion(self, collection, data, message, batch_size): logging.info(f"{message}: Batch {loop_idx + 1}/{batch_count}") collection.insert_many(data[batch_idx : batch_idx + batch_size]) - def empty_collection(self, session, guildId, activity): + def empty_collection(self, session, platform_id, activity): """ empty a specified collection @@ -196,8 +192,8 @@ def empty_collection(self, session, guildId, activity): ------------- session : mongoDB session the session needed to delete the data - guildId : str - the guildId to remove its collection data + platform_id : str + the platform_id to remove its collection data activity : str `memberactivities` or `heatmaps` or other collections the collection to access and delete its data @@ -207,9 +203,9 @@ def empty_collection(self, session, guildId, activity): `None` """ if activity == "heatmaps": - collection = session.client[guildId].heatmaps + collection = session.client[platform_id].heatmaps elif activity == "memberactivities": - collection = session.client[guildId].memberactivities + collection = session.client[platform_id].memberactivities else: raise NotImplementedError( "only removing heatmaps or memberactivities is implemented!" )
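_batch_insertion caps every insert_many at batch_size documents and logs progress per batch; the same loop in isolation (collection is any pymongo collection), a minimal sketch:

import logging

def batch_insert(collection, data: list[dict], message: str, batch_size: int = 1000):
    batch_count = (len(data) + batch_size - 1) // batch_size  # ceil division
    for loop_idx, batch_idx in enumerate(range(0, len(data), batch_size)):
        logging.info(f"{message}: Batch {loop_idx + 1}/{batch_count}")
        collection.insert_many(data[batch_idx : batch_idx + batch_size])

diff --git a/tc_analyzer_lib/DB_operations/network_graph.py b/tc_analyzer_lib/DB_operations/network_graph.py new file mode 100644 index 0000000..43a6888 --- /dev/null +++ b/tc_analyzer_lib/DB_operations/network_graph.py @@ -0,0 +1,240 @@ +# Store and Retrieve the network graph from neo4j db + +import datetime + +import networkx +from tc_analyzer_lib.schemas import GraphSchema +from tc_neo4j_lib import Query + + +class NetworkGraph: + def __init__( + self, + graph_schema: GraphSchema, + platform_id: str, + ) -> None: + self.graph_schema = graph_schema + self.platform_id = platform_id + + def make_neo4j_networkx_query_dict( + self, + networkx_graphs: dict[datetime.datetime, networkx.classes.graph.Graph], + ) -> list[Query]: + """ + make a list of queries to store networkx graphs into the neo4j + + Parameters: + ------------- + networkx_graphs : dictionary of networkx.classes.graph.Graph + or networkx.classes.digraph.DiGraph + the dictionary keys are the dates of the graphs and the values + are the actual networkx graphs + + Returns: + ----------- + queries_list : list[Query] + list of string queries to store data into neo4j + """ + # extract the graphs and their corresponding interaction dates + graph_list, graph_dates = list(networkx_graphs.values()), list( + networkx_graphs.keys() + ) + + # make a list of queries for each date to save + queries_list = self.make_graph_list_query( + networkx_graphs=graph_list, + networkx_dates=graph_dates, + ) + + return queries_list + + def make_graph_list_query( + self, + networkx_graphs: networkx.classes.graph.Graph, + networkx_dates: list[datetime.datetime], + ) -> list[Query]: + """ + Make a list of queries for each graph to save their results + + Parameters: + ------------- + networkx_graphs : list of networkx.classes.graph.Graph + or networkx.classes.digraph.DiGraph + the list of graphs created from user interactions + networkx_dates : list of dates + the dates for each graph + + + Returns: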
+ --------- + final_queries : list[Query] + list of strings, each is a query for an interaction graph to be created + """ + final_queries: list[Query] = [] + + for graph, date in zip(networkx_graphs, networkx_dates): + nodes_dict = graph.nodes.data() + edges_dict = graph.edges.data() + + node_queries, query_relations = self.create_network_query( + nodes_dict, + edges_dict, + date, + ) + + final_queries.extend(node_queries) + final_queries.extend(query_relations) + + return final_queries + + def create_network_query( + self, + nodes_dict: networkx.classes.reportviews.NodeDataView, + edge_dict: networkx.classes.reportviews.EdgeDataView, + graph_date: datetime.datetime, + ) -> tuple[list[Query], list[Query]]: + """ + make a string query to save the accounts with their + account_name and relationships with their relation from **a graph**. + The query to add the nodes and edges uses the `MERGE` operator + of the Neo4j db since it won't create duplicate nodes and edges + if the relation and the account were saved before + + Parameters: + ------------- + nodes_dict : NodeDataView + the nodes of a Networkx graph + edge_dict : EdgeDataView + the edges of a Networkx graph + graph_date : datetime + the date of the interaction as a Python datetime object + + Returns: + ---------- + node_queries : list[Query] + the list of MERGE queries for creating all nodes + rel_queries : list[Query] + the list of MERGE queries for creating all relationships + """ + # getting the timestamp `date` + graph_date_timestamp = self.get_timestamp(graph_date) + date_now_timestamp = self.get_timestamp() + + # labels to be saved in Neo4j + # i.e.: DiscordMember + user_label = self.graph_schema.user_label + # i.e.: DiscordPlatform + platform_label = self.graph_schema.platform_label + member_rel_label = self.graph_schema.member_relation + users_rel_label = self.graph_schema.interacted_with_rel + + # initialization of queries + rel_queries: list[Query] = [] + node_queries: list[Query] = [] + + for node in nodes_dict: + node_str_query = "" + # retrieving node data + # user number + node_num = node[0] + # user account name + node_acc_name = node[1]["acc_name"] + # creating the query + node_str_query += ( + f"MERGE (a{node_num}:{user_label} {{id: $node_acc_name}}) " + ) + node_str_query += f"""ON CREATE SET a{node_num}.createdAt = + $date_now_timestamp + """ + + # creating the platform if it wasn't created before + node_str_query += f"""MERGE (g:{platform_label} {{id: $platform_id}}) + ON CREATE SET g.createdAt = $date_now_timestamp + """ + + node_str_query += f""" + MERGE (a{node_num}) + -[rel_platform{node_num}:{member_rel_label}]-> (g) + ON CREATE SET + rel_platform{node_num}.createdAt = $date_now_timestamp + """ + + parameters = { + "node_acc_name": node_acc_name, + "date_now_timestamp": int(date_now_timestamp), + "platform_id": self.platform_id, + } + query_str = node_str_query + ";" + + node_queries.append(Query(query_str, parameters)) + + for idx, edge in enumerate(edge_dict): + rel_str_query = "" + + # retrieving edge data + + # relationship from user number + starting_acc_num = edge[0] + # relationship to user number + ending_acc_num = edge[1] + + starting_node_acc_name = nodes_dict[starting_acc_num]["acc_name"] + ending_node_acc_name = nodes_dict[ending_acc_num]["acc_name"] + + # the interaction count between them + interaction_count = edge[2]["weight"] + + rel_str_query += f"""MATCH (a{starting_acc_num}:{user_label} + {{id: $starting_node_acc_name}}) + MATCH (a{ending_acc_num}:{user_label} + {{id: $ending_node_acc_name}})
+ MERGE + (a{starting_acc_num}) -[rel{idx}:{users_rel_label} + {{ + date: $date, + weight: $weight, + platformId: $platform_id + }} + ]-> (a{ending_acc_num}) + """ + query_str = rel_str_query + ";" + parameters = { + "starting_node_acc_name": starting_node_acc_name, + "ending_node_acc_name": ending_node_acc_name, + "date": int(graph_date_timestamp), + "weight": int(interaction_count), + "platform_id": self.platform_id, + } + rel_queries.append(Query(query_str, parameters)) + + return node_queries, rel_queries + + def get_timestamp(self, time: datetime.datetime | None = None) -> float: + """ + get the timestamp of the given time or just now + + Parameters + ------------ + time : datetime.datetime + the time to get its timestamp + default is `None` meaning to send the time of now + + Returns + -------- + timestamp : float + the timestamp of the time multiplied to 1000 + """ + using_time: datetime.datetime + if time is not None: + using_time = time + else: + using_time = datetime.datetime.now() + + timestamp = ( + using_time.replace( + hour=0, minute=0, second=0, microsecond=0, tzinfo=datetime.timezone.utc + ).timestamp() + * 1000 + ) + + return timestamp diff --git a/tc_analyzer_lib/__init__.py b/tc_analyzer_lib/__init__.py new file mode 100644 index 0000000..5eded76 --- /dev/null +++ b/tc_analyzer_lib/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 +# flake8: noqa +# from .rn_analyzer import TCAnalyzer diff --git a/discord_analyzer/analysis/__init__.py b/tc_analyzer_lib/algorithms/__init__.py similarity index 100% rename from discord_analyzer/analysis/__init__.py rename to tc_analyzer_lib/algorithms/__init__.py diff --git a/tc_analyzer_lib/algorithms/analytics_interactions_script.py b/tc_analyzer_lib/algorithms/analytics_interactions_script.py new file mode 100644 index 0000000..25607ad --- /dev/null +++ b/tc_analyzer_lib/algorithms/analytics_interactions_script.py @@ -0,0 +1,88 @@ +import itertools + + +def per_account_interactions( + cursor_list, + dict_keys, +): + """ + get per account interactions for each heatmaps fields + + Parameters: + ------------ + cursor_list : list + the db cursor returned and converted as list + dict_keys : list + the list of dictionary keys, representing the features in database + + Returns: + ---------- + summed_per_account_interactions : dictionary + the dictionary of each feature having summed the counts per hour, + the dictionary of features is returned + """ + data_processed = {} + all_interaction_accounts = {} + + # for each interaction + for k in dict_keys: + temp_dict = {} + # get the data of a key in a map + samples = list(map(lambda data_dict: data_dict[k], cursor_list)) + + # flatten the list + samples_flattened = list(itertools.chain(*samples)) + + for sample in samples_flattened: + account_name = sample["account"] + interaction_count = sample["count"] + + if account_name not in temp_dict.keys(): + temp_dict[account_name] = interaction_count + else: + temp_dict[account_name] += interaction_count + + if account_name not in all_interaction_accounts.keys(): + all_interaction_accounts[account_name] = interaction_count + else: + all_interaction_accounts[account_name] += interaction_count + + data_processed[k] = refine_dictionary(temp_dict) + + data_processed["all_interaction_accounts"] = refine_dictionary( + all_interaction_accounts + ) + + summed_per_account_interactions = data_processed + + return summed_per_account_interactions + + +def refine_dictionary(interaction_dict): + """ + refine dictionary and add the account id to the dictionary + + 
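get_timestamp pins every graph write to midnight UTC of the given day, in epoch milliseconds, which is what keeps the MERGE-based date properties idempotent across reruns. The computation reduced to a small helper (using the `datetime | None` union syntax that appears elsewhere in this diff):

import datetime

def day_ms(t: datetime.datetime | None = None) -> float:
    # midnight UTC of the given day, in epoch milliseconds
    t = t or datetime.datetime.now()
    midnight = t.replace(hour=0, minute=0, second=0, microsecond=0,
                         tzinfo=datetime.timezone.utc)
    return midnight.timestamp() * 1000

# e.g. day_ms(datetime.datetime(2024, 1, 2, 15, 30)) -> 1704153600000.0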
Parameters: + ------------ + interaction_dict : dict + a dictionary like {'user1': 5, 'user2': 4} + keys are usernames and values are the count of each user interaction + + Returns: + ---------- + refined_dict : nested dictionary + the input refined like this + { + '0': { 'user1': 5 }, + '1': { 'user2': 4 } + } + """ + + refined_dict = {} + for idx, data_acc in enumerate(interaction_dict.keys()): + refined_dict[f"{idx}"] = { + "account": data_acc, + "count": interaction_dict[data_acc], + } + + return refined_dict diff --git a/discord_analyzer/analysis/compute_interaction_matrix_discord.py b/tc_analyzer_lib/algorithms/compute_interaction_matrix_discord.py similarity index 51% rename from discord_analyzer/analysis/compute_interaction_matrix_discord.py rename to tc_analyzer_lib/algorithms/compute_interaction_matrix_discord.py index df9bff5..e280e5d 100644 --- a/discord_analyzer/analysis/compute_interaction_matrix_discord.py +++ b/tc_analyzer_lib/algorithms/compute_interaction_matrix_discord.py @@ -1,10 +1,9 @@ import copy +from datetime import datetime from typing import Any -from discord_analyzer.DB_operations.mongodb_access import DB_access -from discord_analyzer.DB_operations.mongodb_query import MongodbQuery from numpy import diag_indices_from, ndarray -from tc_core_analyzer_lib.utils.activity import DiscordActivity +from tc_analyzer_lib.utils.mongo import MongoSingleton from .utils.compute_interaction_mtx_utils import ( generate_interaction_matrix, @@ -14,25 +13,38 @@ def compute_interaction_matrix_discord( acc_names: list[str], - dates: list[str], - channels: list[str], - db_access: DB_access, - **kwargs, + date_range: tuple[datetime, datetime], + resources: list[str], + resource_identifier: str, + platform_id: str, + interactions: list[str], + actions: list[str], ) -> dict[str, ndarray]: """ Computes interaction matrix from discord data - Input: - -------- - acc_names - [str] : list of all account names to be considered for analysis - dates - [str] : list of all dates to be considered for analysis - channels - [str] : list of all channel ids to be considered for analysis - db_access - obj : database access object - **kwargs : - activities - list[Activity] : - the list of activities to generate the matrix for - default is to include all activity types - minimum length is 1 + Parameters: + ------------- + acc_names : list[str] + list of all account names to be considered for analysis + date_range : tuple[datetime, datetime] + a tuple with length 2 + the first index is the starting date of the range + the second index is the ending date of the range + resources : list[str] + list of all resource ids to be considered for analysis + resource_identifier : str + the identifier for resource ids + could be `channel_id` for discord + platform_id : str + the platform to fetch its data from + interactions : list[str] + the list of interaction activities to generate the matrix for + minimum length is 1 + actions : list[str] + the list of action activities to generate the matrix for + we treat actions as self-interactions in the matrix + minimum length is 1 Output: --------- @@ -40,79 +52,58 @@ keys are representative of an activity and the 2d matrix representing the interactions for the activity """ - activities = kwargs.get( - "activities", - [ - DiscordActivity.Mention, - DiscordActivity.Reply, - DiscordActivity.Reaction, - DiscordActivity.Lone_msg, - DiscordActivity.Thread_msg, - ], - ) - feature_projection = { - "channelId": 0, - "replier": 0, - "replied": 0, - "mentioner": 0,
- "mentioned": 0, - "reacter": 0, - "reacted": 0, - "__v": 0, - "_id": 0, + client = MongoSingleton.get_instance().get_client() + feature_projection: dict[str, bool] = { + activity: True for activity in actions + interactions } - # intiate query - query = MongodbQuery() - - # set up query dictionary - query_dict = query.create_query_filter_account_channel_dates( - acc_names=acc_names, - channels=channels, - dates=list(dates), - date_key="date", - channel_key="channelId", - account_key="account_name", - ) + feature_projection = { + **feature_projection, + "user": True, + } + query = { + "$and": [ + {"user": {"$in": acc_names}}, + {resource_identifier: {"$in": resources}}, + { + "date": { + "$gte": date_range[0], + "$lt": date_range[1], + } + }, + ] + } - # create cursor for db - cursor = db_access.query_db_find( - table="heatmaps", query=query_dict, feature_projection=feature_projection + cursor = client[platform_id]["heatmaps"].find( + query, + feature_projection, ) db_results = list(cursor) per_acc_query_result = prepare_per_account(db_results=db_results) - per_acc_interaction = process_non_reactions(per_acc_query_result) + per_acc_interaction = process_actions( + per_acc_query_result, skip_fields=[*interactions, "user", "_id"] + ) # And now compute the interactions per account_name (`acc`) int_mat = {} # computing `int_mat` per activity - for activity in activities: + for activity in interactions + actions: int_mat[activity] = generate_interaction_matrix( per_acc_interactions=per_acc_interaction, acc_names=acc_names, activities=[activity], ) - # a person interacting to themselves is not counted as activity - if activity in [ - DiscordActivity.Reply, - DiscordActivity.Reaction, - DiscordActivity.Mention, - ]: + # removing self-interactions + if activity in interactions: int_mat[activity][diag_indices_from(int_mat[activity])] = 0 return int_mat -def process_non_reactions( +def process_actions( heatmaps_data_per_acc: dict[str, list[dict[str, Any]]], - skip_fields: list[str] = [ - "reacted_per_acc", - "mentioner_per_acc", - "replied_per_acc", - "account_name", - "date", - ], + skip_fields: list[str], ) -> dict[str, list[dict[str, Any]]]: """ process the non-interactions heatmap data to be like interaction @@ -145,7 +136,7 @@ def process_non_reactions( action_count = sum(document[action]) if action_count: document[action] = [ - [{"account": account, "count": sum(document[action])}] + {"account": account, "count": sum(document[action])} ] else: # action count was zero diff --git a/discord_analyzer/analysis/compute_member_activity.py b/tc_analyzer_lib/algorithms/compute_member_activity.py similarity index 78% rename from discord_analyzer/analysis/compute_member_activity.py rename to tc_analyzer_lib/algorithms/compute_member_activity.py index a395aed..574ebc1 100644 --- a/discord_analyzer/analysis/compute_member_activity.py +++ b/tc_analyzer_lib/algorithms/compute_member_activity.py @@ -11,11 +11,11 @@ import networkx as nx import numpy as np from dateutil.relativedelta import relativedelta -from discord_analyzer.analysis.member_activity_history import check_past_history -from discord_analyzer.analysis.utils.member_activity_history_utils import ( +from tc_analyzer_lib.algorithms.member_activity_history import check_past_history +from tc_analyzer_lib.algorithms.utils.member_activity_history_utils import ( MemberActivityPastUtils, ) -from discord_analyzer.analysis.utils.member_activity_utils import ( +from tc_analyzer_lib.algorithms.utils.member_activity_utils import ( assess_engagement, 
convert_to_dict, get_joined_accounts, @@ -24,15 +24,17 @@ store_based_date, update_activities, ) -from discord_analyzer.DB_operations.mongodb_access import DB_access +from tc_analyzer_lib.DB_operations.mongodb_access import DB_access +from tc_analyzer_lib.schemas.platform_configs.config_base import PlatformConfigBase def compute_member_activity( - db_name: str, - connection_string: str, - channels: list[str], + platform_id: str, + resources: list[str], + resource_identifier: str, acc_names: list[str], - date_range: tuple[str, str], + date_range: list[datetime], + analyzer_config: PlatformConfigBase, window_param: dict[str, int], act_param: dict[str, int], load_past_data=True, @@ -42,11 +44,13 @@ def compute_member_activity( Parameters ------------ - db_name: (str) - guild id - connection_string: (str) - connection to db string - channels: [str] - list of all channel ids that should be analysed + platform_id: (str) - platform id + resources: [str] - list of all resource ids that should be analysed acc_names: [str] - list of all account names that should be analysed - date_range: [str] - list of first and last date to be analysed (one output per date) + date_range: tuple[datetime, datetime] - tuple of first and last date to be analysed (one output per date) + analyzer_config : PlatformConfigBase + the config for the analyzer to use. + representing which analytics to compute window_param: dict[str, int] - "period_size": window size in days. default = 7 "step_size": step size of sliding window in days. default = 1 @@ -105,14 +109,10 @@ def compute_member_activity( whether to load past data or not, default is True if True, will load the past data, if data was available in given range """ - guild_msg = f"GUILDID: {db_name}:" - - # make empty results output array - - # # # DATABASE SETTINGS # # # + platform_msg = f"PLATFORM_ID: {platform_id}:" # set up database access - db_access = DB_access(db_name, connection_string) + db_access = DB_access(platform_id) # specify the features not to be returned @@ -156,10 +156,7 @@ def compute_member_activity( ) else: past_activities_data = {} - new_date_range = [ - datetime.strptime(date_range[0], "%y/%m/%d"), - datetime.strptime(date_range[1], "%y/%m/%d"), - ] + new_date_range = [date_range[0], date_range[1]] starting_key = 0 # if in past there was an activity, we'll update the dictionaries @@ -218,51 +215,39 @@ def compute_member_activity( # (won't affect the loop but will affect codes after it) if max_range < 0: max_range = 0 - if acc_names != [] and channels != []: + if acc_names != [] and resources != []: for w_i in range(max_range): msg_info = "MEMBERACTIVITY ANALYTICS: PROGRESS" - msg = f"{guild_msg} {msg_info} {w_i + 1}/{max_range}" + msg = f"{platform_msg} {msg_info} {w_i + 1}/{max_range}" logging.info(msg) new_window_i = w_i + starting_key last_date = ( new_date_range[0] + relativedelta(days=window_param["step_size"] * w_i) - + relativedelta(days=window_param["period_size"] - 1) + + relativedelta(days=window_param["period_size"]) ) - # make list of all dates in window - date_list_w = [] - for x in range(window_param["period_size"]): - date_list_w.append(last_date - relativedelta(days=x)) - - # make empty array for date string values - date_list_w_str = np.zeros_like(date_list_w) - - # turn date time values into string - for i in range(len(date_list_w_str)): - date_list_w_str[i] = date_list_w[i].strftime("%Y-%m-%d") - window_start = last_date - relativedelta( days=window_param["period_size"] ) # updating account names for past 7 days acc_names = 
get_users_past_window( - window_start_date=window_start.strftime("%Y-%m-%d"), - window_end_date=last_date.strftime("%Y-%m-%d"), - collection=db_access.db_mongo_client[db_name]["heatmaps"], + window_start_date=window_start, + window_end_date=last_date, + collection=db_access.db_mongo_client[platform_id]["heatmaps"], ) if acc_names == []: time_window_str = f"{window_start.strftime('%Y-%m-%d')} - " time_window_str += last_date.strftime("%Y-%m-%d") logging.warning( - f"{guild_msg} No data for the time window {time_window_str}" + f"{platform_msg} No data for the time window {time_window_str}" ) logging.info( - """Getting latest joined instead! - So we could compute other activity types!""" + "Getting latest joined instead! " + "So we could compute other activity types!" ) # will get 5 users just to make sure @@ -274,11 +259,16 @@ def compute_member_activity( accounts=acc_names, action_params=act_param, period_size=window_param["period_size"], - db_access=db_access, - channels=channels, - analyze_dates=date_list_w_str, + platform_id=platform_id, + resources=resources, + resource_identifier=resource_identifier, + analyze_dates=( + last_date - timedelta(days=window_param["period_size"]), + last_date, + ), activities_name=activities_name, activity_dict=activity_dict, + analyzer_config=analyzer_config, ) # make empty dict for node attributes @@ -292,13 +282,13 @@ def compute_member_activity( nx.set_node_attributes(graph_out, node_att, "acc_name") # store results in dictionary - network_dict[last_date] = graph_out + network_dict[last_date - timedelta(days=1)] = graph_out # else if there was no past data else: max_range = 0 - start_dt = datetime.strptime(date_range[0], "%y/%m/%d") - end_dt = datetime.strptime(date_range[1], "%y/%m/%d") + start_dt = date_range[0] + end_dt = date_range[1] # get the accounts with their joining date joined_acc_dict = get_joined_accounts( @@ -311,7 +301,7 @@ def compute_member_activity( analytics_day_range=window_param["period_size"] - 1, joined_acc_dict=joined_acc_dict, load_past=load_past_data, - empty_channel_acc=(len(channels) != 0 and len(acc_names) != 0), + empty_channel_acc=(len(resources) != 0 and len(acc_names) != 0), ) return [network_dict, activity_dict_per_date] diff --git a/discord_analyzer/analysis/member_activity_history.py b/tc_analyzer_lib/algorithms/member_activity_history.py similarity index 76% rename from discord_analyzer/analysis/member_activity_history.py rename to tc_analyzer_lib/algorithms/member_activity_history.py index 72abc77..c26486f 100644 --- a/discord_analyzer/analysis/member_activity_history.py +++ b/tc_analyzer_lib/algorithms/member_activity_history.py @@ -1,19 +1,17 @@ # checking the past history of member activities -# Importing libraries -import datetime +from datetime import datetime, timedelta -from dateutil import parser -from discord_analyzer.analysis.utils.member_activity_history_utils import ( +from tc_analyzer_lib.algorithms.utils.member_activity_history_utils import ( MemberActivityPastUtils, ) -from discord_analyzer.DB_operations.mongodb_access import DB_access +from tc_analyzer_lib.DB_operations.mongodb_access import DB_access # the main script function def check_past_history( db_access: DB_access, - date_range: tuple[str, str], + date_range: list[datetime], window_param: dict[str, int], collection_name: str = "memberactivities", verbose=False, @@ -26,10 +24,9 @@ def check_past_history( ------------- db_access: DB_access the database access class that queries are called through it - date_range: list of strings - a list of 
length 2, the first index has the start of the interval + date_range: tuple[datetime, datetime] + a tuple of length 2, the first index has the start of the interval and the second index has the end of the interval - *Note*: Each value of the array should be in the format of `str(%y/%m/%d)` window_param : dict[str, int] a dictionary with 2 values, the keys and values - "period_size": window size in days. default = 7 @@ -44,8 +41,8 @@ ---------- all_activity_data_dict : dictionary the data for past activities - new_date_range : list - list of new date range in datetime format + new_date_range : tuple + tuple of new date range in datetime format because the last maximum_key : int the maximum key that the new data should start its data from @@ -59,11 +56,8 @@ its length is: {len(date_range)}""" ) - # the input date_range in format of datetime - # converting the dates into datetime format - date_format = "%y/%m/%d" - date_range_start = datetime.datetime.strptime(date_range[0], date_format) - date_range_end = datetime.datetime.strptime(date_range[1], date_format) + date_range_start = date_range[0] + date_range_end = date_range[1] member_act_past_utils = MemberActivityPastUtils(db_access=db_access) @@ -87,6 +81,8 @@ # getting a list of returned data past_data_new_schema = list(cursor) + db_analysis_end_date: datetime | None + # if any past data was available in DB if past_data_new_schema != []: if verbose: @@ -94,7 +90,7 @@ # db_analysis_start_date = parser.parse(past_data[0]['date']) # db_analysis_start_date = date_range_start - db_analysis_end_date = parser.parse(past_data_new_schema[-1]["date"]) + db_analysis_end_date = past_data_new_schema[-1]["date"] # days_after_analysis_start = ( # db_analysis_end_date - db_analysis_start_date @@ -110,12 +106,13 @@ # db_analysis_start_date = None db_analysis_end_date = None - # the input date_range in format of datetime - # converting the dates into datetime format - date_format = "%y/%m/%d" - date_range_start = datetime.datetime.strptime(date_range[0], date_format) - date_range_end = datetime.datetime.strptime(date_range[1], date_format) + # # the input date_range in format of datetime + # # converting the dates into datetime format + # date_format = "%y/%m/%d" + # date_range_start = datetime.datetime.strptime(date_range[0], date_format) + # date_range_end = datetime.datetime.strptime(date_range[1], date_format) + new_date_range: list[datetime] # if for the requested date_range, its results were available in db if (db_analysis_end_date is not None) and (date_range_start < db_analysis_end_date): # refine the dates @@ -127,7 +124,7 @@ else: # start date would be the next day of the end day new_date_range = [ - db_analysis_end_date + datetime.timedelta(days=1), + db_analysis_end_date + timedelta(days=1), date_range_end, ] diff --git a/automation/utils/__init__.py b/tc_analyzer_lib/algorithms/neo4j_analysis/__init__.py similarity index 100% rename from automation/utils/__init__.py rename to tc_analyzer_lib/algorithms/neo4j_analysis/__init__.py
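check_past_history is what makes reruns incremental: when stored memberactivities already overlap the requested range, analysis resumes the day after the last stored document instead of starting over. The decision rule in isolation, a minimal sketch:

from datetime import datetime, timedelta

def continue_range(requested: tuple[datetime, datetime],
                   db_end: datetime | None) -> list[datetime]:
    start, end = requested
    if db_end is not None and start < db_end:
        # past data overlaps the request: resume right after it
        return [db_end + timedelta(days=1), end]
    # nothing usable in the db: analyze the full requested range
    return [start, end]

diff --git a/discord_analyzer/analysis/neo4j_analysis/analyzer_node_stats.py b/tc_analyzer_lib/algorithms/neo4j_analysis/analyzer_node_stats.py similarity index 77% rename from discord_analyzer/analysis/neo4j_analysis/analyzer_node_stats.py rename to tc_analyzer_lib/algorithms/neo4j_analysis/analyzer_node_stats.py index bfd8956..ff6cc5a 100644 ---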
a/discord_analyzer/analysis/neo4j_analysis/analyzer_node_stats.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/analyzer_node_stats.py @@ -3,12 +3,18 @@ from uuid import uuid1 import pandas as pd -from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_analyzer_lib.algorithms.neo4j_analysis.utils import ProjectionUtils +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps class NodeStats: - def __init__(self, threshold: int = 2) -> None: + def __init__( + self, + platform_id: str, + graph_schema: GraphSchema, + threshold: int = 2, + ) -> None: """ initialize the Node status computations object the status could be either one of `Sender`, `Receiver`, `Balanced` @@ -31,30 +37,29 @@ def __init__(self, threshold: int = 2) -> None: self.gds = neo4j_ops.gds self.driver = neo4j_ops.neo4j_driver self.threshold = threshold + self.platform_id = platform_id + self.graph_schema = graph_schema + self.projection_utils = ProjectionUtils(self.platform_id, self.graph_schema) - def compute_stats(self, guildId: str, from_start: bool) -> None: - projection_utils = ProjectionUtils(guildId=guildId) - + def compute_stats(self, from_start: bool) -> None: # possible dates to do the computations - possible_dates = projection_utils.get_dates(guildId=guildId) + possible_dates = self.projection_utils.get_dates() # if we didn't want to compute from the day start if not from_start: - computed_dates = self.get_computed_dates(projection_utils, guildId) + computed_dates = self.get_computed_dates() possible_dates = possible_dates - computed_dates for date in possible_dates: try: - self.compute_node_stats_wrapper(projection_utils, guildId, date) + self.compute_node_stats_wrapper(date) except Exception as exp: - msg = f"GUILDID: {guildId} " + msg = f"PLATFORMID: {self.platform_id} " logging.error( f"{msg} node stats computation for date: {date}, exp: {exp}" ) - def compute_node_stats_wrapper( - self, projection_utils: ProjectionUtils, guildId: str, date: float - ): + def compute_node_stats_wrapper(self, date: float): """ a wrapper for node stats computation process we're doing the projection here and computing on that, @@ -73,8 +78,7 @@ def compute_node_stats_wrapper( # NATURAL relations direction degreeCentrality computations graph_name = f"GraphStats_{uuid1()}" - projection_utils.project_temp_graph( - guildId=guildId, + self.projection_utils.project_temp_graph( graph_name=graph_name, weighted=True, relation_direction="NATURAL", @@ -89,7 +93,7 @@ def compute_node_stats_wrapper( } ) YIELD nodeId, score - RETURN gds.util.asNode(nodeId).userId AS userId, score + RETURN gds.util.asNode(nodeId).id AS userId, score """, { "graph_name": graph_name, @@ -106,7 +110,7 @@ def compute_node_stats_wrapper( } ) YIELD nodeId, score - RETURN gds.util.asNode(nodeId).userId AS userId, score + RETURN gds.util.asNode(nodeId).id AS userId, score """, { "graph_name": graph_name, @@ -115,7 +119,7 @@ def compute_node_stats_wrapper( df = self.get_date_stats(natural_dc, reverse_dc, threshold=self.threshold) - self.save_properties_db(guildId, df, date) + self.save_properties_db(df, date) _ = self.gds.run_cypher( "CALL gds.graph.drop($graph_name)", { @@ -123,20 +127,19 @@ def compute_node_stats_wrapper( }, ) - def get_computed_dates( - self, projection_utils: ProjectionUtils, guildId: str - ) -> set[float]: + def get_computed_dates(self) -> set[float]: """ get the computed dates of our guild """ - query = """ - MATCH (:DiscordAccount) - -[r:INTERACTED_IN]->(g:Guild {guildId: 
$guild_id}) + query = f""" + MATCH (:{self.graph_schema.platform_label}) + -[r:{self.graph_schema.interacted_in_rel}]-> + (g:{self.graph_schema.platform_label} {{id: $platform_id}}) WHERE r.status IS NOT NULL RETURN r.date as computed_dates """ - computed_dates = projection_utils.get_computed_dates( - query=query, guild_id=guildId + computed_dates = self.projection_utils.get_computed_dates( + query=query, platform_id=self.platform_id ) return computed_dates @@ -217,9 +220,7 @@ def _compute_stats( return merged_df - def save_properties_db( - self, guildId: str, user_status: pd.DataFrame, date: float - ) -> None: + def save_properties_db(self, user_status: pd.DataFrame, date: float) -> None: """ save user stats to their nodes @@ -237,16 +238,20 @@ def save_properties_db( userId = row["userId"] status = row["stats"] - query = """ - MATCH (a:DiscordAccount {userId: $userId}) - MATCH (g:Guild {guildId: $guildId}) - MERGE (a) -[r:INTERACTED_IN { + query = f""" + MATCH (a:{self.graph_schema.user_label} {{id: $userId}}) + MATCH (g:{self.graph_schema.platform_label} {{id: $platform_id}}) + MERGE (a) -[r:INTERACTED_IN {{ date: $date - }] -> (g) + }}] -> (g) SET r.status = $status """ session.run( - query, userId=userId, guildId=guildId, status=status, date=date + query, + userId=userId, + platform_id=self.platform_id, + status=status, + date=date, ) - prefix = f"GUILDID: {guildId}: " + prefix = f"PLATFORMID: {self.platform_id}: " logging.info(f"{prefix}Node stats saved for the date: {date}") diff --git a/discord_analyzer/analysis/neo4j_analysis/centrality.py b/tc_analyzer_lib/algorithms/neo4j_analysis/centrality.py similarity index 87% rename from discord_analyzer/analysis/neo4j_analysis/centrality.py rename to tc_analyzer_lib/algorithms/neo4j_analysis/centrality.py index 103897d..ede8762 100644 --- a/discord_analyzer/analysis/neo4j_analysis/centrality.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/centrality.py @@ -2,21 +2,25 @@ from typing import Literal import pandas as pd -from discord_analyzer.analysis.neo4j_utils.neo4j_metrics import Neo4JMetrics -from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_analyzer_lib.algorithms.neo4j_analysis.utils import ( + Neo4JMetrics, + ProjectionUtils, +) +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps, Query class Centerality: - def __init__(self) -> None: + def __init__(self, platform_id: str, graph_schema: GraphSchema) -> None: """ centerality algorithms """ self.neo4j_ops = Neo4jOps.get_instance() + self.platform_id = platform_id + self.graph_schema = graph_schema def compute_degree_centerality( self, - guildId: str, direction: str, from_start: bool, **kwargs, @@ -29,18 +33,13 @@ def compute_degree_centerality( Parameters: ------------ - guildId : str - the user nodes of guildId direction : str the direction of relation could be `in_degree`, `out_degree`, `undirected` from_start : bool whether to compute everything from scratch or continue the computations - kwargs : dict - node : str - the name of the node we're computing degree centrality - default is `DiscordAccount` + **kwargs : dict weighted : bool assuming the edges as weighted or not default is `True` @@ -66,7 +65,7 @@ def compute_degree_centerality( the degree centerality per date for each user """ - node = "DiscordAccount" if "node" not in kwargs.keys() else kwargs["node"] + node = self.graph_schema.user_label weighted = True if "weighted" not in kwargs.keys() else kwargs["weighted"] normalize = False if 
"normalize" not in kwargs.keys() else kwargs["normalize"] preserve_parallel = ( @@ -85,33 +84,34 @@ def compute_degree_centerality( could produce wrong results!""" ) + interacted_with_label = self.graph_schema.interacted_with_rel # determining one line of the query useing the direction variable if direction == "in_degree": - query = f"MATCH (a:{node})<-[r:INTERACTED_WITH]-(b:{node})" + query = f"MATCH (a:{node})<-[r:{interacted_with_label}]-(b:{node})" elif direction == "out_degree": - query = f"MATCH (a:{node})-[r:INTERACTED_WITH]->(b:{node})" + query = f"MATCH (a:{node})-[r:{interacted_with_label}]->(b:{node})" elif direction == "undirected": - query = f"MATCH (a:{node})-[r:INTERACTED_WITH]-(b:{node})" + query = f"MATCH (a:{node})-[r:{interacted_with_label}]-(b:{node})" results = self.neo4j_ops.gds.run_cypher( f""" {query} - WHERE r.guildId = $guild_id + WHERE r.platformId = $platform_id RETURN - a.userId as a_userId, + a.id as a_userId, r.date as date, r.weight as weight, - b.userId as b_userId + b.id as b_userId """, - params={"guild_id": guildId}, + params={"platform_id": self.platform_id}, ) dates_to_compute = set(results["date"].value_counts().index) if not from_start: - projection_utils = ProjectionUtils(guildId=guildId) + projection_utils = ProjectionUtils(self.platform_id, self.graph_schema) dates_to_compute = self._get_dates_to_compute( - projection_utils, dates_to_compute, guildId + projection_utils, dates_to_compute ) if recompute_dates is not None: dates_to_compute = dates_to_compute.union(recompute_dates) @@ -130,7 +130,6 @@ def _get_dates_to_compute( self, projection_utils: ProjectionUtils, user_interaction_dates: set[float], - guildId: str, ) -> set[float]: """ exclude available analyzed date @@ -142,13 +141,15 @@ def _get_dates_to_compute( guildId : str the guildId to get computations date """ - query = """ - MATCH (g:Guild {guildId: $guild_id}) + query = f""" + MATCH (g:{self.graph_schema.platform_label} {{id: $platform_id}}) -[r:HAVE_METRICS] -> (g) WHERE r.decentralizationScore IS NOT NULL RETURN r.date as computed_dates """ - computed_dates = projection_utils.get_computed_dates(query, guild_id=guildId) + computed_dates = projection_utils.get_computed_dates( + query, platform_id=self.platform_id + ) dates_to_compute = user_interaction_dates - computed_dates @@ -277,7 +278,6 @@ def normalize_degree_centrality( def compute_network_decentrality( self, - guildId: str, from_start: bool, save: bool = True, weighted: bool = False, @@ -308,7 +308,6 @@ def compute_network_decentrality( """ results_undirected = self.compute_degree_centerality( - guildId=guildId, direction="undirected", weighted=weighted, normalize=True, @@ -327,13 +326,12 @@ def compute_network_decentrality( ) if save: - self.save_decentralization_score(guildId, network_decentrality) + self.save_decentralization_score(network_decentrality) return network_decentrality def save_decentralization_score( self, - guildId: str, decentrality_score: dict[float, float | Literal[-1]], ) -> None: """ @@ -349,13 +347,13 @@ def save_decentralization_score( # preparing the queries queries: list[Query] = [] for date in decentrality_score.keys(): - query_str = """ - MATCH (g: Guild {guildId: $guild_id}) - MERGE (g) -[r:HAVE_METRICS {date: $date}]-> (g) + query_str = f""" + MATCH (g: {self.graph_schema.platform_label} {{id: $platform_id}}) + MERGE (g) -[r:HAVE_METRICS {{date: $date}}]-> (g) SET r.decentralizationScore = $score """ parameters = { - "guild_id": guildId, + "platform_id": self.platform_id, "score": 
decentrality_score[date], "date": date, } @@ -364,5 +362,5 @@ def save_decentralization_score( self.neo4j_ops.run_queries_in_batch( queries, - message=f"GUILDID: {guildId}: Saving Network Decentrality:", + message=f"PLATFORMID: {self.platform_id}: Saving Network Decentrality:", ) diff --git a/discord_analyzer/analysis/neo4j_analysis/local_clustering_coefficient.py b/tc_analyzer_lib/algorithms/neo4j_analysis/local_clustering_coefficient.py similarity index 60% rename from discord_analyzer/analysis/neo4j_analysis/local_clustering_coefficient.py rename to tc_analyzer_lib/algorithms/neo4j_analysis/local_clustering_coefficient.py index 9fe309d..2323851 100644 --- a/discord_analyzer/analysis/neo4j_analysis/local_clustering_coefficient.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/local_clustering_coefficient.py @@ -1,15 +1,23 @@ import logging from uuid import uuid1 -from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_analyzer_lib.algorithms.neo4j_analysis.utils import ProjectionUtils +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib import Neo4jOps class LocalClusteringCoeff: - def __init__(self) -> None: + def __init__(self, platform_id: str, graph_schema: GraphSchema) -> None: self.gds = Neo4jOps.get_instance().gds + self.graph_schema = graph_schema + self.platform_id = platform_id - def compute(self, guildId: str, from_start: bool = False) -> None: + self.projection_utils = ProjectionUtils( + platform_id=self.platform_id, graph_schema=self.graph_schema + ) + self.log_prefix = f"PLATFORMID: {platform_id} " + + def compute(self, from_start: bool = False) -> None: """ computing the localClusteringCoefficient per date of each interaction and saving them in nodes @@ -17,23 +25,15 @@ def compute(self, guildId: str, from_start: bool = False) -> None: Parameters: ------------ - guildId : str - the guild to compute the analytics for from_start : bool whether to compute the metric from the first day or not if True, then would compute from start default is False - - Returns: - --------- - `None` """ - projection_utils = ProjectionUtils(guildId=guildId) - # Getting all possible dates - computable_dates = projection_utils.get_dates(guildId=guildId) + computable_dates = self.projection_utils.get_dates() - computed_dates = self.get_computed_dates(projection_utils, guildId) + computed_dates = self.get_computed_dates() # compute for each date to_compute: set[float] @@ -45,18 +45,14 @@ def compute(self, guildId: str, from_start: bool = False) -> None: # for the computation date for date in to_compute: try: - self.local_clustering_computation_wrapper( - projection_utils=projection_utils, guildId=guildId, date=date - ) + self.local_clustering_computation_wrapper(date=date) except Exception as exp: - msg = f"GUILDID: {guildId} " logging.error( - f"{msg}localClustering computation for date: {date}, exp: {exp}" + f"{self.log_prefix}localClustering computation for " + f"date: {date}, exp: {exp}" ) - def local_clustering_computation_wrapper( - self, projection_utils: ProjectionUtils, guildId: str, date: float - ) -> None: + def local_clustering_computation_wrapper(self, date: float) -> None: """ a wrapper for local clustering coefficient computation process we're doing the projection here and computing on that, @@ -73,17 +69,14 @@ def local_clustering_computation_wrapper( timestamp of the relation """ graph_projected_name = f"GraphLocalClustering_{uuid1()}" - projection_utils.project_temp_graph( - guildId=guildId, + 
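# The rewritten queries follow one recurring pattern: Cypher cannot
# parameterize node labels or relationship types, so those come from the
# graph schema via f-string interpolation, while plain values such as the
# platform id stay as bound parameters. A sketch under that assumption;
# `DemoGraphSchema` and its default label values are illustrative only.
from dataclasses import dataclass


@dataclass
class DemoGraphSchema:
    user_label: str = "DiscordAccount"
    platform_label: str = "DiscordPlatform"
    interacted_in_rel: str = "INTERACTED_IN"


def build_computed_dates_query(schema: DemoGraphSchema) -> str:
    # labels and relation types are interpolated; $platform_id stays a parameter
    return (
        f"MATCH (:{schema.user_label}) "
        f"-[r:{schema.interacted_in_rel}]->"
        f"(g:{schema.platform_label} {{id: $platform_id}}) "
        "WHERE r.status IS NOT NULL "
        "RETURN r.date as computed_dates"
    )


print(build_computed_dates_query(DemoGraphSchema()))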
self.projection_utils.project_temp_graph( graph_name=graph_projected_name, weighted=True, date=date, ) # get the results as pandas dataframe - self.compute_graph_lcc( - date=date, graph_name=graph_projected_name, guildId=guildId - ) + self.compute_graph_lcc(date=date, graph_name=graph_projected_name) # dropping the computed date _ = self.gds.run_cypher( @@ -95,37 +88,31 @@ def local_clustering_computation_wrapper( }, ) - def get_computed_dates( - self, projection_utils: ProjectionUtils, guildId: str - ) -> set[float]: + def get_computed_dates(self) -> set[float]: """ get localClusteringCoeff computed dates - Parameters: - ------------ - guildId : str - the guild we want the temp relationships - between its members - projection_utils : ProjectionUtils - the utils needed to get the work done - Returns: ---------- computed_dates : set[float] the computation dates """ + # getting the dates computed before - query = """ - MATCH (:DiscordAccount) - -[r:INTERACTED_IN]->(g:Guild {guildId: $guild_id}) + query = f""" + MATCH (:{self.graph_schema.user_label}) + -[r:{self.graph_schema.interacted_in_rel}]-> + (g:{self.graph_schema.platform_label} {{id: $platform_id}}) WHERE r.localClusteringCoefficient IS NOT NULL RETURN r.date as computed_dates """ - computed_dates = projection_utils.get_computed_dates(query, guild_id=guildId) + computed_dates = self.projection_utils.get_computed_dates( + query, platform_id=self.platform_id + ) return computed_dates - def compute_graph_lcc(self, date: float, graph_name: str, guildId: str) -> None: + def compute_graph_lcc(self, date: float, graph_name: str) -> None: """ compute the localClusteringCoefficient for the given graph and write the results back to the nodes @@ -136,28 +123,28 @@ def compute_graph_lcc(self, date: float, graph_name: str, guildId: str) -> None: timestamp of the relation graph_name : str the operation would be done on the graph - guild : str - the guildId to save the data for it """ - msg = f"GUILDID: {guildId}" try: _ = self.gds.run_cypher( - """ + f""" CALL gds.localClusteringCoefficient.stream( $graph_name ) YIELD nodeId, localClusteringCoefficient WITH gds.util.asNode(nodeId) as userNode, localClusteringCoefficient - MATCH (g:Guild {guildId: $guild_id}) - MERGE (userNode) -[r:INTERACTED_IN {date: $date}]-> (g) + MATCH (g:{self.graph_schema.platform_label} {{id: $platform_id}}) + MERGE (userNode) -[r:{self.graph_schema.interacted_in_rel} {{date: $date}}]-> (g) SET r.localClusteringCoefficient = localClusteringCoefficient """, { "graph_name": graph_name, - "guild_id": guildId, + "platform_id": self.platform_id, "date": date, }, ) except Exception as exp: - logging.error(f"{msg} error in computing localClusteringCoefficient, {exp}") + logging.error( + f"{self.log_prefix} error in computing localClusteringCoefficient!" 
+ f" Exception: {exp}" + ) diff --git a/discord_analyzer/analysis/neo4j_analysis/louvain.py b/tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py similarity index 56% rename from discord_analyzer/analysis/neo4j_analysis/louvain.py rename to tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py index ada52ff..8a00ddf 100644 --- a/discord_analyzer/analysis/neo4j_analysis/louvain.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/louvain.py @@ -1,54 +1,57 @@ import logging from uuid import uuid1 -from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_analyzer_lib.algorithms.neo4j_analysis.utils import ProjectionUtils +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps class Louvain: - def __init__(self) -> None: + def __init__(self, platform_id: str, graph_schema: GraphSchema) -> None: """ louvain algorithm wrapper to compute """ self.neo4j_ops = Neo4jOps.get_instance() + self.platform_id = platform_id + self.graph_schema = graph_schema - def compute(self, guild_id: str, from_start: bool = False) -> None: + self.projection_utils = ProjectionUtils( + platform_id=platform_id, graph_schema=graph_schema + ) + self.log_prefix = f"PLATFORMID: {platform_id} " + + def compute(self, from_start: bool = False) -> None: """ compute the louvain modularity score for a guild Parameters ------------ - guild_id : str - the guild_id to compute the the algorithm for from_start : bool whether to compute the metric from the first day or not if True, then would compute from start default is False """ - projection_utils = ProjectionUtils(guildId=guild_id) - computable_dates = projection_utils.get_dates(guildId=guild_id) + computable_dates = self.projection_utils.get_dates() # compute for each date to_compute: set[float] if from_start: to_compute = computable_dates else: - computed_dates = self.get_computed_dates(projection_utils, guild_id) + computed_dates = self.get_computed_dates() to_compute = computable_dates - computed_dates for date in to_compute: try: - self.louvain_computation_wrapper(projection_utils, guild_id, date) + self.louvain_computation_wrapper(date) except Exception as exp: - msg = f"GUILDID: {guild_id} " logging.error( - f"{msg}Louvain Modularity computation for date: {date}, exp: {exp}" + f"Exception: {self.log_prefix}Louvain Modularity " + f" computation for date: {date}, exp: {exp}" ) - def louvain_computation_wrapper( - self, projection_utils: ProjectionUtils, guild_id: str, date: float - ) -> None: + def louvain_computation_wrapper(self, date: float) -> None: """ a wrapper for louvain modularity computation process we're doing the projection here and computing on that, @@ -56,17 +59,11 @@ def louvain_computation_wrapper( Parameters: ------------ - projection_utils : ProjectionUtils - the utils needed to get the work done - guild_id : str - the guild we want the temp relationships - between its members date : float timestamp of the relation """ graph_projected_name = f"GraphLouvain_{uuid1()}" - projection_utils.project_temp_graph( - guildId=guild_id, + self.projection_utils.project_temp_graph( graph_name=graph_projected_name, weighted=True, date=date, @@ -74,9 +71,7 @@ def louvain_computation_wrapper( ) # get the results as pandas dataframe - self.compute_graph_louvain( - date=date, graph_name=graph_projected_name, guild_id=guild_id - ) + self.compute_graph_louvain(date=date, graph_name=graph_projected_name) # dropping the computed date _ = self.neo4j_ops.gds.run_cypher( @@ -88,39 +83,29 @@ def 
louvain_computation_wrapper( }, ) - def get_computed_dates( - self, projection_utils: ProjectionUtils, guildId: str - ) -> set[float]: + def get_computed_dates(self) -> set[float]: """ get localClusteringCoeff computed dates - Parameters: - ------------ - guildId : str - the guild we want the temp relationships - between its members - projection_utils : ProjectionUtils - the utils needed to get the work done - Returns: ---------- computed_dates : set[float] the computation dates """ # getting the dates computed before - query = """ - MATCH (g:Guild {guildId: $guild_id}) + query = f""" + MATCH (g:{self.graph_schema.platform_label} {{id: $platform_id}}) -[r:HAVE_METRICS]->(g) WHERE r.louvainModularityScore IS NOT NULL RETURN r.date as computed_dates """ - computed_dates = projection_utils.get_computed_dates(query, guild_id=guildId) + computed_dates = self.projection_utils.get_computed_dates( + query, platform_id=self.platform_id + ) return computed_dates - def compute_graph_louvain( - self, date: float, graph_name: str, guild_id: str - ) -> None: + def compute_graph_louvain(self, date: float, graph_name: str) -> None: """ compute louvain algorithm for the projected graph and save the results back into db @@ -131,29 +116,27 @@ def compute_graph_louvain( timestamp of the relation graph_name : str the operation would be done on the graph - guild_id : str - the guild_id to save the data for it """ - msg = f"GUILDID: {guild_id}" try: _ = self.neo4j_ops.gds.run_cypher( - """ + f""" CALL gds.louvain.stats($graph_name) YIELD modularity WITH modularity - MATCH (g:Guild {guildId: $guild_id}) - MERGE (g) -[r:HAVE_METRICS { + MATCH (g:{self.graph_schema.platform_label} {{id: $platform_id}}) + MERGE (g) -[r:HAVE_METRICS {{ date: $date - }]-> (g) + }}]-> (g) SET r.louvainModularityScore = modularity """, { "graph_name": graph_name, - "guild_id": guild_id, + "platform_id": self.platform_id, "date": date, }, ) except Exception as exp: logging.error( - f"{msg} Error in computing louvain modularity algorithm, {exp}" + f"{self.log_prefix} Error in computing " + f"louvain modularity algorithm, {exp}" ) diff --git a/tc_analyzer_lib/algorithms/neo4j_analysis/utils/__init__.py b/tc_analyzer_lib/algorithms/neo4j_analysis/utils/__init__.py new file mode 100644 index 0000000..1129132 --- /dev/null +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/utils/__init__.py @@ -0,0 +1,3 @@ +# flake8: noqa +from .neo4j_metrics import Neo4JMetrics +from .projection_utils import ProjectionUtils diff --git a/discord_analyzer/analysis/neo4j_utils/neo4j_metrics.py b/tc_analyzer_lib/algorithms/neo4j_analysis/utils/neo4j_metrics.py similarity index 100% rename from discord_analyzer/analysis/neo4j_utils/neo4j_metrics.py rename to tc_analyzer_lib/algorithms/neo4j_analysis/utils/neo4j_metrics.py diff --git a/discord_analyzer/analysis/neo4j_utils/projection_utils.py b/tc_analyzer_lib/algorithms/neo4j_analysis/utils/projection_utils.py similarity index 76% rename from discord_analyzer/analysis/neo4j_utils/projection_utils.py rename to tc_analyzer_lib/algorithms/neo4j_analysis/utils/projection_utils.py index b67e7c8..3f8bd9f 100644 --- a/discord_analyzer/analysis/neo4j_utils/projection_utils.py +++ b/tc_analyzer_lib/algorithms/neo4j_analysis/utils/projection_utils.py @@ -1,16 +1,22 @@ import logging +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps class ProjectionUtils: - def __init__(self, guildId: str) -> None: + def __init__(self, platform_id: str, graph_schema: GraphSchema) -> None: self.gds = 
Neo4jOps.get_instance().gds - self.guildId = guildId + self.platform_id = platform_id + + self.user_label = graph_schema.user_label + self.platform_label = graph_schema.platform_label + self.between_user_label = graph_schema.interacted_with_rel + self.between_user_platform_label = graph_schema.interacted_in_rel + self.membership_label = graph_schema.member_relation def project_temp_graph( self, - guildId: str, graph_name: str, **kwargs, ) -> None: @@ -19,8 +25,8 @@ def project_temp_graph( Parameters: ------------ - guildId : str - the guildId we want to do the projection + platform_id : str + the platform_id we want to do the projection graph_name : str the name we want to name the projected graph **kwargs : @@ -40,6 +46,7 @@ def project_temp_graph( date : float if we want to include date in the graph projection query """ + # getting kwargs weighted = False if "weighted" in kwargs: @@ -52,13 +59,13 @@ def project_temp_graph( projection_query: str if "date" in kwargs: date = kwargs["date"] - projection_query = f"""MATCH (a:DiscordAccount) - -[r:INTERACTED_WITH {{guildId: '{guildId}', date: {date}}}]-> - (b:DiscordAccount) """ + projection_query = f"""MATCH (a:{self.user_label}) + -[r:{self.between_user_label} {{platformId: '{self.platform_id}', date: {date}}}]-> + (b:{self.user_label}) """ else: - projection_query = f"""MATCH (a:DiscordAccount) - -[r:INTERACTED_WITH {{guildId: '{guildId}'}}]-> - (b:DiscordAccount) """ + projection_query = f"""MATCH (a:{self.user_label}) + -[r:{self.between_user_label} {{platformId: '{self.platform_id}'}}]-> + (b:{self.user_label}) """ if "projection_query" in kwargs: projection_query = kwargs["projection_query"] @@ -102,7 +109,7 @@ def project_temp_graph( """ ) - def get_dates(self, guildId: str) -> set[float]: + def get_dates(self) -> set[float]: """ get all the dates we do have on the INTERACTED_WITH relations @@ -113,11 +120,12 @@ def get_dates(self, guildId: str) -> set[float]: """ dates = self.gds.run_cypher( f""" - MATCH (a:DiscordAccount) - -[r:INTERACTED_WITH {{guildId: '{guildId}'}}]-() + MATCH (a:{self.user_label}) + -[r:{self.between_user_label} {{platformId: $platform_id}}]-() WITH DISTINCT(r.date) as dates RETURN dates - """ + """, + params={"platform_id": self.platform_id}, ) computable_dates_set = set(dates["dates"].values) diff --git a/discord_analyzer/DB_operations/__init__.py b/tc_analyzer_lib/algorithms/utils/__init__.py similarity index 100% rename from discord_analyzer/DB_operations/__init__.py rename to tc_analyzer_lib/algorithms/utils/__init__.py diff --git a/discord_analyzer/analysis/utils/activity.py b/tc_analyzer_lib/algorithms/utils/activity.py similarity index 100% rename from discord_analyzer/analysis/utils/activity.py rename to tc_analyzer_lib/algorithms/utils/activity.py diff --git a/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py b/tc_analyzer_lib/algorithms/utils/compute_interaction_mtx_utils.py similarity index 60% rename from discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py rename to tc_analyzer_lib/algorithms/utils/compute_interaction_mtx_utils.py index d77ef10..7cdd01b 100644 --- a/discord_analyzer/analysis/utils/compute_interaction_mtx_utils.py +++ b/tc_analyzer_lib/algorithms/utils/compute_interaction_mtx_utils.py @@ -2,10 +2,9 @@ from typing import Any import numpy as np -from discord_analyzer.analysis.analytics_interactions_script import ( +from tc_analyzer_lib.algorithms.analytics_interactions_script import ( per_account_interactions, ) -from tc_core_analyzer_lib.utils.activity 
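# The kwargs handling in project_temp_graph reads as plain defaulting and
# can be expressed compactly with dict.get; a sketch where the default
# values shown are assumptions for illustration, not the library's own:
def read_projection_kwargs(**kwargs) -> tuple[bool, str]:
    weighted = kwargs.get("weighted", False)
    relation_direction = kwargs.get("relation_direction", "NATURAL")
    return weighted, relation_direction


print(read_projection_kwargs(weighted=True))  # (True, 'NATURAL')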
import DiscordActivity def prepare_per_account(db_results: list) -> dict[str, list[dict]]: @@ -29,7 +28,7 @@ def prepare_per_account(db_results: list) -> dict[str, list[dict]]: # a dictionary for results of each account for db_record in db_results: - acc_name = db_record["account_name"] + acc_name = db_record["user"] per_acc_query_result.setdefault(acc_name, []) per_acc_query_result[acc_name].append(db_record) @@ -54,7 +53,7 @@ def generate_interaction_matrix( list of all account names to be considered for analysis activities : list[str] the activities to include for generating interaction matrix - min length is 1 + it should be the heatmaps analytics fields Returns: --------- @@ -67,19 +66,18 @@ def generate_interaction_matrix( for acc in per_acc_interactions.keys(): db_res_per_acc = per_acc_interactions[acc] - dict_keys = prepare_interaction_field_names(activities=activities) # get results from db db_results = per_account_interactions( cursor_list=db_res_per_acc, - dict_keys=dict_keys, + dict_keys=activities, ) # obtain results for all interactions summed together acc_out_int = db_results["all_interaction_accounts"] - # for each interacting account + # for each interacting user for int_acc in acc_out_int.values(): - # if the interacting account is in acc_names + # if the interacting user is in acc_names if int_acc["account"] in acc_names: # store data in int_network int_matrix[ @@ -88,41 +86,3 @@ def generate_interaction_matrix( ] = int_acc["count"] return int_matrix - - -def prepare_interaction_field_names(activities: list[str]) -> list[str]: - """ - convert activity names to the field names - as are saved under the heatmaps collection - - - Parameters: - ------------ - activities : list[str] - the activities to be converted to db field names - could be the items below - - `mention` - - `reply` - - `reaction` - - Returns: - --------- - field_names : list[str] - the field names from database to use - """ - field_names = [] - for activity in activities: - if activity == DiscordActivity.Mention: - field_names.append("mentioner_per_acc") - elif activity == DiscordActivity.Reply: - field_names.append("replied_per_acc") - elif activity == DiscordActivity.Reaction: - field_names.append("reacted_per_acc") - elif activity == DiscordActivity.Thread_msg: - field_names.append("thr_messages") - elif activity == DiscordActivity.Lone_msg: - field_names.append("lone_messages") - else: - logging.warning("prepare_interaction_field_names: Wrong activity given!") - - return field_names diff --git a/discord_analyzer/analysis/utils/member_activity_history_utils.py b/tc_analyzer_lib/algorithms/utils/member_activity_history_utils.py similarity index 91% rename from discord_analyzer/analysis/utils/member_activity_history_utils.py rename to tc_analyzer_lib/algorithms/utils/member_activity_history_utils.py index 20e8c00..cff4f9d 100644 --- a/discord_analyzer/analysis/utils/member_activity_history_utils.py +++ b/tc_analyzer_lib/algorithms/utils/member_activity_history_utils.py @@ -2,9 +2,8 @@ from datetime import datetime, timedelta from typing import Any -from dateutil import parser -from discord_analyzer.DB_operations.mongodb_access import DB_access from numpy import array +from tc_analyzer_lib.DB_operations.mongodb_access import DB_access class MemberActivityPastUtils: @@ -141,13 +140,13 @@ def update_all_joined_day( return all_joined_day - def create_past_history_query(self, date_range): + def create_past_history_query(self, date_range: tuple[datetime, datetime]): """ create a query to retreive the data that 
are not analyzed Parameters: ------------- - date_range: list + date_range: tuple[datetime, datetime] a list of length 2, the first index has the start of the interval and the second index is end of the interval @@ -156,8 +155,8 @@ def create_past_history_query(self, date_range): query : dictionary the query representing the dictionary of filters """ - date_interval_start = datetime.strptime(date_range[0], "%y/%m/%d").isoformat() - date_interval_end = datetime.strptime(date_range[1], "%y/%m/%d").isoformat() + date_interval_start = date_range[0] + date_interval_end = date_range[1] query = { "date": { @@ -169,7 +168,12 @@ def create_past_history_query(self, date_range): return query - def convert_back_to_old_schema(self, retrieved_data, date_start, window_param): + def convert_back_to_old_schema( + self, + retrieved_data: list[dict], + date_start: datetime, + window_param: dict[str, str], + ) -> dict: """ convert the retrieved data back to the old schema we had, to do the analysis @@ -181,7 +185,8 @@ def convert_back_to_old_schema(self, retrieved_data, date_start, window_param): the starting point of analysis days_after_analysis_start : int the day count after analysis which are available in DB - window_param : tuple of int with len 2 + window_param : dict[str, str] + the window parameters containing the step_size and period_size Returns: ---------- @@ -215,9 +220,9 @@ def convert_back_to_old_schema(self, retrieved_data, date_start, window_param): for idx in range(len(retrieved_data)): db_record = retrieved_data[idx] - parser.parse(db_record["date"]) - timedelta( - days=window_param["period_size"] - ) + # parser.parse(db_record["date"]) - timedelta( + # days=window_param["period_size"] + # ) for activity in activity_dict.keys(): try: @@ -243,7 +248,7 @@ def convert_back_to_old_schema(self, retrieved_data, date_start, window_param): return activity_dict def _get_accounts_per_date( - self, joined_acc, date, date_key="joinedAt", account_key="discordId" + self, joined_acc, date, date_key="joined_at", account_key="id" ): """ get the accounts for a special date @@ -274,7 +279,9 @@ def _get_accounts_per_date( return account_names - def _get_joined_accounts(self, date_range) -> list[dict[str, Any]]: + def _get_joined_accounts( + self, date_range: tuple[datetime, datetime] + ) -> list[dict[str, Any]]: """ get the joined accounts for a time interval to a date range @@ -291,11 +298,11 @@ def _get_joined_accounts(self, date_range) -> list[dict[str, Any]]: an array of dictionaries each dictionary has `account` and `joinDate` member """ - query = {"joinedAt": {"$gte": date_range[0], "$lte": date_range[1]}} - feature_projection = {"joinedAt": 1, "discordId": 1, "_id": 0} + query = {"joined_at": {"$gte": date_range[0], "$lte": date_range[1]}} + feature_projection = {"joined_at": 1, "id": 1, "_id": 0} # quering the db now - cursor = self.db_access.query_db_find("guildmembers", query, feature_projection) + cursor = self.db_access.query_db_find("rawmembers", query, feature_projection) data = list(cursor) diff --git a/discord_analyzer/analysis/utils/member_activity_utils.py b/tc_analyzer_lib/algorithms/utils/member_activity_utils.py similarity index 72% rename from discord_analyzer/analysis/utils/member_activity_utils.py rename to tc_analyzer_lib/algorithms/utils/member_activity_utils.py index 976c865..10a51ea 100644 --- a/discord_analyzer/analysis/utils/member_activity_utils.py +++ b/tc_analyzer_lib/algorithms/utils/member_activity_utils.py @@ -1,15 +1,16 @@ +import logging from datetime import datetime, 
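# The schema change above swaps string dates parsed with "%y/%m/%d" for
# real datetime objects, so the Mongo filter compares BSON dates directly.
# A sketch of the resulting filter, assuming the usual closed-interval
# bounds; the sample dates are hypothetical.
from datetime import datetime


def past_history_query(date_range: tuple[datetime, datetime]) -> dict:
    # documents whose `date` falls inside the analysis interval
    return {"date": {"$gte": date_range[0], "$lte": date_range[1]}}


print(past_history_query((datetime(2024, 1, 1), datetime(2024, 1, 31))))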
timedelta from typing import Any import numpy as np import pymongo -from discord_analyzer.analysis.compute_interaction_matrix_discord import ( +from networkx import DiGraph +from tc_analyzer_lib.algorithms.compute_interaction_matrix_discord import ( compute_interaction_matrix_discord, ) -from discord_analyzer.DB_operations.mongodb_access import DB_access -from networkx import DiGraph +from tc_analyzer_lib.DB_operations.mongodb_access import DB_access +from tc_analyzer_lib.schemas.platform_configs.config_base import PlatformConfigBase from tc_core_analyzer_lib.assess_engagement import EngagementAssessment -from tc_core_analyzer_lib.utils.activity import DiscordActivity def get_joined_accounts(db_access: DB_access, date_range: tuple[datetime, datetime]): @@ -28,13 +29,13 @@ def get_joined_accounts(db_access: DB_access, date_range: tuple[datetime, dateti Returns: ---------- data : list of dictionaries - an array of dictionaries, each dictionary has `discordId` and `joined_at` member + an array of dictionaries, each dictionary has `id` and `joined_at` member """ - query = {"joinedAt": {"$gte": date_range[0], "$lte": date_range[1]}} - feature_projection = {"joinedAt": 1, "discordId": 1, "_id": 0} + query = {"joined_at": {"$gte": date_range[0], "$lte": date_range[1]}} + feature_projection = {"joined_at": 1, "id": 1, "_id": 0} # quering the db now - cursor = db_access.query_db_find("guildmembers", query, feature_projection) + cursor = db_access.query_db_find("rawmembers", query, feature_projection) data = list(cursor) @@ -65,7 +66,7 @@ def store_based_date( to make sure that the dates of analytics is for the past `analytics_day_range` days, not `analytics_day_range` forward joined_acc_dict : array of dictionary - an array of dictionaries, each dictionary has `discordId` and `joined_at` member + an array of dictionaries, each dictionary has `id` and `joined_at` member load_past : bool whether we loaded the past data or start processing from scratch If True, indicates that the past data is loaded beside the analytics data @@ -80,10 +81,10 @@ def store_based_date( return [] # post processing the - account_names = list(map(lambda record: record["discordId"], joined_acc_dict)) + account_names = list(map(lambda record: record["id"], joined_acc_dict)) acc_join_date = list( map( - lambda record: record["joinedAt"].date(), + lambda record: record["joined_at"].date(), joined_acc_dict, ) ) @@ -110,7 +111,7 @@ def store_based_date( else: date_using = analytics_date - data_record["date"] = date_using.isoformat() + data_record["date"] = date_using # analytics that were done in that date for activity in all_activities.keys(): @@ -132,9 +133,7 @@ def store_based_date( # if there was no data just save empty date records if max_days_after == 0: data_record = {} - data_record["date"] = ( - start_date + timedelta(days=analytics_day_range) - ).isoformat() + data_record["date"] = start_date + timedelta(days=analytics_day_range) for activity in all_activities.keys(): data_record[activity] = [] @@ -181,8 +180,8 @@ def convert_to_dict(data: list[Any], dict_keys: list[str]) -> dict[str, dict]: def get_users_past_window( - window_start_date: str, - window_end_date: str, + window_start_date: datetime, + window_end_date: datetime, collection: pymongo.collection.Collection, ) -> list[str]: """ @@ -190,10 +189,10 @@ def get_users_past_window( Parameters: ------------ - window_start_date : str + window_start_date : datetime the starting point of the window must be in format of the database which for now is %Y-%m-%d - 
window_end_date : str + window_end_date : datetime the ending point of the window must be in format of the database which for now is %Y-%m-%d collection : pymongo.collection.Collection @@ -206,8 +205,8 @@ def get_users_past_window( """ pipeline = [ # Filter documents based on date - {"$match": {"date": {"$gte": window_start_date, "$lte": window_end_date}}}, - {"$group": {"_id": "$account_name"}}, + {"$match": {"date": {"$gte": window_start_date, "$lt": window_end_date}}}, + {"$group": {"_id": "$user"}}, { "$group": { "_id": None, @@ -245,14 +244,14 @@ def get_latest_joined_users(db_access: DB_access, count: int = 5) -> list[str]: the userIds to use """ cursor = db_access.query_db_find( - table="guildmembers", - query={"isBot": False}, - feature_projection={"discordId": 1, "_id": 0}, - sorting=("joinedAt", -1), + table="rawmembers", + query={"is_bot": False}, + feature_projection={"id": 1, "_id": 0}, + sorting=("joined_at", -1), ).limit(count) usersId = list(cursor) - usersId = list(map(lambda x: x["discordId"], usersId)) + usersId = list(map(lambda x: x["id"], usersId)) return usersId @@ -262,53 +261,58 @@ def assess_engagement( accounts: list[str], action_params: dict[str, int], period_size: int, - db_access: DB_access, - channels: list[str], - analyze_dates: list[str], + platform_id: str, + resources: list[str], + resource_identifier: str, + analyze_dates: tuple[datetime, datetime], activities_name: list[str], activity_dict: dict[str, dict], - **kwargs, + analyzer_config: PlatformConfigBase, ) -> tuple[DiGraph, dict[str, dict]]: """ assess engagement of a window index for users - """ - activities_to_analyze = kwargs.get( - "activities_to_analyze", - [ - DiscordActivity.Mention, - DiscordActivity.Reply, - DiscordActivity.Reaction, - DiscordActivity.Lone_msg, - DiscordActivity.Thread_msg, - ], - ) - ignore_axis0 = kwargs.get( - "ignore_axis0", - [ - DiscordActivity.Mention, - ], - ) - ignore_axis1 = kwargs.get( - "ignore_axis1", - [ - DiscordActivity.Reply, - DiscordActivity.Reaction, - ], - ) + + hourly_analytics_using: list[str] = [] + raw_analytics_using: list[str] = [] + + ignore_axis0: list[str] = [] + + for config in analyzer_config.hourly_analytics: + if config.member_activities_used: + if config.type.value == "interactions": + logging.warning( + f"including hourly_analytics {config.name} as interaction! " + "Consider setting the `member_activities_used` of it to False." 
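# A sketch of the rewritten users-per-window aggregation: the match is now
# half-open ($gte start, $lt end) over datetime objects and groups on the
# renamed `user` field. The final accumulator is elided in this hunk, so
# the $addToSet stage below is an assumption for illustration.
from datetime import datetime


def past_window_pipeline(start: datetime, end: datetime) -> list[dict]:
    return [
        # half-open window avoids double counting the boundary day
        {"$match": {"date": {"$gte": start, "$lt": end}}},
        # one bucket per distinct user
        {"$group": {"_id": "$user"}},
        # assumed: collect the distinct ids into a single array
        {"$group": {"_id": None, "users": {"$addToSet": "$_id"}}},
    ]


print(past_window_pipeline(datetime(2024, 1, 1), datetime(2024, 1, 8)))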
+ " As the interacting user in " + "hourly_analytics interactions is not possible to identify" + ) + hourly_analytics_using.append(config.name) + + for config in analyzer_config.raw_analytics: + if config.member_activities_used: + raw_analytics_using.append(config.name) + + # in all cases of receiver and emitter + # the author of a message is the person + # receiving or emitting the activity + # ignore0 is for author + ignore_axis0.append(config.name) assess_engagment = EngagementAssessment( - activities=activities_to_analyze, + activities=hourly_analytics_using + raw_analytics_using, activities_ignore_0_axis=ignore_axis0, - activities_ignore_1_axis=ignore_axis1, + activities_ignore_1_axis=[], ) # obtain interaction matrix int_mat = compute_interaction_matrix_discord( - accounts, - analyze_dates, - channels, - db_access, - activities=activities_to_analyze, + acc_names=accounts, + date_range=analyze_dates, + resources=resources, + resource_identifier=resource_identifier, + platform_id=platform_id, + actions=hourly_analytics_using, + interactions=raw_analytics_using, ) # assess engagement diff --git a/discord_analyzer/analysis/neo4j_analysis/__init__.py b/tc_analyzer_lib/automation/__init__.py similarity index 100% rename from discord_analyzer/analysis/neo4j_analysis/__init__.py rename to tc_analyzer_lib/automation/__init__.py diff --git a/automation/automation_workflow.py b/tc_analyzer_lib/automation/automation_workflow.py similarity index 93% rename from automation/automation_workflow.py rename to tc_analyzer_lib/automation/automation_workflow.py index 688f2be..8f2b5da 100644 --- a/automation/automation_workflow.py +++ b/tc_analyzer_lib/automation/automation_workflow.py @@ -1,12 +1,11 @@ import logging from typing import Any -from automation.utils.automation_base import AutomationBase -from automation.utils.model import AutomationDB from pybars import Compiler +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.model import AutomationDB from tc_messageBroker.rabbit_mq.event import Event from tc_messageBroker.rabbit_mq.queue import Queue -from utils.get_guild_utils import get_guild_platform_id class AutomationWorkflow(AutomationBase): @@ -14,7 +13,7 @@ def __init__(self) -> None: super().__init__() self.automation_db = AutomationDB() - def start(self, guild_id: str): + def start(self, platform_id: str, guild_id: str): """ start the automation workflow for a guild @@ -48,7 +47,7 @@ def start(self, guild_id: str): members_by_category[category] = [] users1, users2 = self._get_users_from_memberactivities( - guild_id, category + platform_id, category ) users = self._subtract_users(users1, users2) @@ -81,7 +80,7 @@ def start(self, guild_id: str): compiled_message = action.template data = self._prepare_saga_data( - guild_id, user_id, compiled_message + platform_id, user_id, compiled_message ) saga_id = self._create_manual_saga(data) logging.info( @@ -111,7 +110,7 @@ def start(self, guild_id: str): for recipent in at.report.recipientIds: data = self._prepare_saga_data( - guild_id, recipent, compiled_message + platform_id, recipent, compiled_message ) saga_id = self._create_manual_saga(data) @@ -181,21 +180,20 @@ def _compile_message(self, data: dict[str, str], message: str) -> str: return compiled_message def _prepare_saga_data( - self, guild_id: str, user_id: str, message: str + self, platform_id: str, user_id: str, message: str ) -> dict[str, Any]: """ prepare the data needed for the saga Parameters: ------------ - guild_id : str - 
the guild_id having the user + platform_id : str + the platform_id having the user user_id : str the user_id to send message message : str the message to send the user """ - platform_id = get_guild_platform_id(guild_id) data = { "platformId": platform_id, "created": False, diff --git a/discord_analyzer/analysis/neo4j_utils/__init__.py b/tc_analyzer_lib/automation/utils/__init__.py similarity index 100% rename from discord_analyzer/analysis/neo4j_utils/__init__.py rename to tc_analyzer_lib/automation/utils/__init__.py diff --git a/automation/utils/automation_base.py b/tc_analyzer_lib/automation/utils/automation_base.py similarity index 88% rename from automation/utils/automation_base.py rename to tc_analyzer_lib/automation/utils/automation_base.py index bccc619..54e2164 100644 --- a/automation/utils/automation_base.py +++ b/tc_analyzer_lib/automation/utils/automation_base.py @@ -2,8 +2,8 @@ from typing import Any from uuid import uuid1 -from utils.mongo import MongoSingleton -from utils.rabbitmq import RabbitMQSingleton +from tc_analyzer_lib.utils.mongo import MongoSingleton +from tc_analyzer_lib.utils.rabbitmq import RabbitMQAccess class AutomationBase: @@ -12,7 +12,7 @@ def __init__(self) -> None: utilities for automation workflow """ self.mongo_client = MongoSingleton.get_instance().get_client() - self.rabbitmq = RabbitMQSingleton.get_instance().get_client() + self.rabbitmq = RabbitMQAccess.get_instance().get_client() def _get_users_from_guildmembers( self, guild_id: str, user_ids: list[str], strategy: str = "ngu" @@ -62,7 +62,7 @@ def _get_users_from_guildmembers( return users_data def _get_users_from_memberactivities( - self, guild_id: str, category: str + self, db_name: str, category: str ) -> tuple[list[str], list[str]]: """ get the users of memberactivities within a specific memberactivities @@ -70,8 +70,8 @@ def _get_users_from_memberactivities( Parameters: ------------- - guild_id : str - the guild id to get people's id + db_name : str + the database to get people's id category : str the category of memberactivities @@ -83,25 +83,21 @@ def _get_users_from_memberactivities( the users from past two days """ projection = {category: 1, "date": 1, "_id": 0} - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) users = ( - self.mongo_client[guild_id]["memberactivities"] + self.mongo_client[db_name]["memberactivities"] .find( { - "$or": [ - {"date": date_yesterday}, - {"date": date_two_past_days}, - ] + "date": { + "$gte": date_two_past_days, + "$lte": date_yesterday, + } }, projection, ) diff --git a/automation/utils/interfaces.py b/tc_analyzer_lib/automation/utils/interfaces.py similarity index 100% rename from automation/utils/interfaces.py rename to tc_analyzer_lib/automation/utils/interfaces.py diff --git a/automation/utils/model.py b/tc_analyzer_lib/automation/utils/model.py similarity index 93% rename from automation/utils/model.py rename to tc_analyzer_lib/automation/utils/model.py index 1f4dc09..3eded62 100644 --- a/automation/utils/model.py +++ b/tc_analyzer_lib/automation/utils/model.py @@ -1,5 +1,5 @@ -from utils.get_automation_env import 
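# The memberactivities lookup above replaces equality on two exact string
# dates with a single datetime range; a sketch of the midnight-floored
# bounds it computes (the sample timestamp is hypothetical):
from datetime import datetime, timedelta


def yesterday_window(now: datetime) -> dict:
    date_yesterday = (now - timedelta(days=1)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    date_two_past_days = (now - timedelta(days=2)).replace(
        hour=0, minute=0, second=0, microsecond=0
    )
    # closed range covering the two past analysis days
    return {"date": {"$gte": date_two_past_days, "$lte": date_yesterday}}


print(yesterday_window(datetime(2024, 1, 10, 15, 30)))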
get_automations_env
-from utils.mongo import MongoSingleton
+from tc_analyzer_lib.utils.get_automation_env import get_automations_env
+from tc_analyzer_lib.utils.mongo import MongoSingleton

 from .interfaces import Automation
diff --git a/discord_analyzer/analysis/utils/__init__.py b/tc_analyzer_lib/metrics/__init__.py
similarity index 100%
rename from discord_analyzer/analysis/utils/__init__.py
rename to tc_analyzer_lib/metrics/__init__.py
diff --git a/tc_analyzer_lib/metrics/analyzer_memberactivities.py b/tc_analyzer_lib/metrics/analyzer_memberactivities.py
new file mode 100644
index 0000000..976ad10
--- /dev/null
+++ b/tc_analyzer_lib/metrics/analyzer_memberactivities.py
@@ -0,0 +1,146 @@
+import logging
+from datetime import datetime, timedelta
+
+from tc_analyzer_lib.algorithms.compute_member_activity import compute_member_activity
+from tc_analyzer_lib.metrics.memberactivity_utils import MemberActivityUtils
+from tc_analyzer_lib.models.MemberActivityModel import MemberActivityModel
+from tc_analyzer_lib.models.RawInfoModel import RawInfoModel
+from tc_analyzer_lib.schemas.platform_configs.config_base import PlatformConfigBase
+from tc_analyzer_lib.utils.mongo import MongoSingleton
+
+
+class MemberActivities:
+    def __init__(
+        self,
+        platform_id: str,
+        resources: list[str],
+        action_config: dict[str, int],
+        window_config: dict[str, int],
+        analyzer_config: PlatformConfigBase,
+        analyzer_period: datetime,
+    ) -> None:
+        self.platform_id = platform_id
+        self.resources = resources
+        self.action_config = action_config
+        self.window_config = window_config
+        self.analyzer_config = analyzer_config
+        self.analyzer_period = analyzer_period
+        self.utils = MemberActivityUtils()
+
+    def analysis_member_activity(
+        self, from_start: bool = False
+    ) -> tuple[list[dict], list]:
+        """
+        Based on the raw data, create and store the member activity data
+
+        Parameters:
+        -------------
+        from_start : bool
+            whether to do the analytics from scratch or not
+            if True, the existing data in memberactivities is ignored
+            and the analysis is done from the first date
+
+        Returns:
+        ---------
+        memberactivity_results : list of dictionary
+            the list of data analyzed
+            could also be None if no database was available for the platform
+            or no raw info data was available
+        memberactivity_networkx_results : list of networkx objects
+            the list of data analyzed in networkx format
+            could also be None if no database was available for the platform
+            or no raw info data was available
+        """
+        guild_msg = f"PLATFORMID: {self.platform_id}:"
+
+        client = MongoSingleton.get_instance().get_client()
+
+        # check that the current platform's database exists
+        if self.platform_id not in client.list_database_names():
+            logging.error(f"{guild_msg} Database {self.platform_id} doesn't exist")
+            logging.error(f"{guild_msg} No such database!")
+            logging.info(f"{guild_msg} Continuing")
+            return (None, None)
+
+        member_activity_c = MemberActivityModel(client[self.platform_id])
+        rawinfo_c = RawInfoModel(client[self.platform_id])
+
+        # Testing if there are entries in the rawinfo collection
+        if rawinfo_c.count() == 0:
+            logging.warning(
+                f"No entries in the collection 'rawmemberactivities' in {self.platform_id} database"
+            )
+            return (None, None)
+
+        # get date range to be analyzed
+        today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+
+        logging.info(f"{guild_msg} memberactivities Analysis started!")
+
+        # initialize
+        load_past_data = False
+
+        # if we had data from past to use
+        if member_activity_c.count() != 0:
+            load_past_data = True
+
load_past_data = load_past_data and not from_start + + first_date = self.analyzer_period.replace( + hour=0, minute=0, second=0, microsecond=0 + ) + if first_date is None: + logging.error( + f"No platform: {self.platform_id} available in platforms.core!" + ) + return None, None + + last_date = today - timedelta(days=1) + + date_range: list[datetime] = [first_date, last_date] + + if load_past_data: + period_size = self.window_config["period_size"] + num_days_to_load = ( + max( + [ + self.action_config["CON_T_THR"], + self.action_config["VITAL_T_THR"], + self.action_config["STILL_T_THR"], + self.action_config["PAUSED_T_THR"], + ] + ) + + 1 + ) * period_size + date_range[0] = date_range[1] - timedelta(days=num_days_to_load) + + # if the date range goes back more than the "7 days `period` forward" + if date_range[0] < self.analyzer_period + timedelta(days=period_size): + date_range[0] = self.analyzer_period + timedelta(days=period_size) + + # get all users during date_range + all_users = self.utils.get_all_users(self.platform_id) + + networkx_objects, activities = compute_member_activity( + platform_id=self.platform_id, + resources=self.resources, + resource_identifier=self.analyzer_config.resource_identifier, + acc_names=all_users, + date_range=date_range, + window_param=self.window_config, + act_param=self.action_config, + load_past_data=load_past_data, + analyzer_config=self.analyzer_config, + ) + + if not from_start: + # first date of storing the data + first_storing_date = member_activity_c.get_last_date() + activities = self.utils.refine_memberactivities_data( + activities, first_storing_date + ) + + memberactivity_results = activities + memberactivity_networkx_results = networkx_objects + + return memberactivity_results, memberactivity_networkx_results diff --git a/tc_analyzer_lib/metrics/heatmaps/__init__.py b/tc_analyzer_lib/metrics/heatmaps/__init__.py new file mode 100644 index 0000000..ce32bed --- /dev/null +++ b/tc_analyzer_lib/metrics/heatmaps/__init__.py @@ -0,0 +1,4 @@ +# flake8: noqa +from .analytics_hourly import AnalyticsHourly +from .analytics_raw import AnalyticsRaw +from .heatmaps import Heatmaps diff --git a/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py b/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py new file mode 100644 index 0000000..cb848f2 --- /dev/null +++ b/tc_analyzer_lib/metrics/heatmaps/analytics_hourly.py @@ -0,0 +1,184 @@ +from datetime import date, datetime, time, timedelta +from typing import Any + +import numpy as np +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class AnalyticsHourly: + def __init__(self, platform_id: str) -> None: + client = MongoSingleton.get_instance().get_client() + # `rawmemberactivities` is the collection we would use for analytics + self.collection = client[platform_id]["rawmemberactivities"] + self.msg_prefix = f"PLATFORMID: {platform_id}:" + + def analyze( + self, + day: date, + activity: str, + activity_name: str, + activity_direction: str, + author_id: str | int, + **kwargs, + ) -> list[int]: + """ + analyze the hourly the messages + + Parameters + ------------ + day : date + analyze for a specific day + activity : str + the activity to be `actions` or `interactions` + activity_name : str + the activity name to be used from `rawmemberactivities` data + could be `reply`, `mention`, `message`, `commit` or any other + thing that is available on `rawmemberactivities` data + author_id : str + the author to filter data for + activity_direction : str + should be always either `emitter` or `receiver` + 
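# The look-back size computed above is driven by the largest engagement
# threshold; a worked example with hypothetical action_config values:
def num_days_to_load(action_config: dict[str, int], period_size: int) -> int:
    # one extra period beyond the largest threshold so every rolling
    # window has enough history behind it
    thresholds = [
        action_config["CON_T_THR"],
        action_config["VITAL_T_THR"],
        action_config["STILL_T_THR"],
        action_config["PAUSED_T_THR"],
    ]
    return (max(thresholds) + 1) * period_size


demo_config = {"CON_T_THR": 4, "VITAL_T_THR": 4, "STILL_T_THR": 2, "PAUSED_T_THR": 1}
print(num_days_to_load(demo_config, period_size=7))  # (4 + 1) * 7 = 35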
**kwargs : + additional_filters : dict[str, str] + the additional filtering for `rawmemberactivities` data of each platform + the keys could be `metadata.channel_id` with a specific value + """ + additional_filters: dict[str, str] = kwargs.get("additional_filters", {}) + + if activity_direction not in ["emitter", "receiver"]: + raise AttributeError( + "Wrong activity_direction given, " + "should be either `emitter` or `receiver`!" + ) + + if activity not in ["interactions", "actions"]: + raise AttributeError( + "Wrong `activity` given, " + "should be either `interactions` or `actions`" + ) + + activity_vector = self.get_hourly_analytics( + day=day, + activity=activity, + author_id=author_id, + filters={ + f"{activity}.name": activity_name, + f"{activity}.type": activity_direction, + **additional_filters, + }, + ) + + return activity_vector + + def get_hourly_analytics( + self, + day: date, + activity: str, + author_id: str | int, + filters: dict[str, dict[str, Any] | str] | None = None, + ) -> list[int]: + """ + Gets the list of documents for the stated day + + Parameters + ------------ + day : date + a specific day date + activity : str + to be `interactions` or `actions` + filter : dict[str, dict[str] | str] | None + the filtering that we need to apply + for default it is an None meaning + no filtering would be applied + msg : str + additional information to be logged + for default is empty string meaning no additional string to log + + Returns + --------- + hourly_analytics : list[int] + a vector with length of 24 + each index representing the count of activity for that day + """ + start_day = datetime.combine(day, time(0, 0, 0)) + end_day = start_day + timedelta(days=1) + + pipeline = [ + # the day for analytics + { + "$match": { + "date": {"$gte": start_day, "$lt": end_day}, + "author_id": author_id, + } + }, + # Unwind the activity array + {"$unwind": f"${activity}"}, + ] + if filters is not None: + pipeline.append( + {"$match": filters}, + ) + + # we need to count each enaged user as an interaction + if activity == "interactions": + pipeline.extend( + [ + {"$unwind": "$interactions.users_engaged_id"}, + # ignoring self-interactions + { + "$match": { + "$expr": { + "$ne": ["$interactions.users_engaged_id", "$author_id"] + } + } + }, + ] + ) + + pipeline.extend( + [ + # Add a field for the hour of the day from the date field + {"$addFields": {"hour": {"$hour": "$date"}}}, + # Group by the hour and count the number of mentions + {"$group": {"_id": "$hour", "count": {"$sum": 1}}}, + # Project the results into the desired format + {"$sort": {"_id": 1}}, # sorted by hour + ] + ) + + # Execute the aggregation pipeline + cursor = self.collection.aggregate(pipeline) + results = list(cursor) + + hourly_analytics = self._process_vectors(results) + return hourly_analytics + + def _process_vectors( + self, analytics_mongo_results: list[dict[str, int]] + ) -> list[int]: + """ + post process the mongodb query aggregation results + + Parameters + ------------ + analytics_mongo_results : list[dict[str, int]] + the mongodb query aggregation results + the format of the data should be as below + `[{'_id': 0, 'count': 2}, {'_id': 1, 'count': 1}, ...]` + the `_id` is hour and `count` is the count of user activity + + Returns + --------- + hourly_analytics : list[int] + a vector with length of 24 + each index representing the count of actions/interactions for that day + """ + hourly_analytics = np.zeros(24) + + for analytics in analytics_mongo_results: + hour = analytics["_id"] + activity_count = 
analytics["count"] + + hourly_analytics[hour] = activity_count + + return list(hourly_analytics) diff --git a/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py b/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py new file mode 100644 index 0000000..9f7f1da --- /dev/null +++ b/tc_analyzer_lib/metrics/heatmaps/analytics_raw.py @@ -0,0 +1,190 @@ +import logging +from datetime import date, datetime, time, timedelta +from typing import Any + +from tc_analyzer_lib.schemas import RawAnalyticsItem +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class AnalyticsRaw: + def __init__(self, platform_id: str) -> None: + client = MongoSingleton.get_instance().get_client() + # `rawmemberactivities` is the collection we would use for analytics + self.collection = client[platform_id]["rawmemberactivities"] + self.msg_prefix = f"PLATFORMID: {platform_id}:" + + def analyze( + self, + day: date, + activity: str, + activity_name: str, + activity_direction: str, + author_id: int, + **kwargs, + ) -> list[RawAnalyticsItem]: + """ + analyze the count of messages + + Parameters + ------------ + day : datetime.date + analyze for a specific day + activity : str + the activity to be `actions` or `interactions` + activity_name : str + the activity name to be used from `rawmemberactivities` data + could be `reply`, `mention`, `message`, `commit` or any other + thing that is available on `rawmemberactivities` data + author_id : str + the author to filter data for + activity_direction : str + should be always either `emitter` or `receiver` + **kwargs : + additional_filters : dict[str, str] + the additional filtering for `rawmemberactivities` data of each platform + the keys could be `metadata.channel_id` with a specific value + + Returns + --------- + activity_count : RawAnalyticsItem + raw analytics item which holds the user and + the count of interaction in that day + """ + additional_filters: dict[str, str] = kwargs.get("additional_filters", {}) + + if activity_direction not in ["emitter", "receiver"]: + raise ValueError( + "Wrong activity_direction given, " + "should be either `emitter` or `receiver`!" + ) + + if activity not in ["interactions", "actions"]: + raise ValueError( + "Wrong `activity` given, " + "should be either `interactions` or `actions`!" + f" The provided one is {activity}" + ) + + activity_count = self.get_analytics_count( + day=day, + activity=activity, + author_id=author_id, + activity_name=activity_name, + activity_direction=activity_direction, + filters=additional_filters, + ) + + return activity_count + + def get_analytics_count( + self, + day: date, + activity: str, + activity_name: str, + author_id: str | int, + activity_direction: str, + **kwargs, + ) -> list[RawAnalyticsItem]: + """ + Gets the list of documents for the stated day + + Parameters + ------------ + day : date + a specific day date + activity : str + to be `interactions` or `actions` + activity_name : str + the activity name to do filtering + could be `reply`, `reaction`, `mention, or ... 
+ author_id : str | int + the author to do analytics on its data + activity_direction : str + the direction of activity + could be `emitter` or `receiver` + **kwargs : dict + filters : dict[str, dict[str] | str] + the filtering that we need to apply + for default it is an None meaning + no filtering would be applied + + Returns + --------- + activity_count : list[RawAnalyticsItem] + raw analytics item which holds the user and + the count of interaction in that day + """ + filters: dict[str, dict[str, Any] | str] | None = kwargs.get("filters") + start_day = datetime.combine(day, time(0, 0, 0)) + end_day = start_day + timedelta(days=1) + + match_filters = { + "date": {"$gte": start_day, "$lt": end_day}, + "author_id": author_id, + } + if filters is not None: + match_filters = { + **match_filters, + **filters, + } + + pipeline = [ + { + "$match": { + **match_filters, + } + }, + {"$unwind": f"${activity}"}, + { + "$match": { + f"{activity}.name": activity_name, + f"{activity}.type": activity_direction, + }, + }, + {"$unwind": f"${activity}.users_engaged_id"}, + {"$group": {"_id": f"${activity}.users_engaged_id", "count": {"$sum": 1}}}, + ] + + cursor = self.collection.aggregate(pipeline) + db_result = list(cursor) + activity_count = self._prepare_raw_analytics_item(author_id, db_result) + + return activity_count + + def _prepare_raw_analytics_item( + self, + author_id: str | int, + activities_data: list[dict[str, str | int]], + ) -> list[RawAnalyticsItem]: + """ + post process the database results + + this will take the format `[{'_id': 9000, 'count': 4}]` and output a RawAnalyticsItem + + Parameters + ------------ + author_id : str + just for skipping self-interactions + activities_data : dict[str, str | int] + the user interaction count. + the data will be as an example `[{'_id': 9000, 'count': 4}]` + _id would be the users interacting with + + Returns + -------- + raw_analytics : list[RawAnalyticsItem] + the data in format of raw analytics item + """ + analytics: list[RawAnalyticsItem] = [] + for data in activities_data: + if data["_id"] != author_id: + raw_analytics = RawAnalyticsItem( + account=data["_id"], # type: ignore + count=data["count"], # type: ignore + ) + analytics.append(raw_analytics) + else: + # self interaction + logging.info("Skipping self-interaction!") + + return analytics diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py new file mode 100644 index 0000000..40a36f9 --- /dev/null +++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps.py @@ -0,0 +1,260 @@ +import logging +from datetime import date, datetime, timedelta + +from tc_analyzer_lib.metrics.heatmaps import AnalyticsHourly, AnalyticsRaw +from tc_analyzer_lib.metrics.heatmaps.heatmaps_utils import HeatmapsUtils +from tc_analyzer_lib.schemas.platform_configs.config_base import PlatformConfigBase +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class Heatmaps: + def __init__( + self, + platform_id: str, + period: datetime, + resources: list[str], + analyzer_config: PlatformConfigBase, + ) -> None: + """ + Heatmaps analytics wrapper + + Parameters + ------------ + platform_id : str + the platform that we want heatmaps analytics for + period : datetime + the date that analytics could be started + resources : list[str] + a list of resources id + i.e. 
a list of `channel_id` for discord or `chat_id` for telegram + analyzer_config : PlatformConfigBase + the configuration for analytics job + should be a class inheriting from `PlatformConfigBase` and with predefined values + """ + self.platform_id = platform_id + self.resources = resources + self.client = MongoSingleton.get_instance().get_client() + self.period = period + + self.analyzer_config = analyzer_config + self.utils = HeatmapsUtils(platform_id) + + def start(self, from_start: bool = False) -> list[dict]: + """ + Based on the rawdata creates and stores the heatmap data + + Parameters: + ------------- + from_start : bool + do the analytics from scrach or not + if True, if wouldn't pay attention to the existing data in heatmaps + and will do the analysis from the first date + + Returns: + --------- + heatmaps_results : list of dictionary + the list of data analyzed + also the return could be None if no database for guild + or no raw info data was available + """ + log_prefix = f"PLATFORMID: {self.platform_id}:" + + last_date = self.utils.get_last_date() + + analytics_date: datetime + if last_date is None or from_start: + analytics_date = self.period + else: + analytics_date = last_date + timedelta(days=1) + + # initialize the data array + heatmaps_results = [] + + users_count = self.utils.get_users_count() + + iteration_count = self._compute_iteration_counts( + analytics_date=analytics_date, + resources_count=len(self.resources), + authors_count=users_count, + ) + + index = 0 + while analytics_date.date() < datetime.now().date(): + for resource_id in self.resources: + # for more efficient retrieval + # we're always using the cursor and re-querying the db + user_ids_cursor = self.utils.get_users() + + for author in user_ids_cursor: + logging.info( + f"{log_prefix} ANALYZING HEATMAPS {index}/{iteration_count}" + ) + index += 1 + + author_id = author["id"] + doc_date = analytics_date.date() + document = { + self.analyzer_config.resource_identifier: resource_id, + "date": datetime(doc_date.year, doc_date.month, doc_date.day), + "user": author_id, + } + hourly_analytics = self._process_hourly_analytics( + day=analytics_date, + resource=resource_id, + author_id=author_id, + ) + + raw_analytics = self._process_raw_analytics( + day=analytics_date, + resource=resource_id, + author_id=author_id, + ) + document = {**document, **hourly_analytics, **raw_analytics} + + heatmaps_results.append(document) + + # analyze next day + analytics_date += timedelta(days=1) + + return heatmaps_results + + def _process_hourly_analytics( + self, + day: date, + resource: str, + author_id: str | int, + ) -> dict[str, list]: + """ + start processing hourly analytics for a day based on given config + + Parameters + ------------ + day : date + analyze for a specific day + resurce : str + the resource we want to apply the filtering on + author_id : str | int + the author to filter data for + """ + analytics_hourly = AnalyticsHourly(self.platform_id) + analytics: dict[str, list[int]] = {} + for config in self.analyzer_config.hourly_analytics: + # if it was a predefined analytics + if config.name in [ + "replied", + "replier", + "mentioner", + "mentioned", + "reacter", + "reacted", + ]: + activity_name: str + if config.name in ["replied", "replier"]: + activity_name = "reply" + elif config.name in ["mentioner", "mentioned"]: + activity_name = "mention" + else: + activity_name = "reaction" + + analytics_vector = analytics_hourly.analyze( + day=day, + activity=config.type.value, + activity_name=activity_name, + 
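+                    # a concrete resolution, as a sketch: with the Discord
+                    # config, `config.name == "replied"` reaches this call as
+                    # activity="interactions", activity_name="reply", and
+                    # (next argument) activity_direction="emitter"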
activity_direction=config.direction.value, + author_id=author_id, + additional_filters={ + f"metadata.{self.analyzer_config.resource_identifier}": resource, + }, + ) + analytics[config.name] = analytics_vector + + # if it was a custom analytics that we didn't write code + # the mongodb condition is given in their configuration + else: + conditions = config.rawmemberactivities_condition + + if config.activity_name is None or conditions is None: + raise ValueError( + "For custom analytics the `activity_name` and `conditions`" + "in analyzer config shouldn't be None" + ) + + activity_name = config.activity_name + + analytics_vector = analytics_hourly.analyze( + day=day, + activity=config.type.value, + activity_name=activity_name, + activity_direction=config.direction.value, + author_id=author_id, + additional_filters={ + f"metadata.{self.analyzer_config.resource_identifier}": resource, + **conditions, + }, + ) + analytics[config.name] = analytics_vector + + return analytics + + def _process_raw_analytics( + self, + day: date, + resource: str, + author_id: str | int, + ) -> dict[str, list[dict]]: + analytics_raw = AnalyticsRaw(self.platform_id) + analytics: dict[str, list[dict]] = {} + + for config in self.analyzer_config.raw_analytics: + # default analytics that we always can have + activity_name: str + if config.name == "reacted_per_acc": + activity_name = "reaction" + elif config.name == "mentioner_per_acc": + activity_name = "mention" + elif config.name == "replied_per_acc": + activity_name = "reply" + else: + # custom analytics + if config.activity_name is None: + raise ValueError( + "`activity_name` for custom analytics should be provided" + ) + activity_name = config.activity_name + + additional_filters: dict[str, str] = { + f"metadata.{self.analyzer_config.resource_identifier}": resource, + } + # preparing for custom analytics (if available in config) + if config.rawmemberactivities_condition is not None: + additional_filters = { + **additional_filters, + **config.rawmemberactivities_condition, + } + + analytics_items = analytics_raw.analyze( + day=day, + activity=config.type.value, + activity_name=activity_name, + activity_direction=config.direction.value, + author_id=author_id, + additional_filters=additional_filters, + ) + + # converting to dict data + # so we could later save easily in db + analytics[config.name] = [item.to_dict() for item in analytics_items] + + return analytics + + def _compute_iteration_counts( + self, + analytics_date: datetime, + resources_count: int, + authors_count: int, + ) -> int: + iteration_count = ( + (datetime.now() - analytics_date).days * resources_count * authors_count + ) + + return iteration_count diff --git a/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py new file mode 100644 index 0000000..60abaf1 --- /dev/null +++ b/tc_analyzer_lib/metrics/heatmaps/heatmaps_utils.py @@ -0,0 +1,68 @@ +from datetime import datetime + +from pymongo.cursor import Cursor +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class HeatmapsUtils: + def __init__(self, platform_id: str) -> None: + self.platform_id = platform_id + client = MongoSingleton.get_instance().get_client() + self.database = client[platform_id] + + def get_users(self, is_bot: bool = False) -> Cursor: + """ + get the users of a platform + + Parameters + ----------- + is_bot : bool + if we want to fetch the bots + for default is False meaning the real users will be returned + + Returns: + --------- + bots : pymongo.cursor.Cursor + 
MongoDB cursor for users + in case of large amount of data we should loop over this + the cursor data format would be as `{'id': xxxx}` + """ + cursor = self.database["rawmembers"].find( + {"is_bot": is_bot}, {"_id": 0, "id": 1} + ) + return cursor + + def get_users_count(self, is_bot: bool = False) -> int: + """ + get the count of users + + Parameters + ----------- + is_bot : bool + if we want to fetch the bots + for default is False meaning the real users will be returned + + Returns + --------- + users_count : int + the count of users + """ + users_count = self.database["rawmembers"].count_documents( + {"is_bot": is_bot}, + ) + return users_count + + def get_last_date(self) -> datetime | None: + """ + get the last document's date + """ + cursor = ( + self.database["heatmaps"] + .find({}, {"date": 1, "_id": 0}) + .sort("date", -1) + .limit(1) + ) + documents = list(cursor) + last_date = documents[0]["date"] if documents != [] else None + + return last_date diff --git a/discord_analyzer/analyzer/memberactivity_utils.py b/tc_analyzer_lib/metrics/memberactivity_utils.py similarity index 69% rename from discord_analyzer/analyzer/memberactivity_utils.py rename to tc_analyzer_lib/metrics/memberactivity_utils.py index 25d559f..0f3a41b 100644 --- a/discord_analyzer/analyzer/memberactivity_utils.py +++ b/tc_analyzer_lib/metrics/memberactivity_utils.py @@ -1,12 +1,12 @@ import logging from dateutil import parser -from discord_analyzer.DB_operations.mongo_neo4j_ops import MongoNeo4jDB +from tc_analyzer_lib.utils.mongo import MongoSingleton class MemberActivityUtils: - def __init__(self, DB_connection: MongoNeo4jDB) -> None: - self.DB_connection = DB_connection + def __init__(self) -> None: + self.client = MongoSingleton.get_instance().get_client() def refine_memberactivities_data(self, all_member_activities, first_date): """ @@ -21,10 +21,9 @@ def refine_memberactivities_data(self, all_member_activities, first_date): the first date of saving date we would use this to specify the exact data activity to save """ - data_to_save = [] for activity in all_member_activities: - if first_date is None or parser.parse(activity["date"]) > first_date: + if first_date is None or activity["date"] > first_date: data_to_save.append(activity) return data_to_save @@ -33,9 +32,7 @@ def refine_memberactivities_data(self, all_member_activities, first_date): def get_one_guild(self, guild): """Get one guild setting from guilds collection by guild""" - result = self.DB_connection.mongoOps.mongo_db_access.db_mongo_client["Core"][ - "platforms" - ].find_one({"metadata.id": guild}) + result = self.client["Core"]["platforms"].find_one({"metadata.id": guild}) return result # get all user accounts during date_range in guild from rawinfo data @@ -43,25 +40,23 @@ def get_all_users( self, guildId: str, ) -> list[str]: - # check guild is exist - - client = self.DB_connection.mongoOps.mongo_db_access.db_mongo_client + all_users: list[str] - if guildId not in client.list_database_names(): - logging.error(f"Database {guildId} doesn't exist") - logging.error(f"Existing databases: {client.list_database_names()}") - logging.info("Continuing") - return [] - - cursor = client[guildId]["guildmembers"].find( - { - "isBot": {"$ne": True}, - }, - {"discordId": 1, "_id": 0}, - ) - - users_data = list(cursor) - all_users = list(map(lambda x: x["discordId"], users_data)) + # check guild is exist + if guildId not in self.client.list_database_names(): + logging.error( + f"Database {guildId} doesn't exist! 
Returning empty array for users" + ) + all_users = [] + else: + cursor = self.client[guildId]["rawmembers"].find( + { + "is_bot": {"$ne": True}, + }, + {"id": 1, "_id": 0}, + ) + users_data = list(cursor) + all_users = list(map(lambda x: x["id"], users_data)) return all_users diff --git a/discord_analyzer/analyzer/neo4j_analytics.py b/tc_analyzer_lib/metrics/neo4j_analytics.py similarity index 54% rename from discord_analyzer/analyzer/neo4j_analytics.py rename to tc_analyzer_lib/metrics/neo4j_analytics.py index c2b0540..451615d 100644 --- a/discord_analyzer/analyzer/neo4j_analytics.py +++ b/tc_analyzer_lib/metrics/neo4j_analytics.py @@ -1,24 +1,34 @@ # A wrapper to compute the neo4j metrics in cron-job import logging -from discord_analyzer.analysis.neo4j_analysis.analyzer_node_stats import NodeStats -from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality -from discord_analyzer.analysis.neo4j_analysis.local_clustering_coefficient import ( +from tc_analyzer_lib.algorithms.neo4j_analysis.analyzer_node_stats import NodeStats +from tc_analyzer_lib.algorithms.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.algorithms.neo4j_analysis.local_clustering_coefficient import ( LocalClusteringCoeff, ) -from discord_analyzer.analysis.neo4j_analysis.louvain import Louvain +from tc_analyzer_lib.algorithms.neo4j_analysis.louvain import Louvain +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps class Neo4JAnalytics: - def __init__(self) -> None: + def __init__(self, platform_id: str, graph_schema: GraphSchema) -> None: """ neo4j metrics to be compute - input variables are all the neo4j credentials + + Parameters + ------------ + platform_id : str + the platform to compute analytics for + graph_schema : GraphSchema + the graph schema representative of node and relationship labels """ self.neo4j_ops = Neo4jOps.get_instance() + self.platform_id = platform_id + self.log_prefix = f"PLATFORMID: {platform_id} " + self.graph_schema = graph_schema - def compute_metrics(self, guildId: str, from_start: bool) -> None: + def compute_metrics(self, from_start: bool) -> None: """ compute the essential metrics we wanted for neo4j @@ -35,16 +45,12 @@ def compute_metrics(self, guildId: str, from_start: bool) -> None: # if from_start: # self._remove_analytics_interacted_in(guildId) - self.compute_louvain_algorithm(guildId, from_start) - self.compute_local_clustering_coefficient(guildId, from_start) - self.compute_network_decentrality(guildId, from_start) - self.compute_node_stats(guildId, from_start) + self.compute_louvain_algorithm(from_start) + self.compute_local_clustering_coefficient(from_start) + self.compute_network_decentrality(from_start) + self.compute_node_stats(from_start) - def compute_local_clustering_coefficient( - self, - guildId: str, - from_start: bool, - ): + def compute_local_clustering_coefficient(self, from_start: bool): """ compute localClusteringCoefficient @@ -57,20 +63,18 @@ def compute_local_clustering_coefficient( Note: only some metrics support this others would be computed from_start=True """ - msg = f"GUILDID: {guildId}:" try: # Local Clustering Coefficient - logging.info(f"{msg} Computing LocalClusteringCoefficient") - lcc = LocalClusteringCoeff() - lcc.compute(guildId=guildId, from_start=from_start) + logging.info(f"{self.log_prefix}Computing LocalClusteringCoefficient") + lcc = LocalClusteringCoeff(self.platform_id, self.graph_schema) + lcc.compute(from_start=from_start) except Exception as exp: logging.error( - 
f"{msg} Exception in computing LocalClusteringCoefficient, {exp}" + f"{self.log_prefix}Exception in computing LocalClusteringCoefficient, {exp}" ) def compute_fragmentation_score( self, - guildId: str, past_window_date: float, scale_fragmentation_score: int = 1, ): @@ -83,18 +87,16 @@ def compute_fragmentation_score( Parameters: -------------- - guildId : str - the guildId to use past_window_date : float the timestamp for window date scale_fragmentation_score : int scaling the fragmentation score by a value default is `1` meaning no scale """ - msg = f"GUILDID: {guildId}:" - logging.info(f"{msg} Averaging LocalClusteringCoefficient") - query = """ - MATCH ()-[r:INTERACTED_IN]->(g:Guild {guildId: $guildId }) + logging.info(f"{self.log_prefix}Averaging LocalClusteringCoefficient") + + query = f""" + MATCH ()-[r:{self.graph_schema.interacted_in_rel}]->(g:{self.graph_schema.platform_label} {{id: $platform_id }}) WHERE r.date >= $past_date WITH r.date as date, r.localClusteringCoefficient as lcc RETURN @@ -103,30 +105,27 @@ def compute_fragmentation_score( """ records, _, _ = self.neo4j_ops.neo4j_driver.execute_query( query, - guildId=guildId, + platform_id=self.platform_id, scale=scale_fragmentation_score, past_date=past_window_date, ) return records - def compute_network_decentrality(self, guildId: str, from_start: bool): + def compute_network_decentrality(self, from_start: bool): """ compute network decentrality and save results back to neo4j """ - msg = f"GUILDID: {guildId}:" try: - centrality = Centerality() + centrality = Centerality(self.platform_id, self.graph_schema) # degree decentrality - _ = centrality.compute_network_decentrality( - guildId=guildId, from_start=from_start - ) + _ = centrality.compute_network_decentrality(from_start=from_start) except Exception as exp: logging.error( - f"{msg} Exception occured in computing Network decentrality, {exp}!" + f"{self.log_prefix}Exception occured in computing Network decentrality, {exp}!" 
) - def compute_node_stats(self, guildId: str, from_start: bool): + def compute_node_stats(self, from_start: bool): """ compute node stats each DiscordAccount node could be either @@ -134,15 +133,20 @@ def compute_node_stats(self, guildId: str, from_start: bool): - "1": Receiver - "2": Balanced """ - msg = f"GUILDID: {guildId}:" try: - logging.info(f"{msg}: computing node stats") - node_stats = NodeStats(threshold=2) - node_stats.compute_stats(guildId, from_start) + logging.info(f"{self.log_prefix} computing node stats") + node_stats = NodeStats( + platform_id=self.platform_id, + graph_schema=self.graph_schema, + threshold=2, + ) + node_stats.compute_stats(from_start) except Exception as exp: - logging.error(f"{msg} Exception occured in node stats computation, {exp}") + logging.error( + f"{self.log_prefix}Exception occured in node stats computation, {exp}" + ) - def _remove_analytics_interacted_in(self, guildId: str) -> None: + def _remove_analytics_interacted_in(self) -> None: """ Remove the INTERACTED_IN relations Note: we saved those under the INTERACTED_IN relation @@ -153,13 +157,15 @@ def _remove_analytics_interacted_in(self, guildId: str) -> None: the guild we want to delete the relations for """ with self.neo4j_ops.neo4j_driver.session() as session: - query = """ - MATCH (:DiscordAccount) -[r:INTERACTED_IN]->(:Guild {guildId: $guildId}) + query = f""" + MATCH (:{self.graph_schema.user_label}) -[ + r:{self.graph_schema.interacted_in_rel} + ]->(:{self.graph_schema.platform_label} {{id: $platform_id}}) DELETE r """ - session.run(query=query, guildId=guildId) + session.run(query=query, platform_id=self.platform_id) - def compute_louvain_algorithm(self, guild_id: str, from_start: bool) -> None: + def compute_louvain_algorithm(self, from_start: bool) -> None: """ compute the louvain algorithm and save the results within the db @@ -170,6 +176,5 @@ def compute_louvain_algorithm(self, guild_id: str, from_start: bool) -> None: from_start : bool compute from the start of the data available or continue the previous """ - louvain = Louvain() - - louvain.compute(guild_id, from_start) + louvain = Louvain(self.platform_id, self.graph_schema) + louvain.compute(from_start) diff --git a/tc_analyzer_lib/metrics/utils/__init__.py b/tc_analyzer_lib/metrics/utils/__init__.py new file mode 100644 index 0000000..1fdacc2 --- /dev/null +++ b/tc_analyzer_lib/metrics/utils/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from .platform import Platform diff --git a/tc_analyzer_lib/metrics/utils/analyzer_db_manager.py b/tc_analyzer_lib/metrics/utils/analyzer_db_manager.py new file mode 100644 index 0000000..96394b2 --- /dev/null +++ b/tc_analyzer_lib/metrics/utils/analyzer_db_manager.py @@ -0,0 +1,16 @@ +from tc_analyzer_lib.DB_operations.mongo_neo4j_ops import MongoNeo4jDB + + +class AnalyzerDBManager: + def __init__(self): + """ + base class for the analyzer + """ + pass + + def database_connect(self): + """ + Connect to the database + """ + self.DB_connections = MongoNeo4jDB(testing=False) + self.DB_connections.set_mongo_db_ops() diff --git a/tc_analyzer_lib/metrics/utils/platform.py b/tc_analyzer_lib/metrics/utils/platform.py new file mode 100644 index 0000000..5989a12 --- /dev/null +++ b/tc_analyzer_lib/metrics/utils/platform.py @@ -0,0 +1,147 @@ +from datetime import datetime + +from bson import ObjectId +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class Platform: + def __init__(self, platform_id: str) -> None: + """ + the utilities for platform + + Parameters + ------------ + platform_id : str 
+ a specific platform's id + """ + self.platform_id = platform_id + self.client = MongoSingleton.get_instance().get_client() + + def check_existance(self) -> bool: + """ + check for existance of a platform + + Returns + ---------- + exists : bool + if the platform exist or not + """ + platform = self.client["Core"]["platforms"].find_one( + {"_id": ObjectId(self.platform_id)}, + {"_id": 1}, + ) + exists: bool + if platform is None: + exists = False + else: + exists = True + + return exists + + def update_isin_progress(self): + """ + update isInProgress field of platforms collection + """ + existance = self.check_existance() + if existance is False: + raise AttributeError("No such a platform available!") + + self.client["Core"]["platforms"].update_one( + {"_id": ObjectId(self.platform_id)}, + {"$set": {"metadata.isInProgress": False}}, + ) + + def get_community_id(self) -> str: + """ + get the community id of a platform + + Returns + -------- + community_id : str + the community that the Platform is related to + """ + platform = self.client["Core"]["platforms"].find_one( + {"_id": ObjectId(self.platform_id)}, {"community": 1} + ) + if platform is None: + raise ValueError( + f"No platform is available for the given platform: {self.platform_id}" + ) + + community_id = str(platform["community"]) + + return community_id + + def get_platform_period(self) -> datetime: + """ + get the period field for analyzer of a platform + + Returns + -------- + period : datetime + the period which the analyzer should start its work from + """ + platform = self.client["Core"]["platforms"].find_one( + {"_id": ObjectId(self.platform_id)}, + {"metadata.period": 1}, + ) + + if platform is None: + raise AttributeError( + f"No such platform for platform_id: {self.platform_id}" + ) + + period = platform["metadata"]["period"] + return period + + def get_platform_resources(self) -> list[str]: + """ + get the platform resources id + This will do the initial filtering on data + + Returns + --------- + resources : list[str] + a list of resources to do filtering on data + """ + platform = self.client["Core"]["platforms"].find_one( + {"_id": ObjectId(self.platform_id)}, + {"metadata.resources": 1}, + ) + + if platform is None: + raise AttributeError( + f"No such platform for platform_id: {self.platform_id}" + ) + + resources = platform["metadata"]["resources"] + return resources + + def get_platform_analyzer_params(self) -> tuple[dict[str, int], dict[str, int]]: + """ + get the platform's analyzer parameters + the parameters are `window` and `action` + + Returns + --------- + window : dict[str, int] + the window parameters + action : dict[str, int] + the action parameters to configura analyzer + """ + platform = self.client["Core"]["platforms"].find_one( + {"_id": ObjectId(self.platform_id)}, + { + "metadata.window": 1, + "metadata.action": 1, + }, + ) + if platform is None: + raise AttributeError( + f"No such platform for platform_id: {self.platform_id}" + ) + + window = platform["metadata"]["window"] + action = platform["metadata"]["action"] + + return window, action diff --git a/discord_analyzer/models/BaseModel.py b/tc_analyzer_lib/models/BaseModel.py similarity index 98% rename from discord_analyzer/models/BaseModel.py rename to tc_analyzer_lib/models/BaseModel.py index d122926..7f287c6 100644 --- a/discord_analyzer/models/BaseModel.py +++ b/tc_analyzer_lib/models/BaseModel.py @@ -13,6 +13,7 @@ class BaseModel: def __init__(self, collection_name: str, database: Database): self.collection_name = collection_name 
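+        # cache the collection handle once at construction time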
self.database = database + self.collection = database[collection_name] self.exists = False def collection_exists(self): @@ -37,7 +38,6 @@ def insert_one(self, obj_dict): msg += "Collection does not exist" logging.info(msg) return - self.collection = self.database[self.collection_name] logging.info( f"Inserting guild object into the {self.collection_name} collection." ) diff --git a/discord_analyzer/models/GuildsRnDaoModel.py b/tc_analyzer_lib/models/GuildsRnDaoModel.py similarity index 96% rename from discord_analyzer/models/GuildsRnDaoModel.py rename to tc_analyzer_lib/models/GuildsRnDaoModel.py index a1f9557..ae2e6cb 100644 --- a/discord_analyzer/models/GuildsRnDaoModel.py +++ b/tc_analyzer_lib/models/GuildsRnDaoModel.py @@ -1,6 +1,5 @@ -#!/usr/bin/env python3 -from discord_analyzer.models.BaseModel import BaseModel from pymongo.database import Database +from tc_analyzer_lib.models.BaseModel import BaseModel class GuildsRnDaoModel(BaseModel): diff --git a/discord_analyzer/models/HeatMapModel.py b/tc_analyzer_lib/models/HeatMapModel.py similarity index 97% rename from discord_analyzer/models/HeatMapModel.py rename to tc_analyzer_lib/models/HeatMapModel.py index afcba0e..4c7bf15 100644 --- a/discord_analyzer/models/HeatMapModel.py +++ b/tc_analyzer_lib/models/HeatMapModel.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 from datetime import datetime -from discord_analyzer.models.BaseModel import BaseModel from pymongo import DESCENDING from pymongo.database import Database +from tc_analyzer_lib.models.BaseModel import BaseModel class HeatMapModel(BaseModel): diff --git a/discord_analyzer/models/MemberActivityModel.py b/tc_analyzer_lib/models/MemberActivityModel.py similarity index 83% rename from discord_analyzer/models/MemberActivityModel.py rename to tc_analyzer_lib/models/MemberActivityModel.py index 6b3c256..c6f4121 100644 --- a/discord_analyzer/models/MemberActivityModel.py +++ b/tc_analyzer_lib/models/MemberActivityModel.py @@ -1,9 +1,8 @@ #!/usr/bin/env python3 import logging -from datetime import datetime import pymongo -from discord_analyzer.models.BaseModel import BaseModel +from tc_analyzer_lib.models.BaseModel import BaseModel class MemberActivityModel(BaseModel): @@ -18,15 +17,13 @@ def get_last_date(self): Gets the date of the last document """ try: - date_str = ( + date = ( self.database[self.collection_name] .find() .sort([("date", pymongo.DESCENDING)]) .limit(1)[0]["date"] ) - date_format = "%Y-%m-%dT%H:%M:%S" - date_object = datetime.strptime(date_str, date_format) - return date_object + return date except Exception as e: print(e) return None diff --git a/discord_analyzer/models/RawInfoModel.py b/tc_analyzer_lib/models/RawInfoModel.py similarity index 87% rename from discord_analyzer/models/RawInfoModel.py rename to tc_analyzer_lib/models/RawInfoModel.py index 76f78fd..51f7f68 100644 --- a/discord_analyzer/models/RawInfoModel.py +++ b/tc_analyzer_lib/models/RawInfoModel.py @@ -3,15 +3,15 @@ from datetime import datetime, timedelta from typing import Any -from discord_analyzer.models.BaseModel import BaseModel from pymongo import ASCENDING from pymongo.database import Database +from tc_analyzer_lib.models.BaseModel import BaseModel class RawInfoModel(BaseModel): def __init__(self, database: Database): - super().__init__(collection_name="rawinfos", database=database) - self.guild_msg = f"GUILDID: {self.database.name}:" + super().__init__(collection_name="rawmemberactivities", database=database) + self.guild_msg = f"PLATFORMID: {self.database.name}:" def get_first_date(self): """ 
@@ -38,7 +38,7 @@ def get_day_entries(self, day: datetime, msg: str = "") -> list[dict[str, Any]]: `msg` parameter is for additional info to be logged """ - guild_msg = f"GUILDID: {self.database.name}:{msg}" + guild_msg = f"PLATFORMID: {self.database.name}:{msg}" start_day = day.replace(hour=0, minute=0, second=0) end_day = start_day + timedelta(days=1) diff --git a/discord_analyzer/analyzer/__init__.py b/tc_analyzer_lib/models/__init__.py similarity index 100% rename from discord_analyzer/analyzer/__init__.py rename to tc_analyzer_lib/models/__init__.py diff --git a/tc_analyzer_lib/models/raw_member_activities.py b/tc_analyzer_lib/models/raw_member_activities.py new file mode 100644 index 0000000..7ada9e3 --- /dev/null +++ b/tc_analyzer_lib/models/raw_member_activities.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +import logging +from datetime import datetime, time, timedelta + +import numpy as np +from tc_analyzer_lib.models.BaseModel import BaseModel +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class RawMemberActivities(BaseModel): + def __init__(self, platform_id: str): + client = MongoSingleton.get_instance().get_client() + super().__init__( + collection_name="rawmemberactivities", database=client[platform_id] + ) + self.msg_prefix = f"PLATFORMID: {platform_id}:" + + def get_hourly_analytics( + self, + day: datetime.date, + activity: str, + filters: dict[str, dict[str] | str] | None = None, + msg: str = "", + ) -> list[int]: + """ + Gets the list of documents for the stated day + + Parameters + ------------ + day : datetime.date + a specific day date + activity : str + to be `interactions` or `actions` + filter : dict[str, dict[str] | str] | None + the filtering that we need to apply + for default it is an None meaning + no filtering would be applied + msg : str + additional information to be logged + for default is empty string meaning no additional string to log + + Returns + --------- + hourly_analytics : list[int] + a vector with length of 24 + each index representing the count of activity for that day + """ + prefix = f"{self.msg_prefix} {msg}" + + if activity not in ["interactions", "actions"]: + raise ValueError( + f"{prefix} Wrong activity given!" 
+ " Should be either `interactions`, or `actions`" + ) + + start_day = datetime.combine(day, time(0, 0, 0)) + end_day = start_day + timedelta(days=1) + + logg_msg = f"{prefix} Fetching documents |" + logg_msg += f" {self.collection_name}: {start_day} -> {end_day}" + logging.info(logg_msg) + + pipeline = [ + # the day for analytics + {"$match": {"date": {"$gte": start_day, "$lt": end_day}}}, + # Unwind the activity array + {"$unwind": f"${activity}"}, + ] + if filters is not None: + pipeline.append( + {"$match": filters}, + ) + + pipeline.extend( + [ + # Add a field for the hour of the day from the date field + {"$addFields": {"hour": {"$hour": "$date"}}}, + # Group by the hour and count the number of mentions + {"$group": {"_id": "$hour", "count": {"$sum": 1}}}, + # Project the results into the desired format + {"$sort": {"_id": 1}}, # sorted by hour + ] + ) + + # Execute the aggregation pipeline + cursor = self.collection.aggregate(pipeline) + results = list(cursor) + + hourly_analytics = self._process_vectors(results) + return hourly_analytics + + def _process_vectors( + self, analytics_mongo_results: list[dict[str, int]] + ) -> list[int]: + """ + post process the mongodb query aggregation results + + Parameters + ------------ + analytics_mongo_results : list[dict[str, int]] + the mongodb query aggregation results + the format of the data should be as below + `[{'_id': 0, 'count': 2}, {'_id': 1, 'count': 1}, ...]` + the `_id` is hour and `count` is the count of user activity + + Returns + --------- + hourly_analytics : list[int] + a vector with length of 24 + each index representing the count of actions/interactions for that day + """ + hourly_analytics = np.zeros(24) + + for analytics in analytics_mongo_results: + hour = analytics["_id"] + if hour < 0 or hour > 24: + raise ValueError("Wrong hour given from mongodb query!") + activity_count = analytics["count"] + + hourly_analytics[hour] = activity_count + + return list(hourly_analytics) diff --git a/tc_analyzer_lib/publish_on_success.py b/tc_analyzer_lib/publish_on_success.py new file mode 100644 index 0000000..dc9d0ca --- /dev/null +++ b/tc_analyzer_lib/publish_on_success.py @@ -0,0 +1,73 @@ +import logging + +from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow +from tc_analyzer_lib.utils.get_guild_utils import ( + get_platform_community_owner, + get_platform_guild_id, + get_platform_name, +) +from tc_analyzer_lib.utils.rabbitmq import RabbitMQAccess +from tc_messageBroker.rabbit_mq.event import Event +from tc_messageBroker.rabbit_mq.queue import Queue + + +def publish_on_success(platform_id: str, recompute: bool) -> None: + """ + publish a message to discord platform for a specific platform + telling the Community Manager (CM) the work is finished and run automation + + Note: this will work just for discord platform! 
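+    A hypothetical invocation (the platform id below is made up for
+    illustration):
+
+    >>> publish_on_success("657f1ab2c9e77200aa123456", recompute=True)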
+ + Parameters + ------------ + platform_id : str + the discord platform to send message to + recompute : bool + if recompute equal to `True` then publish the job finished message for CM + else, just run the automation + """ + + msg = f"PLATFORMID: {platform_id}: " + logging.info(f"{msg}publishing task done to CM") + + guild_id = get_platform_guild_id(platform_id) + platform_name = get_platform_name(platform_id) + + automation_workflow = AutomationWorkflow() + # working specifically for discord + if platform_name == "discord" and recompute: + logging.info(f"{msg}Sending job finished message & starting automation!") + rabbitmq = RabbitMQAccess.get_instance().get_client() + + message = ( + "Your data import into TogetherCrew is complete! " + "See your insights on your dashboard https://app.togethercrew.com/." + " If you have questions send a DM to katerinabc (Discord) or k_bc0 (Telegram)." + ) + owner_discord_id = get_platform_community_owner(platform_id) + data = { + "platformId": platform_id, + "created": False, + "discordId": owner_discord_id, + "message": message, # the message to send + "userFallback": True, + } + + # creating the discord notify saga + saga_id = automation_workflow._create_manual_saga(data=data) + + rabbitmq.publish( + Queue.DISCORD_BOT, + event=Event.DISCORD_BOT.SEND_MESSAGE, + content={"uuid": saga_id}, + ) + automation_workflow.start(platform_id, guild_id) + + elif recompute is False: + logging.info(f"{msg}Just running the automation!") + automation_workflow.start(platform_id, guild_id) + else: + logging.info( + f"{msg}platform was not discord! given platform: {platform_name}" + "No automation or job finished message will be fired" + ) diff --git a/tc_analyzer_lib/schemas/__init__.py b/tc_analyzer_lib/schemas/__init__.py new file mode 100644 index 0000000..fa41b99 --- /dev/null +++ b/tc_analyzer_lib/schemas/__init__.py @@ -0,0 +1,6 @@ +# flake8: noqa +from .activity_type import ActivityDirection, ActivityType +from .graph import GraphSchema +from .hourly_analytics import HourlyAnalytics +from .raw_analytics import RawAnalytics +from .raw_analytics_item import RawAnalyticsItem diff --git a/tc_analyzer_lib/schemas/activity_type.py b/tc_analyzer_lib/schemas/activity_type.py new file mode 100644 index 0000000..4a82c16 --- /dev/null +++ b/tc_analyzer_lib/schemas/activity_type.py @@ -0,0 +1,11 @@ +from enum import Enum + + +class ActivityType(Enum): + ACTION = "actions" + INTERACTION = "interactions" + + +class ActivityDirection(Enum): + RECEIVER = "receiver" + EMITTER = "emitter" diff --git a/tc_analyzer_lib/schemas/graph.py b/tc_analyzer_lib/schemas/graph.py new file mode 100644 index 0000000..430378f --- /dev/null +++ b/tc_analyzer_lib/schemas/graph.py @@ -0,0 +1,44 @@ +class GraphSchema: + def __init__( + self, + platform: str, + interacted_with_rel: str = "INTERACTED_WITH", + interacted_in_rel: str = "INTERACTED_IN", + member_relation: str = "IS_MEMBER", + ) -> None: + """ + the graph schema + + Parameters + ------------ + platform : str + the name of a platform + could be `discord`, `discourse`, `telegram`, etc + would be converted into PascalCase + interacted_with_rel : str + the interacted with relation name + default is always to be `INTERACTED_WITH` + is always between members + interacted_in_rel : str + the interacted in relation name + default is always to be `INTERACTED_IN` + is always between a member to a platform + member_relation : str + the membership relation label + default is always to be `IS_MEMBER` + """ + platform = 
self._capitalize_first_letter(platform) + self.interacted_with_rel = interacted_with_rel + self.interacted_in_rel = interacted_in_rel + self.member_relation = member_relation + + self.user_label = platform + "Member" + self.platform_label = platform + "Platform" + + def _capitalize_first_letter(self, platform: str): + if "_" in platform or " " in platform: + raise ValueError( + "no underline or spaces should be in platform name. " + f"Given name: {platform}" + ) + return platform.title() diff --git a/tc_analyzer_lib/schemas/hourly_analytics.py b/tc_analyzer_lib/schemas/hourly_analytics.py new file mode 100644 index 0000000..7af3e0c --- /dev/null +++ b/tc_analyzer_lib/schemas/hourly_analytics.py @@ -0,0 +1,45 @@ +from . import ActivityDirection, ActivityType + + +class HourlyAnalytics: + def __init__( + self, + name: str, + type: ActivityType, + member_activities_used: bool, + direction: ActivityDirection, + rawmemberactivities_condition: dict | None = None, + activity_name: str | None = None, + ): + self.name = name + self.type = type + self.direction = direction + self.member_activities_used = member_activities_used + self.activity_name = activity_name + self.rawmemberactivities_condition = rawmemberactivities_condition + + def to_dict(self): + result = { + "name": self.name, + "type": self.type.value, + "direction": self.direction.value, + "member_activities_used": self.member_activities_used, + "activity_name": self.activity_name, + } + if self.rawmemberactivities_condition: + result["rawmemberactivities_condition"] = self.rawmemberactivities_condition + + return result + + @classmethod + def from_dict(cls, data: dict): + rawmemberactivities_condition = data.get("rawmemberactivities_condition") + + return cls( + name=data["name"], + type=ActivityType(data["type"]), + member_activities_used=data["member_activities_used"], + direction=ActivityDirection(data["direction"]), + activity_name=data.get("activity_name"), + rawmemberactivities_condition=rawmemberactivities_condition, + ) diff --git a/tc_analyzer_lib/schemas/platform_configs/__init__.py b/tc_analyzer_lib/schemas/platform_configs/__init__.py new file mode 100644 index 0000000..e6679a7 --- /dev/null +++ b/tc_analyzer_lib/schemas/platform_configs/__init__.py @@ -0,0 +1,2 @@ +# flake8: noqa +from .discord import DiscordAnalyzerConfig diff --git a/tc_analyzer_lib/schemas/platform_configs/config_base.py b/tc_analyzer_lib/schemas/platform_configs/config_base.py new file mode 100644 index 0000000..1e6aeef --- /dev/null +++ b/tc_analyzer_lib/schemas/platform_configs/config_base.py @@ -0,0 +1,36 @@ +from tc_analyzer_lib.schemas import HourlyAnalytics, RawAnalytics + + +class PlatformConfigBase: + def __init__( + self, + platform: str, + resource_identifier: str, + hourly_analytics: list[HourlyAnalytics], + raw_analytics: list[RawAnalytics], + ): + self.platform = platform + self.resource_identifier = resource_identifier + self.hourly_analytics = hourly_analytics + self.raw_analytics = raw_analytics + + def to_dict(self): + return { + "platform": self.platform, + "resource_identifier": self.resource_identifier, + "hourly_analytics": [ha.to_dict() for ha in self.hourly_analytics], + "raw_analytics": [ra.to_dict() for ra in self.raw_analytics], + } + + @classmethod + def from_dict(cls, data: dict): + hourly_analytics = [ + HourlyAnalytics.from_dict(ha) for ha in data["hourly_analytics"] + ] + raw_analytics = [RawAnalytics.from_dict(ra) for ra in data["raw_analytics"]] + return cls( + platform=data["platform"], + 
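+            # round-trip sketch: `from_dict(cfg.to_dict())` rebuilds an
+            # equivalent config; the schema classes define no __eq__, so
+            # compare `to_dict()` outputs when checking this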
resource_identifier=data["resource_identifier"], + hourly_analytics=hourly_analytics, + raw_analytics=raw_analytics, + ) diff --git a/tc_analyzer_lib/schemas/platform_configs/discord.py b/tc_analyzer_lib/schemas/platform_configs/discord.py new file mode 100644 index 0000000..effb904 --- /dev/null +++ b/tc_analyzer_lib/schemas/platform_configs/discord.py @@ -0,0 +1,94 @@ +from tc_analyzer_lib.schemas import ( + ActivityDirection, + ActivityType, + HourlyAnalytics, + RawAnalytics, +) +from tc_analyzer_lib.schemas.platform_configs.config_base import PlatformConfigBase + + +class DiscordAnalyzerConfig(PlatformConfigBase): + def __init__(self): + platform: str = "discord" + resource_identifier: str = "channel_id" + hourly_analytics: list[HourlyAnalytics] = [ + HourlyAnalytics( + name="thr_messages", + type=ActivityType.ACTION, + member_activities_used=True, + rawmemberactivities_condition={ + "metadata.thread_id": {"$ne": None}, + }, + direction=ActivityDirection.EMITTER, + activity_name="message", + ), + HourlyAnalytics( + name="lone_messages", + type=ActivityType.ACTION, + member_activities_used=True, + rawmemberactivities_condition={ + "metadata.thread_id": None, + }, + direction=ActivityDirection.EMITTER, + activity_name="message", + ), + HourlyAnalytics( + name="replier", + type=ActivityType.INTERACTION, + member_activities_used=False, + direction=ActivityDirection.RECEIVER, + ), + HourlyAnalytics( + name="replied", + type=ActivityType.INTERACTION, + member_activities_used=False, + direction=ActivityDirection.EMITTER, + ), + HourlyAnalytics( + name="mentioner", + type=ActivityType.INTERACTION, + member_activities_used=False, + direction=ActivityDirection.EMITTER, + ), + HourlyAnalytics( + name="mentioned", + type=ActivityType.INTERACTION, + member_activities_used=False, + direction=ActivityDirection.RECEIVER, + ), + HourlyAnalytics( + name="reacter", + type=ActivityType.INTERACTION, + member_activities_used=False, + direction=ActivityDirection.RECEIVER, + ), + HourlyAnalytics( + name="reacted", + type=ActivityType.INTERACTION, + member_activities_used=False, + direction=ActivityDirection.EMITTER, + ), + ] + + raw_analytics: list[RawAnalytics] = [ + RawAnalytics( + name="replied_per_acc", + type=ActivityType.INTERACTION, + member_activities_used=True, + direction=ActivityDirection.EMITTER, + ), + RawAnalytics( + name="mentioner_per_acc", + type=ActivityType.INTERACTION, + member_activities_used=True, + direction=ActivityDirection.EMITTER, + ), + RawAnalytics( + name="reacted_per_acc", + type=ActivityType.INTERACTION, + member_activities_used=True, + direction=ActivityDirection.EMITTER, + ), + ] + + super().__init__(platform, resource_identifier, hourly_analytics, raw_analytics) diff --git a/tc_analyzer_lib/schemas/raw_analytics.py b/tc_analyzer_lib/schemas/raw_analytics.py new file mode 100644 index 0000000..c6535f4 --- /dev/null +++ b/tc_analyzer_lib/schemas/raw_analytics.py @@ -0,0 +1,40 @@ +from . 
import ActivityDirection, ActivityType + + +class RawAnalytics: + def __init__( + self, + name: str, + type: ActivityType | str, + member_activities_used: bool, + direction: ActivityDirection, + activity_name: str | None = None, + rawmemberactivities_condition: dict | None = None, + ): + self.name = name + self.type = type if isinstance(type, ActivityType) else ActivityType(type) + self.member_activities_used = member_activities_used + self.direction = direction + self.activity_name = activity_name + self.rawmemberactivities_condition = rawmemberactivities_condition + + def to_dict(self): + return { + "name": self.name, + "type": self.type.value, + "member_activities_used": self.member_activities_used, + "direction": self.direction.value, + "rawmemberactivities_condition": self.rawmemberactivities_condition, + "activity_name": self.activity_name, + } + + @classmethod + def from_dict(cls, data: dict): + return cls( + name=data["name"], + type=ActivityType(data["type"]), + member_activities_used=data["member_activities_used"], + direction=ActivityDirection(data["direction"]), + activity_name=data.get("activity_name"), + rawmemberactivities_condition=data.get("rawmemberactivities_condition"), + ) diff --git a/tc_analyzer_lib/schemas/raw_analytics_item.py b/tc_analyzer_lib/schemas/raw_analytics_item.py new file mode 100644 index 0000000..d6a4887 --- /dev/null +++ b/tc_analyzer_lib/schemas/raw_analytics_item.py @@ -0,0 +1,11 @@ +class RawAnalyticsItem: + """ + Class for storing number of interactions per account + """ + + def __init__(self, account: str, count: int): + self.account = account + self.count = count + + def to_dict(self): + return {"account": self.account, "count": self.count} diff --git a/tc_analyzer_lib/tc_analyzer.py b/tc_analyzer_lib/tc_analyzer.py new file mode 100644 index 0000000..57d0a8a --- /dev/null +++ b/tc_analyzer_lib/tc_analyzer.py @@ -0,0 +1,213 @@ +import logging +from datetime import datetime + +from tc_analyzer_lib.metrics.analyzer_memberactivities import MemberActivities +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.utils.analyzer_db_manager import AnalyzerDBManager +from tc_analyzer_lib.metrics.utils.platform import Platform +from tc_analyzer_lib.schemas import GraphSchema +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.schemas.platform_configs.config_base import PlatformConfigBase + + +class TCAnalyzer(AnalyzerDBManager): + """ + TogetherCrew's Analyzer + class that handles database connections and data analysis + """ + + def __init__( + self, + platform_id: str, + resources: list[str], + period: datetime, + action: dict[str, int], + window: dict[str, int], + analyzer_config: PlatformConfigBase = DiscordAnalyzerConfig(), + ): + """ + analyze the platform's data + producing heatmaps, memberactivities, and graph analytics + + Parameters + ----------- + platform_id : str + platform to analyze its data + resources : list[str] + the resources id for filtering on data + period : datetime + the period to compute the analytics for + action : dict[str, int] + Parameters for computing different memberactivities + window : dict[str, int] + Parameters for the whole analyzer, includes the step size and window size + analyzer_config : PlatformConfigBase + the config for analyzer to use + """ + logging.basicConfig() + logging.getLogger().setLevel(logging.INFO) + + self.platform_id = platform_id + self.resources = 
resources
+        self.period = period
+        self.action = action
+        self.window = window
+        self.analyzer_config = analyzer_config
+
+        self.platform_utils = Platform(platform_id)
+        self.community_id = self.platform_utils.get_community_id()
+
+        self.graph_schema = GraphSchema(platform=analyzer_config.platform)
+        self.neo4j_analytics = Neo4JAnalytics(platform_id, self.graph_schema)
+
+        # connect to Neo4j & MongoDB database
+        self.database_connect()
+
+    def analyze(self, recompute: bool) -> None:
+        # TODO: merge the run_once and recompute code paths
+        if recompute:
+            self.recompute()
+        else:
+            self.run_once()
+
+    def run_once(self):
+        """Run the analysis and append to the previous analytics"""
+        # check that the platform is available
+        # if not, an error will be raised
+        self.check_platform()
+
+        logging.info(f"Creating heatmaps for platform id: {self.platform_id}")
+
+        heatmaps_analysis = Heatmaps(
+            platform_id=self.platform_id,
+            period=self.period,
+            resources=self.resources,
+            analyzer_config=self.analyzer_config,
+        )
+        heatmaps_data = heatmaps_analysis.start(from_start=False)
+
+        # storing heatmaps since memberactivities use them
+        analytics_data = {}
+        analytics_data["heatmaps"] = heatmaps_data
+        analytics_data["memberactivities"] = (None, None)
+
+        self.DB_connections.store_analytics_data(
+            analytics_data=analytics_data,
+            platform_id=self.platform_id,
+            graph_schema=self.graph_schema,
+            remove_memberactivities=False,
+            remove_heatmaps=False,
+        )
+
+        memberactivity_analysis = MemberActivities(
+            platform_id=self.platform_id,
+            resources=self.resources,
+            action_config=self.action,
+            window_config=self.window,
+            analyzer_config=self.analyzer_config,
+            analyzer_period=self.period,
+        )
+        (
+            member_activities_data,
+            member_activities_networkx_data,
+        ) = memberactivity_analysis.analysis_member_activity(from_start=False)
+
+        analytics_data = {}
+        # storing the whole data into a dictionary
+        analytics_data["heatmaps"] = None
+        analytics_data["memberactivities"] = (
+            member_activities_data,
+            member_activities_networkx_data,
+        )
+
+        self.DB_connections.store_analytics_data(
+            analytics_data=analytics_data,
+            platform_id=self.platform_id,
+            graph_schema=self.graph_schema,
+            remove_heatmaps=False,
+            remove_memberactivities=False,
+        )
+
+        self.neo4j_analytics.compute_metrics(from_start=False)
+
+        self.platform_utils.update_isin_progress()
+
+    def recompute(self):
+        """
+        recompute the analytics (heatmaps + memberactivities + graph analytics)
+        for a new selection of channels
+        """
+        # check that the platform is available
+        # if not, an error will be raised
+        self.check_platform()
+
+        logging.info(f"Analyzing the Heatmaps data for platform: {self.platform_id}!")
+        heatmaps_analysis = Heatmaps(
+            platform_id=self.platform_id,
+            period=self.period,
+            resources=self.resources,
+            analyzer_config=self.analyzer_config,
+        )
+        heatmaps_data = heatmaps_analysis.start(from_start=True)
+
+        # storing heatmaps since memberactivities use them
+        analytics_data = {}
+        analytics_data["heatmaps"] = heatmaps_data
+        analytics_data["memberactivities"] = (None, None)
+
+        self.DB_connections.store_analytics_data(
+            analytics_data=analytics_data,
+            platform_id=self.platform_id,
+            graph_schema=self.graph_schema,
+            remove_memberactivities=False,
+            remove_heatmaps=True,
+        )
+
+        # run the member_activity analysis
+        logging.info(
+            f"Analyzing the MemberActivities data for platform: {self.platform_id}!"
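+            # the run below recomputes memberactivities from the first date,
+            # and the store step replaces the previously saved ones
+            # (remove_memberactivities=True)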
+ ) + memberactivity_analysis = MemberActivities( + platform_id=self.platform_id, + resources=self.resources, + action_config=self.action, + window_config=self.window, + analyzer_config=self.analyzer_config, + analyzer_period=self.period, + ) + ( + member_activities_data, + member_acitivities_networkx_data, + ) = memberactivity_analysis.analysis_member_activity(from_start=True) + + # storing whole data into a dictinoary + analytics_data = {} + # storing whole data into a dictinoary + analytics_data["heatmaps"] = None + analytics_data["memberactivities"] = ( + member_activities_data, + member_acitivities_networkx_data, + ) + + logging.info(f"Storing analytics data for platform: {self.platform_id}!") + self.DB_connections.store_analytics_data( + analytics_data=analytics_data, + platform_id=self.platform_id, + graph_schema=self.graph_schema, + remove_memberactivities=True, + remove_heatmaps=False, + ) + + self.neo4j_analytics.compute_metrics(from_start=True) + self.platform_utils.update_isin_progress() + + def check_platform(self): + """ + check if the platform is available + """ + exist = self.platform_utils.check_existance() + if not exist: + raise ValueError( + f"Platform with platform_id: {self.platform_id} doesn't exist!" + ) diff --git a/discord_analyzer/analyzer/utils/__init__.py b/tc_analyzer_lib/utils/__init__.py similarity index 100% rename from discord_analyzer/analyzer/utils/__init__.py rename to tc_analyzer_lib/utils/__init__.py diff --git a/utils/credentials.py b/tc_analyzer_lib/utils/credentials.py similarity index 100% rename from utils/credentials.py rename to tc_analyzer_lib/utils/credentials.py diff --git a/utils/get_automation_env.py b/tc_analyzer_lib/utils/get_automation_env.py similarity index 100% rename from utils/get_automation_env.py rename to tc_analyzer_lib/utils/get_automation_env.py diff --git a/tc_analyzer_lib/utils/get_guild_utils.py b/tc_analyzer_lib/utils/get_guild_utils.py new file mode 100644 index 0000000..b1adf3e --- /dev/null +++ b/tc_analyzer_lib/utils/get_guild_utils.py @@ -0,0 +1,123 @@ +from bson.objectid import ObjectId +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +def get_platform_guild_id(platform_id: str) -> str: + """ + get both the guild id and community from the platform id + + Parameters + ----------- + platform_id : str + the platform `_id` within the platforms collection + + Returns + -------- + guild_id : str + the discord guild id for that specific platform + """ + mongo_client = MongoSingleton.get_instance().client + + obj_platform_id = ObjectId(platform_id) + platform = mongo_client["Core"]["platforms"].find_one( + {"name": "discord", "_id": obj_platform_id}, + {"metadata.id": 1}, + ) + if platform is None: + raise AttributeError(f"PLATFORM_ID: {platform_id}, No guild found!") + + guild_id = platform["metadata"]["id"] + return guild_id + + +def get_guild_platform_id(guild_id: str) -> str: + """ + get the guild platform id using the given guild id + + Parameters + ------------ + guild_id : str + the id for the specified guild + + Returns + -------- + platform_id : str + the platform id related to the given guild + """ + mongo_client = MongoSingleton.get_instance().client + + guild_info = mongo_client["Core"]["platforms"].find_one( + {"metadata.id": guild_id}, {"_id": 1} + ) + if guild_info is not None: + platform_id = str(guild_info["_id"]) + else: + raise ValueError(f"No available guild with id {guild_id}") + + return platform_id + + +def get_platform_name(platform_id: str) -> str: + """ + get the platform name + + 
Parameters + ------------- + platform_id : str + the platform id related to a platform + + Returns + -------- + platform_name : str + the platform name i.e.: `discord`, `telegram`, etc + """ + client = MongoSingleton.get_instance().client + + platform = client["Core"]["platforms"].find_one( + {"_id": ObjectId(platform_id)}, {"name": True} + ) + if platform is None: + raise AttributeError( + f"platform with given platform_id: {platform_id} not found!" + ) + + return platform["name"] + + +def get_platform_community_owner(platform_id: str) -> str: + """ + get the community owener discord id using the platform id + + Parameters + ------------- + platform_id : str + the platform id related to a platform + + Returns + -------- + owner_discord_id : str + the owner discord id + """ + client = MongoSingleton.get_instance().client + + platform = client["Core"]["platforms"].find_one( + {"_id": ObjectId(platform_id)}, {"community": True} + ) + + if platform is None: + raise AttributeError( + f"platform with given platform_id: {platform_id} not found!" + ) + + community_id = platform["community"] + community = client["Core"]["users"].find_one( + {"communities": community_id}, {"discordId": True} + ) + if community is None: + raise AttributeError( + f"Community user with given community id: {community_id} not found!" + ) + + owner_discord_id = community["discordId"] + + return owner_discord_id diff --git a/utils/mongo.py b/tc_analyzer_lib/utils/mongo.py similarity index 93% rename from utils/mongo.py rename to tc_analyzer_lib/utils/mongo.py index 9d67566..c40e36d 100644 --- a/utils/mongo.py +++ b/tc_analyzer_lib/utils/mongo.py @@ -2,7 +2,7 @@ from typing import Any from pymongo import MongoClient -from utils.credentials import get_mongo_credentials +from tc_analyzer_lib.utils.credentials import get_mongo_credentials class MongoSingleton: diff --git a/tc_analyzer_lib/utils/rabbitmq.py b/tc_analyzer_lib/utils/rabbitmq.py new file mode 100644 index 0000000..a833b0f --- /dev/null +++ b/tc_analyzer_lib/utils/rabbitmq.py @@ -0,0 +1,42 @@ +import logging + +from tc_analyzer_lib.utils.credentials import get_rabbit_mq_credentials +from tc_messageBroker import RabbitMQ +from tc_messageBroker.rabbit_mq.queue import Queue + + +class RabbitMQAccess: + __instance = None + + def __init__(self): + # if RabbitMQAccess.__instance is not None: + # raise Exception("This class is a singleton!") + # else: + creds = get_rabbit_mq_credentials() + self.client = self.create_rabbitmq_client(creds) + RabbitMQAccess.__instance = self + + @staticmethod + def get_instance(): + # if RabbitMQAccess.__instance is None: + try: + RabbitMQAccess() + logging.info("RabbitMQ broker Connected Successfully!") + except Exception as exp: + logging.error(f"RabbitMQ broker not connected! 
exp: {exp}") + + return RabbitMQAccess.__instance + + def get_client(self): + return self.client + + def create_rabbitmq_client(self, rabbit_creds: dict[str, str]): + rabbitmq = RabbitMQ( + broker_url=rabbit_creds["broker_url"], + port=rabbit_creds["port"], + username=rabbit_creds["username"], + password=rabbit_creds["password"], + ) + rabbitmq.connect(queue_name=Queue.DISCORD_ANALYZER) + + return rabbitmq diff --git a/utils/redis.py b/tc_analyzer_lib/utils/redis.py similarity index 94% rename from utils/redis.py rename to tc_analyzer_lib/utils/redis.py index c8224ed..e0cff89 100644 --- a/utils/redis.py +++ b/tc_analyzer_lib/utils/redis.py @@ -1,7 +1,7 @@ import logging import redis -from utils.credentials import get_redis_credentials +from tc_analyzer_lib.utils.credentials import get_redis_credentials class RedisSingleton: diff --git a/tests/integration/test_activity_hourly_lone_message.py b/tests/integration/test_activity_hourly_lone_message.py deleted file mode 100644 index cfb125e..0000000 --- a/tests/integration/test_activity_hourly_lone_message.py +++ /dev/null @@ -1,64 +0,0 @@ -from datetime import datetime, timedelta - -from discord_analyzer.analysis.activity_hourly import activity_hourly -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps - - -def test_lone_messages(): - # data preparation - day = datetime(2023, 1, 1) - # hours to include interactions - hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] - DAY_COUNT = 2 - - acc_names = [] - for i in range(10): - acc_names.append(f"87648702709958252{i}") - - prepared_list = [] - channelIds = set() - dates = set() - - for i in range(DAY_COUNT): - for hour in hours_to_include: - for acc in acc_names: - data_date = (day + timedelta(days=i)).replace(hour=hour) - chId = f"10207071292141118{i}" - prepared_data = { - "mess_type": 0, - "author": acc, - "user_mentions": [], - "reactions": [], - "replied_user": None, - "datetime": data_date, - "channel": chId, - "threadId": None, - } - - prepared_list.append(prepared_data) - channelIds.add(chId) - dates.add(data_date.strftime("%Y-%m-%d")) - - (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) - - analyzer_heatmaps = Heatmaps("DB_connection", testing=False) - results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) - - assert len(results) == (len(acc_names) - 1) * DAY_COUNT - for document in results: - assert document["account_name"] in acc_names - assert document["date"] in dates - assert document["account_name"] in acc_names - assert document["channelId"] in channelIds - assert document["reacted_per_acc"] == [] - assert document["mentioner_per_acc"] == [] - assert document["replied_per_acc"] == [] - assert sum(document["thr_messages"]) == 0 - assert sum(document["mentioner"]) == 0 - assert sum(document["replied"]) == 0 - assert sum(document["replier"]) == 0 - assert sum(document["mentioned"]) == 0 - assert sum(document["reacter"]) == 0 - - # the only document we have - assert sum(document["lone_messages"]) == len(hours_to_include) diff --git a/tests/integration/test_activity_hourly_mentions.py b/tests/integration/test_activity_hourly_mentions.py deleted file mode 100644 index ac7f1ea..0000000 --- a/tests/integration/test_activity_hourly_mentions.py +++ /dev/null @@ -1,87 +0,0 @@ -from datetime import datetime, timedelta - -from discord_analyzer.analysis.activity_hourly import activity_hourly -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps - - -def test_mentioned_messages(): - # data preparation - day = datetime(2023, 
1, 1) - # hours to include interactions - hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] - DAY_COUNT = 2 - - acc_names = [] - for i in range(10): - acc_names.append(f"87648702709958252{i}") - - prepared_list = [] - channelIds = set() - dates = set() - - for i in range(DAY_COUNT): - for hour in hours_to_include: - for acc in acc_names: - data_date = (day + timedelta(days=i)).replace(hour=hour) - chId = f"10207071292141118{i}" - prepared_data = { - "mess_type": 0, - "author": acc, - "user_mentions": ["876487027099582520", "876487027099582521"], - "reactions": [], - "replied_user": None, - "datetime": data_date, - "channel": chId, - "threadId": None, - } - - prepared_list.append(prepared_data) - channelIds.add(chId) - dates.add(data_date.strftime("%Y-%m-%d")) - - accs_mentioned = ["876487027099582520", "876487027099582521"] - - (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) - - analyzer_heatmaps = Heatmaps("DB_connection", testing=False) - results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) - - assert len(results) == (len(acc_names) - 1) * DAY_COUNT - for document in results: - assert document["account_name"] in acc_names - assert document["date"] in dates - assert document["channelId"] in channelIds - assert document["reacted_per_acc"] == [] - assert sum(document["thr_messages"]) == 0 - assert sum(document["reacter"]) == 0 - assert sum(document["replied"]) == 0 - assert sum(document["replier"]) == 0 - assert document["replied_per_acc"] == [] - assert sum(document["lone_messages"]) == len(hours_to_include) - - if document["account_name"] == "876487027099582520": - assert document["mentioner_per_acc"] == [ - ({"account": "876487027099582521", "count": (len(acc_names) - 2)},) - ] - assert sum(document["mentioner"]) == len(hours_to_include) - assert sum(document["mentioned"]) == len(hours_to_include) * ( - len(acc_names) - 2 - ) - - elif document["account_name"] == "876487027099582521": - assert document["mentioner_per_acc"] == [ - ({"account": "876487027099582520", "count": (len(acc_names) - 2)},) - ] - assert sum(document["mentioner"]) == len(hours_to_include) - assert sum(document["mentioned"]) == len(hours_to_include) * ( - len(acc_names) - 2 - ) - else: - assert document["mentioner_per_acc"] == [ - ({"account": "876487027099582520", "count": 9},), - ({"account": "876487027099582521", "count": 9},), - ] - assert sum(document["mentioner"]) == len(hours_to_include) * len( - accs_mentioned - ) - assert sum(document["mentioned"]) == 0 diff --git a/tests/integration/test_activity_hourly_reactions.py b/tests/integration/test_activity_hourly_reactions.py deleted file mode 100644 index 6bec4cb..0000000 --- a/tests/integration/test_activity_hourly_reactions.py +++ /dev/null @@ -1,88 +0,0 @@ -from datetime import datetime, timedelta - -from discord_analyzer.analysis.activity_hourly import activity_hourly -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps - - -def test_reacted_messages(): - # data preparation - day = datetime(2023, 1, 1) - # hours to include interactions - hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] - DAY_COUNT = 3 - - acc_names = [] - for i in range(10): - acc_names.append(f"87648702709958252{i}") - - prepared_list = [] - channelIds = set() - dates = set() - - for i in range(DAY_COUNT): - for hour in hours_to_include: - for acc in acc_names: - data_date = (day + timedelta(days=i)).replace(hour=hour) - chId = f"10207071292141118{i}" - prepared_data = { - "mess_type": 0, - "author": acc, - "user_mentions": 
[], - "reactions": [ - "876487027099582520,876487027099582521,👍", - "876487027099582522,heatface", - ], - "replied_user": None, - "datetime": data_date, - "channel": chId, - "threadId": None, - } - - prepared_list.append(prepared_data) - channelIds.add(chId) - dates.add(data_date.strftime("%Y-%m-%d")) - - reacted_accs = set( - ["876487027099582520", "876487027099582521", "876487027099582522"] - ) - - (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) - - analyzer_heatmaps = Heatmaps("DB_connection", testing=False) - results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) - - # print(results) - - assert len(results) == (len(acc_names) - 1) * DAY_COUNT - for document in results: - assert document["account_name"] in acc_names - assert document["date"] in dates - assert document["account_name"] in acc_names - assert document["channelId"] in channelIds - assert sum(document["thr_messages"]) == 0 - assert sum(document["mentioner"]) == 0 - assert sum(document["replied"]) == 0 - assert sum(document["replier"]) == 0 - assert sum(document["mentioned"]) == 0 - assert document["mentioner_per_acc"] == [] - assert document["replied_per_acc"] == [] - assert sum(document["lone_messages"]) == len(hours_to_include) - - if document["account_name"] not in reacted_accs: - assert document["reacted_per_acc"] == [ - ({"account": "876487027099582520", "count": len(acc_names) - 2},), - ({"account": "876487027099582521", "count": len(acc_names) - 2},), - ({"account": "876487027099582522", "count": len(acc_names) - 2},), - ] - - # the only document we have - # 3 is the emoji count - assert sum(document["reacter"]) == 0 - assert sum(document["reacted"]) == len(hours_to_include) * len(reacted_accs) - else: - assert sum(document["reacter"]) == len(hours_to_include) * ( - len(acc_names) - 2 - ) - assert sum(document["reacted"]) == len(hours_to_include) * ( - len(reacted_accs) - 1 - ) diff --git a/tests/integration/test_activity_hourly_replier.py b/tests/integration/test_activity_hourly_replier.py deleted file mode 100644 index e8223e8..0000000 --- a/tests/integration/test_activity_hourly_replier.py +++ /dev/null @@ -1,84 +0,0 @@ -from datetime import datetime, timedelta - -from discord_analyzer.analysis.activity_hourly import activity_hourly -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps - - -def test_reply_messages(): - # data preparation - day = datetime(2023, 1, 1) - # hours to include interactions - hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] - DAY_COUNT = 3 - - acc_names = [] - for i in range(10): - acc_names.append(f"87648702709958252{i}") - - prepared_list = [] - channelIds = set() - dates = set() - - for i in range(DAY_COUNT): - for hour in hours_to_include: - for acc in acc_names: - data_date = (day + timedelta(days=i)).replace(hour=hour) - chId = f"10207071292141118{i}" - prepared_data = { - "mess_type": 19, - "author": acc, - "user_mentions": [], - "reactions": [], - "replied_user": "876487027099582520", - "datetime": data_date, - "channel": chId, - "threadId": None, - } - - prepared_list.append(prepared_data) - channelIds.add(chId) - dates.add(data_date.strftime("%Y-%m-%d")) - - (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) - - analyzer_heatmaps = Heatmaps("DB_connection", testing=False) - results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) - # print(results) - assert len(results) == (len(acc_names) - 1) * DAY_COUNT - for document in results: - assert document["account_name"] in acc_names - 
assert document["date"] in dates - assert document["account_name"] in acc_names - assert document["channelId"] in channelIds - assert document["reacted_per_acc"] == [] - assert document["mentioner_per_acc"] == [] - assert sum(document["lone_messages"]) == 0 - assert sum(document["thr_messages"]) == 0 - assert sum(document["mentioner"]) == 0 - assert sum(document["mentioned"]) == 0 - assert sum(document["reacter"]) == 0 - - # if it is the account that everyone replied to - if document["account_name"] == "876487027099582520": - # the only document we have - assert document["replied_per_acc"] == [ - # `len(acc_names) - 2` is because - # len is returning one more and we are replying one account less - ({"account": "876487027099582520", "count": len(acc_names) - 2},), - ({"account": "876487027099582521", "count": len(acc_names) - 2},), - ({"account": "876487027099582522", "count": len(acc_names) - 2},), - ({"account": "876487027099582523", "count": len(acc_names) - 2},), - ({"account": "876487027099582524", "count": len(acc_names) - 2},), - ({"account": "876487027099582525", "count": len(acc_names) - 2},), - ({"account": "876487027099582526", "count": len(acc_names) - 2},), - ({"account": "876487027099582527", "count": len(acc_names) - 2},), - ({"account": "876487027099582528", "count": len(acc_names) - 2},), - ({"account": "876487027099582529", "count": len(acc_names) - 2},), - ] - assert sum(document["replier"]) == len(hours_to_include) - assert sum(document["replied"]) == len(hours_to_include) * ( - len(acc_names) - 1 - ) - # other accounts - else: - assert sum(document["replier"]) == len(hours_to_include) diff --git a/tests/integration/test_activity_hourly_thread_msg.py b/tests/integration/test_activity_hourly_thread_msg.py deleted file mode 100644 index a282bb0..0000000 --- a/tests/integration/test_activity_hourly_thread_msg.py +++ /dev/null @@ -1,62 +0,0 @@ -from datetime import datetime, timedelta - -from discord_analyzer.analysis.activity_hourly import activity_hourly -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps - - -def test_thread_messages(): - # data preparation - day = datetime(2023, 1, 1) - # hours to include interactions - hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] - DAY_COUNT = 2 - - acc_names = [] - for i in range(10): - acc_names.append(f"87648702709958252{i}") - - prepared_list = [] - channelIds = set() - dates = set() - - for i in range(DAY_COUNT): - for hour in hours_to_include: - for acc in acc_names: - data_date = (day + timedelta(days=i)).replace(hour=hour) - chId = f"10207071292141118{i}" - prepared_data = { - "mess_type": 0, - "author": acc, - "user_mentions": [], - "reactions": [], - "replied_user": None, - "datetime": data_date, - "channel": chId, - "threadId": f"109635841296880850{i}", - } - - prepared_list.append(prepared_data) - channelIds.add(chId) - dates.add(data_date.strftime("%Y-%m-%d")) - - (_, heatmap_data) = activity_hourly(prepared_list, acc_names=acc_names) - - analyzer_heatmaps = Heatmaps("DB_connection", testing=False) - results = analyzer_heatmaps._post_process_data(heatmap_data, len(acc_names)) - - assert len(results) == (len(acc_names) - 1) * DAY_COUNT - for document in results: - assert document["account_name"] in acc_names - assert document["date"] in dates - assert document["channelId"] in channelIds - assert document["reacted_per_acc"] == [] - assert document["mentioner_per_acc"] == [] - assert document["replied_per_acc"] == [] - assert sum(document["thr_messages"]) == len(hours_to_include) - assert 
sum(document["mentioner"]) == 0 - assert sum(document["replied"]) == 0 - assert sum(document["replier"]) == 0 - assert sum(document["mentioned"]) == 0 - assert sum(document["reacter"]) == 0 - - assert sum(document["lone_messages"]) == 0 diff --git a/tests/integration/test_all_active_period.py b/tests/integration/test_all_active_period.py index eed5a55..6ba4dd0 100644 --- a/tests/integration/test_all_active_period.py +++ b/tests/integration/test_all_active_period.py @@ -1,16 +1,15 @@ from datetime import datetime, timedelta -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_two_weeks_period_active_members(): """ test all_active members for the two weeks period in the new schema """ - guildId = "1234567" - platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + db_access = launch_db_access(platform_id) acc_id = [ "user0", @@ -21,103 +20,156 @@ def test_two_weeks_period_active_members(): # A guild connected at 35 days ago connected_days_before = 35 - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=connected_days_before, + resources=["1020707129214111827", "general_id"], ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("rawmemberactivities") # generating rawinfo samples rawinfo_samples = [] # A message from user0 to user1 on day 0 of past two weeks sample = { - "type": 19, - "author": acc_id[0], - "content": "test_message_0", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": acc_id[1], - "createdDate": (datetime.now() - timedelta(days=14)), - "messageId": "111881432193433601", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": acc_id[0], + "date": datetime.now() - timedelta(days=14), + "interactions": [ + {"name": "reply", "type": "emitter", "users_engaged_id": [acc_id[1]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "general_id", + "thread_id": None, + }, + "source_id": "111881432193433601", + } + sample2 = { + "actions": [], + "author_id": acc_id[1], + "date": datetime.now() - timedelta(days=14), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [acc_id[0]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "general_id", + "thread_id": None, + }, + "source_id": "111881432193433601", } rawinfo_samples.append(sample) + rawinfo_samples.append(sample2) # A message from user1 to user0 on day 0 of past two weeks sample = { - "type": 19, - "author": acc_id[1], - "content": "test_message_1", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": acc_id[0], - "createdDate": (datetime.now() - timedelta(days=14)), - "messageId": "111881432193433602", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": 
"message", "type": "emitter"}], + "author_id": acc_id[1], + "date": datetime.now() - timedelta(days=14), + "interactions": [ + {"name": "reply", "type": "emitter", "users_engaged_id": [acc_id[0]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": "111881432193433602", + } + sample2 = { + "actions": [], + "author_id": acc_id[0], + "date": datetime.now() - timedelta(days=14), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [acc_id[1]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "general_id", + "thread_id": None, + }, + "source_id": "111881432193433602", } rawinfo_samples.append(sample) + rawinfo_samples.append(sample2) # A message from user2 to user3 on day 3 of past two weeks sample = { - "type": 19, - "author": acc_id[2], - "content": "test_message_1", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": acc_id[3], - "createdDate": (datetime.now() - timedelta(days=(14 - 3))), - "messageId": "111881432193433603", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": acc_id[2], + "date": datetime.now() - timedelta(days=(14 - 3)), + "interactions": [ + {"name": "reply", "type": "emitter", "users_engaged_id": [acc_id[3]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": "111881432193433603", + } + sample2 = { + "actions": [], + "author_id": acc_id[3], + "date": datetime.now() - timedelta(days=(14 - 3)), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [acc_id[2]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": "111881432193433603", } rawinfo_samples.append(sample) + rawinfo_samples.append(sample2) # A message from user3 to user2 on day 3 of past two weeks sample = { - "type": 19, - "author": acc_id[3], - "content": "test_message_1", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": acc_id[2], - "createdDate": (datetime.now() - timedelta(days=(14 - 3))), - "messageId": "111881432193433604", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": acc_id[3], + "date": datetime.now() - timedelta(days=(14 - 3)), + "interactions": [ + {"name": "reply", "type": "emitter", "users_engaged_id": [acc_id[2]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": "111881432193433604", + } + sample2 = { + "actions": [], + "author_id": acc_id[2], + "date": datetime.now() - timedelta(days=(14 - 3)), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [acc_id[3]]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": "111881432193433604", } - rawinfo_samples.append(sample) + rawinfo_samples.append(sample2) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - analyzer = setup_analyzer(guildId) analyzer.run_once() memberactivities_cursor = 
db_access.query_db_find( @@ -132,7 +184,7 @@ def test_two_weeks_period_active_members(): date_now = datetime.now() for activities in memberactivities: - date = datetime.fromisoformat(activities["date"]).date() + date = activities["date"].date() # print("date: ", date) # 14 days minues 7 if date == (date_now - timedelta(days=14)).date(): diff --git a/tests/integration/test_all_joined_day_members.py b/tests/integration/test_all_joined_day_members.py index 9b21c2c..4796fa7 100644 --- a/tests/integration/test_all_joined_day_members.py +++ b/tests/integration/test_all_joined_day_members.py @@ -3,22 +3,21 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_all_joined_day_members(): """ testing the all_joined_day """ - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) today = datetime.now() acc_id = [ - "973993299281076285", - "973993299281076286", + "user_0", + "user_1", ] # users joining 15 days ago # and 13 days ago @@ -27,52 +26,71 @@ def test_all_joined_day_members(): today - timedelta(days=13), ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, dates=acc_join_dates, days_ago_period=30, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") rawinfo_samples = [] # generating random rawinfo data for i in range(150): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() - cursor = db_access.db_mongo_client[guildId]["memberactivities"].find([]) + cursor = db_access.db_mongo_client[platform_id]["memberactivities"].find([]) 
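# --- illustrative aside (not part of this diff) --------------------------
# These test fixtures migrate from the flat `rawinfos` Discord-message
# schema (`author`, `replied_user`, `createdDate`, ...) to the
# platform-agnostic `rawmemberactivities` schema, where every reply is
# stored twice: once from the emitter's side and once from the receiver's.
# A minimal sketch of a helper that builds such a pair; `make_reply_pair`
# and its signature are assumptions for illustration, not code from this PR.
from datetime import datetime

def make_reply_pair(
    author_id: str,
    replied_user: str,
    date: datetime,
    channel_id: str,
    source_id: str,
) -> list[dict]:
    # metadata is identical on both sides of the interaction
    metadata = {"bot_activity": False, "channel_id": channel_id, "thread_id": None}
    emitter = {
        "actions": [{"name": "message", "type": "emitter"}],
        "author_id": author_id,
        "date": date,
        "interactions": [
            {"name": "reply", "type": "emitter", "users_engaged_id": [replied_user]}
        ],
        "metadata": dict(metadata),
        "source_id": source_id,
    }
    receiver = {
        "actions": [],  # the receiver performed no action of their own
        "author_id": replied_user,
        "date": date,
        "interactions": [
            {"name": "reply", "type": "receiver", "users_engaged_id": [author_id]}
        ],
        "metadata": dict(metadata),
        "source_id": source_id,  # both documents share the source message id
    }
    return [emitter, receiver]

# Usage mirrors the fixtures in these hunks: extend `rawinfo_samples` with
# the returned pair, then insert_many into the platform's
# `rawmemberactivities` collection before calling `analyzer.run_once()`.
# --------------------------------------------------------------------------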
memberactivities = list(cursor) for document in memberactivities: - date_str = document["date"].split("T")[0] - date = datetime.strptime(date_str, "%Y-%m-%d") + date = document["date"] # checking the types assert isinstance(document["all_joined_day"], list) @@ -82,35 +100,35 @@ def test_all_joined_day_members(): joined = set(document["all_joined"]) if (today - date).days == 15: - assert joined_day == {"973993299281076285"} - assert joined == {"973993299281076285"} + assert joined_day == {"user_0"} + assert joined == {"user_0"} elif (today - date).days == 14: assert joined_day == set() - assert joined == {"973993299281076285"} + assert joined == {"user_0"} elif (today - date).days == 13: - assert joined_day == {"973993299281076286"} - assert joined == {"973993299281076285", "973993299281076286"} + assert joined_day == {"user_1"} + assert joined == {"user_0", "user_1"} elif (today - date).days == 12: assert joined_day == set() - assert joined == {"973993299281076286", "973993299281076285"} + assert joined == {"user_1", "user_0"} elif (today - date).days == 11: assert joined_day == set() - assert joined == {"973993299281076286", "973993299281076285"} + assert joined == {"user_1", "user_0"} elif (today - date).days == 10: assert joined_day == set() - assert joined == {"973993299281076286", "973993299281076285"} + assert joined == {"user_1", "user_0"} elif (today - date).days == 9: assert joined_day == set() - assert joined == {"973993299281076286", "973993299281076285"} + assert joined == {"user_1", "user_0"} elif (today - date).days == 8: assert joined_day == set() - assert joined == {"973993299281076286", "973993299281076285"} + assert joined == {"user_1", "user_0"} elif (today - date).days == 7: assert joined_day == set() - assert joined == {"973993299281076286"} + assert joined == {"user_1"} elif (today - date).days == 6: assert joined_day == set() - assert joined == {"973993299281076286"} + assert joined == {"user_1"} else: assert joined_day == set() assert joined == set() diff --git a/tests/integration/test_analyzer_init.py b/tests/integration/test_analyzer_init.py deleted file mode 100644 index 8f9951e..0000000 --- a/tests/integration/test_analyzer_init.py +++ /dev/null @@ -1,109 +0,0 @@ -from datetime import datetime, timedelta - -from analyzer_init import AnalyzerInit -from bson.objectid import ObjectId -from utils.mongo import MongoSingleton - - -def test_analyzer_init(): - platform_id = "515151515151515151515151" - days_ago_period = 30 - community_id = "aabbccddeeff001122334455" - guildId = "1234" - - mongo_client = MongoSingleton.get_instance().get_client() - mongo_client["Core"]["platforms"].delete_one({"metadata.id": guildId}) - mongo_client.drop_database(guildId) - - act_param = { - "INT_THR": 1, - "UW_DEG_THR": 1, - "PAUSED_T_THR": 1, - "CON_T_THR": 4, - "CON_O_THR": 3, - "EDGE_STR_THR": 5, - "UW_THR_DEG_THR": 5, - "VITAL_T_THR": 4, - "VITAL_O_THR": 3, - "STILL_T_THR": 2, - "STILL_O_THR": 2, - "DROP_H_THR": 2, - "DROP_I_THR": 1, - } - window = { - "period_size": 7, - "step_size": 1, - } - - mongo_client["Core"]["platforms"].insert_one( - { - "_id": ObjectId(platform_id), - "name": "discord", - "metadata": { - "id": guildId, - "icon": "111111111111111111111111", - "name": "A guild", - "selectedChannels": ["1020707129214111827"], - "window": window, - "action": act_param, - "period": datetime.now() - timedelta(days=days_ago_period), - }, - "community": ObjectId(community_id), - "disconnectedAt": None, - "connectedAt": (datetime.now() - timedelta(days=days_ago_period + 10)), - 
"isInProgress": True, - "createdAt": datetime(2023, 11, 1), - "updatedAt": datetime(2023, 11, 1), - } - ) - - analyzer = AnalyzerInit(guildId) - - mongo_client[guildId]["guildmembers"].insert_one( - { - "discordId": "user1", - "username": "sample_user1", - "roles": ["1012430565959553145"], - "joinedAt": datetime.now() - timedelta(days=5), - "avatar": "3ddd6e429f75d6a711d0a58ba3060694", - "isBot": False, - "discriminator": "0", - } - ) - mongo_client[guildId].create_collection("heatmaps") - mongo_client[guildId].create_collection("memberactivities") - - # generating random rawinfo data - # 24 hours - # 90 days - rawinfo_samples = [] - for i in range(24 * days_ago_period): - sample = { - "type": 19, - "author": "user1", - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": "user2", - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - tc_discord_analyzer = analyzer.get_analyzer() - - tc_discord_analyzer.recompute_analytics() - - heatmaps_data = mongo_client[guildId]["heatmaps"].find_one({}) - assert heatmaps_data is not None - - memberactivities_data = mongo_client[guildId]["memberactivities"].find_one({}) - assert memberactivities_data is not None diff --git a/tests/integration/test_analyzer_period_1year_recompute_available_analytics.py b/tests/integration/test_analyzer_period_1year_recompute_available_analytics.py index 3454fb2..27a3f2c 100644 --- a/tests/integration/test_analyzer_period_1year_recompute_available_analytics.py +++ b/tests/integration/test_analyzer_period_1year_recompute_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_one_year_period_recompute_available_analytics(): @@ -15,21 +15,20 @@ def test_analyzer_one_year_period_recompute_available_analytics(): and use recompute method with some analytics data available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=360 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=360 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -37,7 +36,7 @@ def test_analyzer_one_year_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=354), count=353 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + 
db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -47,7 +46,7 @@ def test_analyzer_one_year_period_recompute_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=360), count=359 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -56,28 +55,49 @@ def test_analyzer_one_year_period_recompute_available_analytics(): # 24 hours # 360 days for i in range(24 * 360): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -89,11 +109,11 @@ def test_analyzer_one_year_period_recompute_available_analytics(): ) assert len(memberactivities_data) == 354 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 353 days ago rather than 354 document_start_date = yesterday - timedelta(days=353) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == (document_start_date) heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -102,6 +122,6 @@ def test_analyzer_one_year_period_recompute_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 360 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday date_ago = yesterday - timedelta(359) - assert heatmaps_data[-1]["date"] == date_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == date_ago diff --git a/tests/integration/test_analyzer_period_1year_recompute_empty_analytics.py 
b/tests/integration/test_analyzer_period_1year_recompute_empty_analytics.py index 0c086fe..5d02b6e 100644 --- a/tests/integration/test_analyzer_period_1year_recompute_empty_analytics.py +++ b/tests/integration/test_analyzer_period_1year_recompute_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_one_year_period_recompute_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_one_year_period_recompute_empty_analytics(): and use recompute method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=360 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=360 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -36,28 +35,49 @@ def test_analyzer_one_year_period_recompute_empty_analytics(): # 24 hours # 360 days for i in range(24 * 360): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -70,11 +90,11 @@ def test_analyzer_one_year_period_recompute_empty_analytics(): # 354 days, analytics saving is the end day assert len(memberactivities_data) == (354) - assert memberactivities_data[0]["date"] 
== yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 353 days ago rather than 354 document_start_date = yesterday - timedelta(days=353) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -83,6 +103,6 @@ def test_analyzer_one_year_period_recompute_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 360 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday year_ago = yesterday - timedelta(359) - assert heatmaps_data[-1]["date"] == year_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == year_ago diff --git a/tests/integration/test_analyzer_period_1year_run_once_available_analytics.py b/tests/integration/test_analyzer_period_1year_run_once_available_analytics.py index 1c83b85..d925fd5 100644 --- a/tests/integration/test_analyzer_period_1year_run_once_available_analytics.py +++ b/tests/integration/test_analyzer_period_1year_run_once_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_one_year_period_run_once_available_analytics(): @@ -15,39 +15,42 @@ def test_analyzer_one_year_period_run_once_available_analytics(): and use run_once method with some analytics data available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=360 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=360 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data # filling up to 2 days ago with 353 documents + start_day = (datetime.now() - timedelta(days=354)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=354), count=353 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data # filling up to 2 days ago with 359 documents # just yesterday is left to be analyzed - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - timedelta(days=360), count=359 + start_day = (datetime.now() - timedelta(days=360)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - 
db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=359) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -56,27 +59,48 @@ def test_analyzer_one_year_period_run_once_available_analytics(): # 24 hours # 360 days for i in range(24 * 360): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -89,12 +113,12 @@ def test_analyzer_one_year_period_run_once_available_analytics(): ) # 354 days, analytics saving is the end day - assert len(memberactivities_data) == (354) - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert len(memberactivities_data) == 354 + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 353 days ago rather than 354 document_start_date = yesterday - timedelta(days=353) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -103,6 +127,6 @@ def test_analyzer_one_year_period_run_once_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 359 + 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday year_ago = yesterday - timedelta(359) - assert heatmaps_data[-1]["date"] == year_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == year_ago diff --git a/tests/integration/test_analyzer_period_1year_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_1year_run_once_empty_analytics.py index c8865a0..f59820d 100644 --- a/tests/integration/test_analyzer_period_1year_run_once_empty_analytics.py +++ 
b/tests/integration/test_analyzer_period_1year_run_once_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_one_year_period_run_once_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_one_year_period_run_once_empty_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=360 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=360 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -36,27 +35,48 @@ def test_analyzer_one_year_period_run_once_empty_analytics(): # 24 hours # 360 days for i in range(24 * 360): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -70,11 +90,11 @@ def test_analyzer_one_year_period_run_once_empty_analytics(): # 354 days, analytics saving is the end day assert len(memberactivities_data) == (354) - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 173 days ago rather than 174 document_start_date = yesterday - timedelta(days=353) - assert memberactivities_data[-1]["date"] == 
(document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -83,6 +103,6 @@ def test_analyzer_one_year_period_run_once_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 360 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday year_ago = yesterday - timedelta(359) - assert heatmaps_data[-1]["date"] == year_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == year_ago diff --git a/tests/integration/test_analyzer_period_35days_run_once.py b/tests/integration/test_analyzer_period_35days_run_once.py index 5fa4ca8..73d1192 100644 --- a/tests/integration/test_analyzer_period_35days_run_once.py +++ b/tests/integration/test_analyzer_period_35days_run_once.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_40days_period_run_once_available_analytics_overlapping_period(): @@ -16,42 +16,40 @@ def test_analyzer_40days_period_run_once_available_analytics_overlapping_period( This test is utilized for the use case of overlapping period and 40 days ago """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "user1", "user2", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=40 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=40 ) - db_access.db_mongo_client[guildId].drop_collection("heatmaps") - db_access.db_mongo_client[guildId].drop_collection("memberactivities") - - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data # filling up to 4 days ago with 83 documents - memberactivity_data = create_empty_memberactivities_data( - datetime.now() - timedelta(days=33), count=29 + start_day = (datetime.now() - timedelta(days=33)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data = create_empty_memberactivities_data(start_day, count=29) + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data # filling up to 4 days ago with 89 documents # just yesterday is left to be analyzed - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - timedelta(days=40), count=36 + start_day = (datetime.now() - timedelta(days=40)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=36) + 
db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -59,27 +57,48 @@ def test_analyzer_40days_period_run_once_available_analytics_overlapping_period( # generating random rawinfo data # 24 hours, 40 days for i in range(24 * 40): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -92,11 +111,11 @@ def test_analyzer_40days_period_run_once_available_analytics_overlapping_period( ) assert len(memberactivities_data) == 33 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 32 days ago rather than 40 document_start_date = yesterday - timedelta(days=32) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -106,6 +125,6 @@ def test_analyzer_40days_period_run_once_available_analytics_overlapping_period( # (accounts are: "user1", "user2") assert len(heatmaps_data) == 44 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(39) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_3month_recompute_available_analytics.py b/tests/integration/test_analyzer_period_3month_recompute_available_analytics.py index 0853632..1497865 100644 --- a/tests/integration/test_analyzer_period_3month_recompute_available_analytics.py +++ b/tests/integration/test_analyzer_period_3month_recompute_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from 
.utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_three_month_period_recompute_available_analytics(): @@ -15,21 +15,20 @@ def test_analyzer_three_month_period_recompute_available_analytics(): and use recompute method with some analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=90 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=90 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -37,7 +36,7 @@ def test_analyzer_three_month_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=84), count=83 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -47,7 +46,7 @@ def test_analyzer_three_month_period_recompute_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=90), count=89 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -56,28 +55,49 @@ def test_analyzer_three_month_period_recompute_available_analytics(): # 24 hours # 90 days for i in range(24 * 90): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": 
"1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -89,11 +109,11 @@ def test_analyzer_three_month_period_recompute_available_analytics(): ) assert len(memberactivities_data) == 84 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 83 days ago rather than 84 document_start_date = yesterday - timedelta(days=83) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -102,6 +122,6 @@ def test_analyzer_three_month_period_recompute_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 90 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(89) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_3month_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_3month_recompute_empty_analytics.py index 6820ab2..09c4166 100644 --- a/tests/integration/test_analyzer_period_3month_recompute_empty_analytics.py +++ b/tests/integration/test_analyzer_period_3month_recompute_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_three_month_period_recompute_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_three_month_period_recompute_empty_analytics(): and use recompute method with no analytics data available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=90 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=90 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -36,28 +35,49 @@ def test_analyzer_three_month_period_recompute_empty_analytics(): # 24 hours # 90 days for i in range(24 * 90): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", 
- "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -69,11 +89,11 @@ def test_analyzer_three_month_period_recompute_empty_analytics(): ) assert len(memberactivities_data) == 84 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 83 days ago rather than 84 document_start_date = yesterday - timedelta(days=83) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -82,6 +102,6 @@ def test_analyzer_three_month_period_recompute_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 90 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(89) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_3month_run_once_available_analytics.py b/tests/integration/test_analyzer_period_3month_run_once_available_analytics.py index 0fbea02..f1ceb5a 100644 --- a/tests/integration/test_analyzer_period_3month_run_once_available_analytics.py +++ b/tests/integration/test_analyzer_period_3month_run_once_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_three_month_period_run_once_available_analytics(): @@ -15,21 +15,20 @@ def test_analyzer_three_month_period_run_once_available_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id 
= "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=90 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=90 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -37,17 +36,18 @@ def test_analyzer_three_month_period_run_once_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=84), count=83 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data # filling up to 2 days ago with 89 documents # just yesterday is left to be analyzed - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - timedelta(days=90), count=89 + start_day = (datetime.now() - timedelta(days=90)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=89) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -56,27 +56,48 @@ def test_analyzer_three_month_period_run_once_available_analytics(): # 24 hours, 90 days # 24 * 90 = 720 for i in range(24 * 90): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -89,11 +110,11 @@ def test_analyzer_three_month_period_run_once_available_analytics(): ) 
assert len(memberactivities_data) == 84 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 83 days ago rather than 84 document_start_date = yesterday - timedelta(days=83) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == (document_start_date) heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -103,6 +124,6 @@ def test_analyzer_three_month_period_run_once_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 89 + 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(89) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_3month_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_3month_run_once_empty_analytics.py index c5b9131..f3b51dd 100644 --- a/tests/integration/test_analyzer_period_3month_run_once_empty_analytics.py +++ b/tests/integration/test_analyzer_period_3month_run_once_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_three_month_period_run_once_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_three_month_period_run_once_empty_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=90 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=90 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -36,27 +35,48 @@ def test_analyzer_three_month_period_run_once_empty_analytics(): # 24 hours, 90 days # 24 * 90 for i in range(24 * 90): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": 
datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -69,11 +89,11 @@ def test_analyzer_three_month_period_run_once_empty_analytics(): ) assert len(memberactivities_data) == 84 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 83 days ago rather than 84 document_start_date = yesterday - timedelta(days=83) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -82,6 +102,6 @@ def test_analyzer_three_month_period_run_once_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 90 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(89) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_6month_recompute_available_analytics.py b/tests/integration/test_analyzer_period_6month_recompute_available_analytics.py index 91837ed..5bb7e48 100644 --- a/tests/integration/test_analyzer_period_6month_recompute_available_analytics.py +++ b/tests/integration/test_analyzer_period_6month_recompute_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_six_month_period_recompute_available_analytics(): @@ -15,21 +15,20 @@ def test_analyzer_six_month_period_recompute_available_analytics(): and use recompute method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=180 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=180 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + 
db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -37,17 +36,18 @@ def test_analyzer_six_month_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=174), count=173 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data # filling up to 2 days ago with 179 documents # just yesterday is left to be analyzed - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - timedelta(days=180), count=179 + start_day = (datetime.now() - timedelta(days=180)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=179) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -56,28 +56,49 @@ def test_analyzer_six_month_period_recompute_available_analytics(): # 24 hours # 180 days for i in range(24 * 180): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -91,11 +112,11 @@ def test_analyzer_six_month_period_recompute_available_analytics(): # 180 days, analytics saving is the end day # so the 7 days start wouldn't be counted assert len(memberactivities_data) == (174) - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 173 days ago rather than 174 document_start_date = yesterday - timedelta(days=173) - assert memberactivities_data[-1]["date"] == 
(document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == (document_start_date) heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -104,6 +125,6 @@ def test_analyzer_six_month_period_recompute_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 180 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(179) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_6month_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_6month_recompute_empty_analytics.py index 20ddf02..14b90a4 100644 --- a/tests/integration/test_analyzer_period_6month_recompute_empty_analytics.py +++ b/tests/integration/test_analyzer_period_6month_recompute_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_six_month_period_recompute_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_six_month_period_recompute_empty_analytics(): and use recompute method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=180 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=180 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -36,28 +35,49 @@ def test_analyzer_six_month_period_recompute_empty_analytics(): # 24 hours # 180 days for i in range(24 * 180): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + 
"actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -71,11 +91,11 @@ def test_analyzer_six_month_period_recompute_empty_analytics(): # 180 days, analytics saving is the end day # so the 7 days start wouldn't be counted assert len(memberactivities_data) == (174) - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 173 days ago rather than 174 document_start_date = yesterday - timedelta(days=173) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -84,6 +104,6 @@ def test_analyzer_six_month_period_recompute_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 180 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(179) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_6month_run_once_available_analytics.py b/tests/integration/test_analyzer_period_6month_run_once_available_analytics.py index 0f8ffbc..94dff3e 100644 --- a/tests/integration/test_analyzer_period_6month_run_once_available_analytics.py +++ b/tests/integration/test_analyzer_period_6month_run_once_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_six_month_period_run_once_available_analytics(): @@ -15,21 +15,20 @@ def test_analyzer_six_month_period_run_once_available_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=180 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=180 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # 
filling heatmaps with some data @@ -37,17 +36,18 @@ def test_analyzer_six_month_period_run_once_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=174), count=173 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data # filling up to 2 days ago with 179 documents # just yesterday is left to be analyzed - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - timedelta(days=180), count=179 + start_day = (datetime.now() - timedelta(days=180)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=179) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -56,27 +56,48 @@ def test_analyzer_six_month_period_run_once_available_analytics(): # 24 hours # 180 days for i in range(24 * 180): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -89,11 +110,11 @@ def test_analyzer_six_month_period_run_once_available_analytics(): ) assert len(memberactivities_data) == 174 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 173 days ago rather than 174 document_start_date = yesterday - timedelta(days=173) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -103,6 +124,6 @@ def test_analyzer_six_month_period_run_once_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") 
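# a rough reading of the count below: 179 pre-filled empty heatmap documents
# plus one fresh document per account for the single remaining day
# (yesterday), i.e. with the two test accounts:
# expected_heatmap_docs = 179 + len(acc_id)  # 179 + 2 == 181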
assert len(heatmaps_data) == 179 + 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(179) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_6month_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_6month_run_once_empty_analytics.py index 8feb260..c4eee00 100644 --- a/tests/integration/test_analyzer_period_6month_run_once_empty_analytics.py +++ b/tests/integration/test_analyzer_period_6month_run_once_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_six_month_period_run_once_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_six_month_period_run_once_empty_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=180 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=180 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -36,27 +35,48 @@ def test_analyzer_six_month_period_run_once_empty_analytics(): # 24 hours # 180 days for i in range(24 * 180): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + 
db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -70,12 +90,12 @@ def test_analyzer_six_month_period_run_once_empty_analytics(): # 180 days, analytics saving is the end day # so the 7 days start wouldn't be counted - assert len(memberactivities_data) == (174) - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert len(memberactivities_data) == 174 + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 173 days ago rather than 174 document_start_date = yesterday - timedelta(days=173) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -84,6 +104,6 @@ def test_analyzer_six_month_period_run_once_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 180 * 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(179) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_month_recompute_available_analytics.py b/tests/integration/test_analyzer_period_month_recompute_available_analytics.py index b32902e..465d845 100644 --- a/tests/integration/test_analyzer_period_month_recompute_available_analytics.py +++ b/tests/integration/test_analyzer_period_month_recompute_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_month_period_recompute_available_analytics(): @@ -15,21 +15,20 @@ def test_analyzer_month_period_recompute_available_analytics(): and use recompute method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=30 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=30 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -37,17 +36,18 @@ def test_analyzer_month_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=24), count=23 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # 
filling heatmaps with some data # filling up to 2 days ago with 29 documents # just yesterday is left to be analyzed - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - timedelta(days=30), count=29 + start_day = (datetime.now() - timedelta(days=30)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=29) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -55,28 +55,49 @@ def test_analyzer_month_period_recompute_available_analytics(): # generating random rawinfo data # 24 * 30 = 720 for i in range(720): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -88,11 +109,11 @@ def test_analyzer_month_period_recompute_available_analytics(): ) assert len(memberactivities_data) == 24 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 23 days ago rather than 24 document_start_date = yesterday - timedelta(days=23) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == document_start_date heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -101,6 +122,6 @@ def test_analyzer_month_period_recompute_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 60 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(29) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert 
heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_month_recompute_empty_analytics.py b/tests/integration/test_analyzer_period_month_recompute_empty_analytics.py index 1cf6e2d..f0f313c 100644 --- a/tests/integration/test_analyzer_period_month_recompute_empty_analytics.py +++ b/tests/integration/test_analyzer_period_month_recompute_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_month_period_recompute_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_month_period_recompute_empty_analytics(): and use recompute method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=30 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=30 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -35,28 +34,49 @@ def test_analyzer_month_period_recompute_empty_analytics(): # generating random rawinfo data # 24 * 30 = 720 for i in range(720): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find( "memberactivities", {}, sorting=("date", -1) @@ -68,11 +88,11 @@ def 
test_analyzer_month_period_recompute_empty_analytics(): ) assert len(memberactivities_data) == 24 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 23 days ago rather than 24 document_start_date = yesterday - timedelta(days=23) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == (document_start_date) heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -81,6 +101,6 @@ def test_analyzer_month_period_recompute_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 60 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(29) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_month_run_once_available_analytics.py b/tests/integration/test_analyzer_period_month_run_once_available_analytics.py index 8cc38a8..692782a 100644 --- a/tests/integration/test_analyzer_period_month_run_once_available_analytics.py +++ b/tests/integration/test_analyzer_period_month_run_once_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_month_period_run_once_available_analytics(): @@ -15,39 +15,40 @@ def test_analyzer_month_period_run_once_available_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=30 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=30 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data # filling up to 2 days ago with 23 documents - memberactivity_data = create_empty_memberactivities_data( - datetime.now() - timedelta(days=24), count=23 + start_day = (datetime.now() - timedelta(days=24)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data = create_empty_memberactivities_data(start_day, count=23) + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data # filling up to 2 days ago with 29 documents # just yesterday is left to be analyzed - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - 
timedelta(days=30), count=29 + start_day = (datetime.now() - timedelta(days=30)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=29) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -55,27 +56,48 @@ def test_analyzer_month_period_run_once_available_analytics(): # generating random rawinfo data # 24 * 30 = 720 for i in range(720): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -88,11 +110,11 @@ def test_analyzer_month_period_run_once_available_analytics(): ) assert len(memberactivities_data) == 24 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # so we would use 23 days ago rather than 24 document_start_date = yesterday - timedelta(days=23) - assert memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == (document_start_date) heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -102,6 +124,6 @@ def test_analyzer_month_period_run_once_available_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 29 + 2 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(29) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_month_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_month_run_once_empty_analytics.py index a135f0c..a84fe47 100644 --- 
a/tests/integration/test_analyzer_period_month_run_once_empty_analytics.py +++ b/tests/integration/test_analyzer_period_month_run_once_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_month_period_run_once_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_month_period_run_once_empty_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=30 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=30 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -35,27 +34,48 @@ def test_analyzer_month_period_run_once_empty_analytics(): # generating random rawinfo data # 24 * 30 = 720 for i in range(720): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -68,11 +88,11 @@ def test_analyzer_month_period_run_once_empty_analytics(): ) assert len(memberactivities_data) == 24 - assert memberactivities_data[0]["date"] == yesterday.isoformat() + assert memberactivities_data[0]["date"] == yesterday # yesterday is `-1` day and so # we would use 23 days ago rather than 24 document_start_date = yesterday - timedelta(days=23) - assert 
memberactivities_data[-1]["date"] == (document_start_date).isoformat() + assert memberactivities_data[-1]["date"] == (document_start_date) heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) heatmaps_data = list(heatmaps_cursor) @@ -81,6 +101,6 @@ def test_analyzer_month_period_run_once_empty_analytics(): # (accounts are: "973993299281076285", "973993299281076286") assert len(heatmaps_data) == 60 # checking first and last document - assert heatmaps_data[0]["date"] == yesterday.strftime("%Y-%m-%d") + assert heatmaps_data[0]["date"] == yesterday month_ago = yesterday - timedelta(29) - assert heatmaps_data[-1]["date"] == month_ago.strftime("%Y-%m-%d") + assert heatmaps_data[-1]["date"] == month_ago diff --git a/tests/integration/test_analyzer_period_week_recompute_available_analytics.py b/tests/integration/test_analyzer_period_week_recompute_available_analytics.py index 42bbf53..0eb5282 100644 --- a/tests/integration/test_analyzer_period_week_recompute_available_analytics.py +++ b/tests/integration/test_analyzer_period_week_recompute_available_analytics.py @@ -3,10 +3,10 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_week_period_recompute_available_analytics(): @@ -16,34 +16,35 @@ def test_analyzer_week_period_recompute_available_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ - "973993299281076285", - "973993299281076286", + "user_1", + "user_2", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=8 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=8 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data - memberactivity_data = create_empty_memberactivities_data( - datetime.now() - timedelta(days=2), count=1 + start_day = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + memberactivity_data = create_empty_memberactivities_data(start_day, count=1) + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data - heatmaps_data = create_empty_heatmaps_data( - datetime.now() - timedelta(days=7), count=1 + start_day = (datetime.now() - timedelta(days=7)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + heatmaps_data = create_empty_heatmaps_data(start_day, count=1) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -51,28 +52,50 @@ def test_analyzer_week_period_recompute_available_analytics(): # generating random 
rawinfo data # 24 hour * 7 days for i in range(168): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = list(set(acc_id) - set([author]))[0] + # replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() memberactivities_cursor = db_access.query_db_find("memberactivities", {}) memberactivities_data = list(memberactivities_cursor) @@ -80,11 +103,9 @@ def test_analyzer_week_period_recompute_available_analytics(): hour=0, minute=0, second=0, microsecond=0 ) - print("memberactivities_data: ", memberactivities_data) - memberactivities_expected_dates = [ - yesterday.isoformat(), - (yesterday - timedelta(days=1)).isoformat(), + yesterday, + (yesterday - timedelta(days=1)), ] # two documents in memberactivities @@ -97,21 +118,19 @@ def test_analyzer_week_period_recompute_available_analytics(): ) heatmaps_data = list(heatmaps_cursor) - print("heatmaps_data: ", heatmaps_data) - heatmaps_expected_dates = [ - yesterday.strftime("%Y-%m-%d"), - (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=7)).strftime("%Y-%m-%d"), + yesterday, + (yesterday - timedelta(days=1)), + (yesterday - timedelta(days=2)), + (yesterday - timedelta(days=3)), + (yesterday - timedelta(days=4)), + (yesterday - timedelta(days=5)), + (yesterday - timedelta(days=6)), + (yesterday - timedelta(days=7)), ] - # 7 days, multiplied with 2 - # (accounts are: "973993299281076285", "973993299281076286") - assert len(heatmaps_data) == 14 + # 8 days, multiplied with 2 (starting from the period) + # (accounts are: "user_1", "user_2") + assert len(heatmaps_data) == 16 # last document must be for yesterday for document in heatmaps_data: diff --git a/tests/integration/test_analyzer_period_week_recompute_empty_analytics.py 
b/tests/integration/test_analyzer_period_week_recompute_empty_analytics.py index 13d4394..3794f28 100644 --- a/tests/integration/test_analyzer_period_week_recompute_empty_analytics.py +++ b/tests/integration/test_analyzer_period_week_recompute_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_week_period_recompute_empty_analytics(): @@ -13,21 +13,20 @@ def test_analyzer_week_period_recompute_empty_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "973993299281076285", "973993299281076286", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=7 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=7 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -35,30 +34,51 @@ def test_analyzer_week_period_recompute_empty_analytics(): # generating random rawinfo data # 24 hour * 7 days for i in range(168): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() - - memberactivities_cursor = db_access.db_mongo_client[guildId][ + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() + + memberactivities_cursor = db_access.db_mongo_client[platform_id][ "memberactivities" ].find({}) memberactivities_data = list(memberactivities_cursor) @@ -69,13 +89,13 @@ def test_analyzer_week_period_recompute_empty_analytics(): # 
just one document in memberactivities
     assert len(memberactivities_data) == 1
-    assert memberactivities_data[0]["date"] == date.isoformat()
+    assert memberactivities_data[0]["date"] == date

-    heatmaps_cursor = db_access.db_mongo_client[guildId]["heatmaps"].find({})
+    heatmaps_cursor = db_access.db_mongo_client[platform_id]["heatmaps"].find({})
     heatmaps_data = list(heatmaps_cursor)

     # 7 days, multiplied with 2
     # (accounts are: "973993299281076285", "973993299281076286")
     assert len(heatmaps_data) == 14
     # last document must be for yesterday
-    assert heatmaps_data[-1]["date"] == date.strftime("%Y-%m-%d")
+    assert heatmaps_data[-1]["date"] == date
diff --git a/tests/integration/test_analyzer_period_week_run_once_available_analytics.py b/tests/integration/test_analyzer_period_week_run_once_available_analytics.py
index 57ed9f9..f4245fc 100644
--- a/tests/integration/test_analyzer_period_week_run_once_available_analytics.py
+++ b/tests/integration/test_analyzer_period_week_run_once_available_analytics.py
@@ -3,10 +3,10 @@
 import numpy as np

-from .utils.analyzer_setup import launch_db_access, setup_analyzer
+from .utils.analyzer_setup import launch_db_access
 from .utils.mock_heatmaps import create_empty_heatmaps_data
 from .utils.mock_memberactivities import create_empty_memberactivities_data
-from .utils.remove_and_setup_guild import setup_db_guild
+from .utils.setup_platform import setup_platform


 def test_analyzer_week_period_run_once_available_analytics():
@@ -16,61 +16,83 @@ def test_analyzer_week_period_run_once_available_analytics():
     and use run_once method with some analytics available
     """
     # first create the collections
-    guildId = "1234"
     platform_id = "515151515151515151515151"
-    db_access = launch_db_access(guildId)
+    db_access = launch_db_access(platform_id)

     acc_id = [
         "973993299281076285",
         "973993299281076286",
     ]
-    setup_db_guild(
-        db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=8
+    analyzer = setup_platform(
+        db_access, platform_id, discordId_list=acc_id, days_ago_period=8
     )

-    db_access.db_mongo_client[guildId].create_collection("heatmaps")
-    db_access.db_mongo_client[guildId].create_collection("memberactivities")
+    db_access.db_mongo_client[platform_id].drop_collection("heatmaps")
+    db_access.db_mongo_client[platform_id].drop_collection("memberactivities")

     # filling memberactivities with some data
-    memberactivity_data = create_empty_memberactivities_data(
-        datetime.now() - timedelta(days=2), count=1
+    start_day = (datetime.now() - timedelta(days=2)).replace(
+        hour=0, minute=0, second=0, microsecond=0
     )
-    db_access.db_mongo_client[guildId]["memberactivities"].insert_many(
+    memberactivity_data = create_empty_memberactivities_data(start_day, count=1)
+    db_access.db_mongo_client[platform_id]["memberactivities"].insert_many(
         memberactivity_data
     )

     # filling heatmaps with some data
-    heatmaps_data = create_empty_heatmaps_data(
-        datetime.now() - timedelta(days=7), count=1
+    start_day = (datetime.now() - timedelta(days=7)).replace(
+        hour=0, minute=0, second=0, microsecond=0
     )
-    db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data)
+    heatmaps_data = create_empty_heatmaps_data(start_day, count=1)
+    db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data)

     # generating rawinfo samples
     rawinfo_samples = []

     # generating random rawinfo data
     for i in range(150):
-        sample = {
-            "type": 19,
-            "author": np.random.choice(acc_id),
-            "content": f"test{i}",
-            "user_mentions": [],
-            "role_mentions": [],
-            "reactions": [],
-            "replied_user":
np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -84,8 +106,8 @@ def test_analyzer_week_period_run_once_available_analytics(): print("memberactivities_data: ", memberactivities_data) memberactivities_expected_dates = [ - yesterday.isoformat(), - (yesterday - timedelta(days=1)).isoformat(), + yesterday, + (yesterday - timedelta(days=1)), ] # two documents in memberactivities @@ -101,21 +123,21 @@ def test_analyzer_week_period_run_once_available_analytics(): print("heatmaps_data: ", heatmaps_data) heatmaps_expected_dates = [ - yesterday.strftime("%Y-%m-%d"), - yesterday.strftime("%Y-%m-%d"), - (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"), - (yesterday - timedelta(days=7)).strftime("%Y-%m-%d"), + yesterday, + yesterday, + (yesterday - timedelta(days=1)), + (yesterday - timedelta(days=1)), + (yesterday - timedelta(days=2)), + (yesterday - timedelta(days=2)), + (yesterday - timedelta(days=3)), + (yesterday - timedelta(days=3)), + (yesterday - timedelta(days=4)), + (yesterday - timedelta(days=4)), + (yesterday - timedelta(days=5)), + (yesterday - timedelta(days=5)), + (yesterday - timedelta(days=6)), + (yesterday - timedelta(days=6)), + (yesterday - timedelta(days=7)), ] # 6 days, multiplied with 2 # (accounts are: "973993299281076285", "973993299281076286") diff --git a/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py index db07be5..bded353 100644 --- 
a/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py +++ b/tests/integration/test_analyzer_period_week_run_once_empty_analytics.py @@ -3,8 +3,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_week_period_run_once_empty_analytics(): @@ -13,48 +13,68 @@ def test_analyzer_week_period_run_once_empty_analytics(): and use run_once method with empty analytics available """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ - "973993299281076285", - "973993299281076286", + "user_0", + "user_1", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=7 + analyzer = setup_platform( + db_access, platform_id, discordId_list=acc_id, days_ago_period=7 ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] # generating random rawinfo data for i in range(150): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() memberactivities_cursor = db_access.query_db_find( @@ -65,10 +85,8 @@ def test_analyzer_week_period_run_once_empty_analytics(): hour=0, minute=0, second=0, microsecond=0 ) - print("memberactivities_data: ", memberactivities_data) - memberactivities_expected_dates = [ - yesterday.isoformat(), + yesterday, # (yesterday - timedelta(days=1)).isoformat() ] @@ -82,28 +100,26 @@ def test_analyzer_week_period_run_once_empty_analytics(): heatmaps_cursor = db_access.query_db_find("heatmaps", {}, sorting=("date", -1)) 
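# Note: the emitter/receiver record pair built in the loops above recurs across
# several of these tests. A minimal sketch of a helper that could build one such
# pair; `make_reply_pair` and its defaults are hypothetical names, but the
# document shape mirrors the `rawmemberactivities` records inserted above.
from datetime import datetime, timedelta


def make_reply_pair(
    author: str,
    replied_user: str,
    hours_ago: int,
    channel_id: str = "1020707129214111827",
) -> list[dict]:
    date = datetime.now() - timedelta(hours=hours_ago)
    source_id = f"11188143219343360{hours_ago}"
    metadata = {"bot_activity": False, "channel_id": channel_id, "thread_id": None}
    return [
        {
            # the author emits a message plus a reply interaction
            "actions": [{"name": "message", "type": "emitter"}],
            "author_id": author,
            "date": date,
            "interactions": [
                {
                    "name": "reply",
                    "type": "emitter",
                    "users_engaged_id": [replied_user],
                }
            ],
            "metadata": dict(metadata),
            "source_id": source_id,
        },
        {
            # the replied-to user records the same interaction as a receiver
            "actions": [],
            "author_id": replied_user,
            "date": date,
            "interactions": [
                {
                    "name": "reply",
                    "type": "receiver",
                    "users_engaged_id": [author],
                }
            ],
            "metadata": dict(metadata),
            "source_id": source_id,
        },
    ]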
heatmaps_data = list(heatmaps_cursor)

-    print("heatmaps_data: ", heatmaps_data)
-
     heatmaps_expected_dates = [
-        yesterday.strftime("%Y-%m-%d"),
-        yesterday.strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=1)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=2)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=3)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=4)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=5)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"),
-        (yesterday - timedelta(days=6)).strftime("%Y-%m-%d"),
+        yesterday,
+        yesterday,
+        (yesterday - timedelta(days=1)),
+        (yesterday - timedelta(days=1)),
+        (yesterday - timedelta(days=2)),
+        (yesterday - timedelta(days=2)),
+        (yesterday - timedelta(days=3)),
+        (yesterday - timedelta(days=3)),
+        (yesterday - timedelta(days=4)),
+        (yesterday - timedelta(days=4)),
+        (yesterday - timedelta(days=5)),
+        (yesterday - timedelta(days=5)),
+        (yesterday - timedelta(days=6)),
+        (yesterday - timedelta(days=6)),
         # (yesterday - timedelta(days=7)).strftime("%Y-%m-%d"),
     ]
     # 7 days, multiplied with 2
     # (accounts are: "user_0", "user_1")
-    assert len(heatmaps_data) == 12
+    assert len(heatmaps_data) == 14
     # last document must be for yesterday
     data = zip(heatmaps_expected_dates, heatmaps_data)
     for date, document in data:
diff --git a/tests/integration/test_analyzer_platform_fetch_period.py b/tests/integration/test_analyzer_platform_fetch_period.py
new file mode 100644
index 0000000..9ca4f44
--- /dev/null
+++ b/tests/integration/test_analyzer_platform_fetch_period.py
@@ -0,0 +1,125 @@
+from datetime import datetime
+from unittest import TestCase
+
+from bson import ObjectId
+from tc_analyzer_lib.metrics.utils import Platform
+from tc_analyzer_lib.utils.mongo import MongoSingleton
+
+
+class TestPlatformUtilsFetchPeriod(TestCase):
+    def setUp(self) -> None:
+        self.client = MongoSingleton.get_instance().get_client()
+        self.client["Core"].drop_collection("platforms")
+
+    def test_get_period_empty_platform(self):
+        platform_id = "60d5ec44f9a3c2b6d7e2d11a"
+        platform_utils = Platform(platform_id)
+
+        with self.assertRaises(AttributeError):
+            _ = platform_utils.get_platform_period()
+
+    def test_get_period_single_platform(self):
+        platform_id = "60d5ec44f9a3c2b6d7e2d11a"
+        self.client["Core"]["platforms"].insert_one(
+            {
+                "_id": ObjectId(platform_id),
+                "name": "discord",
+                "metadata": {
+                    "isInProgress": True,
+                    "period": datetime(2024, 1, 1),
+                },
+                "resources": ["channel_0", "channel_1", "channel_2"],
+            }
+        )
+
+        platform_obj = Platform(platform_id)
+
+        period = platform_obj.get_platform_period()
+        self.assertIsInstance(period, datetime)
+        self.assertEqual(period, datetime(2024, 1, 1))
+
+    def test_get_period_multiple_platforms(self):
+        platform_id = "60d5ec44f9a3c2b6d7e2d11a"
+        platform_id2 = "60d5ec44f9a3c2b6d7e2d11b"
+        platform_id3 = "60d5ec44f9a3c2b6d7e2d11c"
+
+        self.client["Core"]["platforms"].insert_many(
+            [
+                {
+                    "_id": ObjectId(platform_id),
+                    "name": "discord",
+                    "metadata": {
+                        "isInProgress": True,
+                        "period": datetime(2024, 1, 1),
+                    },
+                    "resources": ["channel_0", "channel_1", "channel_2"],
+                },
+                {
+                    "_id": ObjectId(platform_id2),
+                    "name": "discord",
+                    "metadata": {
+                        "isInProgress": True,
"period": datetime(2024, 1, 2), + }, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + { + "_id": ObjectId(platform_id3), + "name": "discord", + "metadata": { + "isInProgress": True, + "period": datetime(2024, 1, 3), + }, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + ] + ) + + platform_obj = Platform(platform_id2) + + period = platform_obj.get_platform_period() + self.assertIsInstance(period, datetime) + self.assertEqual(period, datetime(2024, 1, 2)) + + def test_get_period_irrelevant_multiple_platforms(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + platform_id3 = "60d5ec44f9a3c2b6d7e2d11c" + platform_id4 = "60d5ec44f9a3c2b6d7e2d11d" + + self.client["Core"]["platforms"].insert_many( + [ + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + "period": datetime(2024, 1, 1), + }, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + { + "_id": ObjectId(platform_id2), + "name": "discord", + "metadata": { + "isInProgress": True, + "period": datetime(2024, 1, 2), + }, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + { + "_id": ObjectId(platform_id3), + "name": "discord", + "metadata": { + "isInProgress": True, + "period": datetime(2024, 1, 3), + }, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + ] + ) + + platform_obj = Platform(platform_id4) + + with self.assertRaises(AttributeError): + _ = platform_obj.get_platform_period() diff --git a/tests/integration/test_analyzer_platform_fetch_resources.py b/tests/integration/test_analyzer_platform_fetch_resources.py new file mode 100644 index 0000000..eb46fd2 --- /dev/null +++ b/tests/integration/test_analyzer_platform_fetch_resources.py @@ -0,0 +1,125 @@ +from datetime import datetime +from unittest import TestCase + +from bson import ObjectId +from tc_analyzer_lib.metrics.utils import Platform +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestPlatformUtilsFetchResources(TestCase): + def setUp(self) -> None: + self.client = MongoSingleton.get_instance().get_client() + self.client["Core"].drop_collection("platforms") + + def test_get_period_empty_platform(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_utils = Platform(platform_id) + + with self.assertRaises(AttributeError): + _ = platform_utils.get_platform_resources() + + def test_get_period_single_platform(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + "period": datetime(2024, 1, 1), + } + ) + + platform_obj = Platform(platform_id) + + resources = platform_obj.get_platform_resources() + self.assertIsInstance(resources, list) + self.assertEqual(resources, ["channel_0", "channel_1", "channel_2"]) + + def test_get_period_multiple_platforms(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + platform_id3 = "60d5ec44f9a3c2b6d7e2d11c" + + self.client["Core"]["platforms"].insert_many( + [ + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + "period": datetime(2024, 1, 1), + }, + { + "_id": ObjectId(platform_id2), + "name": "discord", + "metadata": { + "isInProgress": True, + "resources": ["channel_A", "channel_B", "channel_C"], + }, + "period": datetime(2024, 1, 
2), + }, + { + "_id": ObjectId(platform_id3), + "name": "discord", + "metadata": { + "isInProgress": True, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + "period": datetime(2024, 1, 3), + }, + ] + ) + + platform_obj = Platform(platform_id2) + + resources = platform_obj.get_platform_resources() + self.assertIsInstance(resources, list) + self.assertEqual(resources, ["channel_A", "channel_B", "channel_C"]) + + def test_get_period_irrelevant_multiple_platforms(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + platform_id3 = "60d5ec44f9a3c2b6d7e2d11c" + platform_id4 = "60d5ec44f9a3c2b6d7e2d11d" + + self.client["Core"]["platforms"].insert_many( + [ + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + "period": datetime(2024, 1, 1), + }, + { + "_id": ObjectId(platform_id2), + "name": "discord", + "metadata": { + "isInProgress": True, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + "period": datetime(2024, 1, 2), + }, + { + "_id": ObjectId(platform_id3), + "name": "discord", + "metadata": { + "isInProgress": True, + "resources": ["channel_0", "channel_1", "channel_2"], + }, + "period": datetime(2024, 1, 3), + }, + ] + ) + + platform_obj = Platform(platform_id4) + + with self.assertRaises(AttributeError): + _ = platform_obj.get_platform_resources() diff --git a/tests/integration/test_analyzer_utils_platform.py b/tests/integration/test_analyzer_utils_platform.py new file mode 100644 index 0000000..a6974a5 --- /dev/null +++ b/tests/integration/test_analyzer_utils_platform.py @@ -0,0 +1,171 @@ +from unittest import TestCase + +from bson import ObjectId +from tc_analyzer_lib.metrics.utils import Platform +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestAnalyzerUtilsPlatform(TestCase): + def setUp(self) -> None: + self.client = MongoSingleton.get_instance().get_client() + + self.client["Core"].drop_collection("platforms") + + def test_no_platforms_check_existance(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_obj = Platform(platform_id) + + self.assertFalse(platform_obj.check_existance()) + + def test_single_platforms_check_existance(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + }, + } + ) + platform_obj = Platform(platform_id) + + self.assertTrue(platform_obj.check_existance()) + + def test_single_platforms_irrelevant_check_existance(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + }, + } + ) + # checking for the second platform availability on db + platform_obj = Platform(platform_id2) + self.assertFalse(platform_obj.check_existance()) + + def test_multiple_platforms_irrelevant_check_existance(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + platform_id3 = "60d5ec44f9a3c2b6d7e2d11c" + platform_id4 = "60d5ec44f9a3c2b6d7e2d11d" + + self.client["Core"]["platforms"].insert_many( + [ + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + }, + }, + { + "_id": ObjectId(platform_id2), + "name": "discord", + "metadata": { + "isInProgress": True, + }, + }, + { + "_id": 
ObjectId(platform_id3), + "name": "discord", + "metadata": { + "isInProgress": True, + }, + }, + ] + ) + # checking for the fourth platform availability on db + platform_obj = Platform(platform_id4) + self.assertFalse(platform_obj.check_existance()) + + def test_single_platform_update_isin_progress(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + }, + } + ) + + platform_obj = Platform(platform_id) + platform_obj.update_isin_progress() + + platform = self.client["Core"]["platforms"].find_one( + {"_id": ObjectId(platform_id)} + ) + + self.assertFalse(platform["metadata"]["isInProgress"]) + + def test_single_platform_unavailable_platform_update_isin_progress(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "isInProgress": True, + }, + } + ) + + platform_obj = Platform(platform_id2) + + # the platform was not available + with self.assertRaises(AttributeError): + platform_obj.update_isin_progress() + + def test_get_community_id_no_platforms(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_obj = Platform(platform_id) + + # no platform was available + with self.assertRaises(ValueError): + platform_obj.get_community_id() + + def test_get_community_id_single_platform_available(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + expected_community_id = "77d5ec44f6a3c2b6d7e2d11a" + + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + "community": ObjectId(expected_community_id), + "metadata": { + "isInProgress": True, + }, + } + ) + platform_obj = Platform(platform_id) + community_id = platform_obj.get_community_id() + + self.assertEqual(expected_community_id, community_id) + + def test_get_community_id_irrelevant_platform_available(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + community_id = "77d5ec44f6a3c2b6d7e2d11a" + + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id2), + "name": "discord", + "community": ObjectId(community_id), + "metadata": { + "isInProgress": True, + }, + } + ) + platform_obj = Platform(platform_id) + + with self.assertRaises(ValueError): + _ = platform_obj.get_community_id() diff --git a/tests/integration/test_analyzer_utils_platform_parameters.py b/tests/integration/test_analyzer_utils_platform_parameters.py new file mode 100644 index 0000000..c6add2a --- /dev/null +++ b/tests/integration/test_analyzer_utils_platform_parameters.py @@ -0,0 +1,187 @@ +from datetime import datetime, timedelta +from unittest import TestCase + +from bson import ObjectId +from tc_analyzer_lib.metrics.utils import Platform +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestAnalyzerUtilsPlatform(TestCase): + def setUp(self) -> None: + self.client = MongoSingleton.get_instance().get_client() + + self.client["Core"].drop_collection("platforms") + + def test_no_platform(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_obj = Platform(platform_id) + + with self.assertRaises(AttributeError): + platform_obj.get_platform_analyzer_params() + + def test_single_platform_available(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + guildId = "1234" + sample_action = { + "INT_THR": 1, + "UW_DEG_THR": 1, + "PAUSED_T_THR": 1, + 
"CON_T_THR": 4, + "CON_O_THR": 3, + "EDGE_STR_THR": 5, + "UW_THR_DEG_THR": 5, + "VITAL_T_THR": 4, + "VITAL_O_THR": 3, + "STILL_T_THR": 2, + "STILL_O_THR": 2, + "DROP_H_THR": 2, + "DROP_I_THR": 1, + } + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "id": guildId, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": sample_action, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId("aabbccddeeff001122334455"), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + } + ) + platform_obj = Platform(platform_id) + window, action = platform_obj.get_platform_analyzer_params() + + self.assertEqual(window, {"period_size": 7, "step_size": 1}) + self.assertEqual(sample_action, action) + + def test_multiple_platforms_available(self): + platform_id = "60d5ec44f9a3c2b6d7e2d11a" + platform_id2 = "60d5ec44f9a3c2b6d7e2d11b" + platform_id3 = "60d5ec44f9a3c2b6d7e2d11c" + + guildId = "1234" + guildId2 = "1235" + guildId3 = "1236" + + sample_action = { + "INT_THR": 1, + "UW_DEG_THR": 1, + "PAUSED_T_THR": 1, + "CON_T_THR": 4, + "CON_O_THR": 3, + "EDGE_STR_THR": 5, + "UW_THR_DEG_THR": 5, + "VITAL_T_THR": 4, + "VITAL_O_THR": 3, + "STILL_T_THR": 2, + "STILL_O_THR": 2, + "DROP_H_THR": 2, + "DROP_I_THR": 1, + } + + sample_action2 = { + "INT_THR": 4, + "UW_DEG_THR": 5, + "PAUSED_T_THR": 8, + "CON_T_THR": 4, + "CON_O_THR": 3, + "EDGE_STR_THR": 1, + "UW_THR_DEG_THR": 5, + "VITAL_T_THR": 4, + "VITAL_O_THR": 8, + "STILL_T_THR": 2, + "STILL_O_THR": 24, + "DROP_H_THR": 23, + "DROP_I_THR": 1, + } + sample_action3 = { + "INT_THR": 1, + "UW_DEG_THR": 1, + "PAUSED_T_THR": 1, + "CON_T_THR": 1, + "CON_O_THR": 1, + "EDGE_STR_THR": 1, + "UW_THR_DEG_THR": 1, + "VITAL_T_THR": 1, + "VITAL_O_THR": 1, + "STILL_T_THR": 1, + "STILL_O_THR": 14, + "DROP_H_THR": 13, + "DROP_I_THR": 1, + } + self.client["Core"]["platforms"].insert_many( + [ + { + "_id": ObjectId(platform_id), + "name": "discord", + "metadata": { + "id": guildId, + "icon": "111111111111111111111111", + "name": "guild 1", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": sample_action, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId("aabbccddeeff001122334455"), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + }, + { + "_id": ObjectId(platform_id2), + "name": "discord", + "metadata": { + "id": guildId2, + "icon": "111111111111111111111111", + "name": "guild 2", + "resources": ["1020707129214111827"], + "window": {"period_size": 2, "step_size": 2}, + "action": sample_action2, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId("aabbccddeeff001122334455"), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + }, + { + "_id": ObjectId(platform_id3), + "name": "discord", + "metadata": { + "id": guildId3, + "icon": "111111111111111111111111", + "name": "guild 3", + "resources": ["1020707129214111827"], + "window": {"period_size": 4, "step_size": 3}, + "action": sample_action3, + "period": 
datetime.now() - timedelta(days=30),
+                    },
+                    "community": ObjectId("aabbccddeeff001122334455"),
+                    "disconnectedAt": None,
+                    "connectedAt": (datetime.now() - timedelta(days=40)),
+                    "isInProgress": True,
+                    "createdAt": datetime(2023, 11, 1),
+                    "updatedAt": datetime(2023, 11, 1),
+                },
+            ]
+        )
+        platform_obj = Platform(platform_id2)
+        window, action = platform_obj.get_platform_analyzer_params()
+
+        self.assertEqual(window, {"period_size": 2, "step_size": 2})
+        self.assertEqual(sample_action2, action)
diff --git a/tests/integration/test_assess_engagement_mention.py b/tests/integration/test_assess_engagement_mention.py
index 7c7ca3e..01f4795 100644
--- a/tests/integration/test_assess_engagement_mention.py
+++ b/tests/integration/test_assess_engagement_mention.py
@@ -1,31 +1,37 @@
 from datetime import datetime, timedelta
 from unittest import TestCase

-from discord_analyzer.analysis.utils.member_activity_utils import assess_engagement
-from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps
-from discord_analyzer.analyzer.utils.analyzer_db_manager import AnalyzerDBManager
-from tc_core_analyzer_lib.utils.activity import DiscordActivity
-from utils.credentials import get_mongo_credentials
+from tc_analyzer_lib.algorithms.utils.member_activity_utils import assess_engagement
+from tc_analyzer_lib.metrics.heatmaps import Heatmaps
+from tc_analyzer_lib.metrics.utils.analyzer_db_manager import AnalyzerDBManager
+from tc_analyzer_lib.schemas import GraphSchema
+from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig

 from .utils.analyzer_setup import launch_db_access
-from .utils.remove_and_setup_guild import setup_db_guild
+from .utils.setup_platform import setup_platform


 class TestAssessEngagementMentions(TestCase):
     def setUp(self) -> None:
-        self.guildId = "1234"
-        self.db_access = launch_db_access(self.guildId)
+        platform_id = "515151515151515151515151"
+        self.db_access = launch_db_access(platform_id)
         self.create_db_connections()

+        period = datetime(2024, 1, 1)
+        resources = ["123", "124", "125"]
+        # using one of the configs we currently have
+        # it could be any other platform's config
+        discord_analyzer_config = DiscordAnalyzerConfig()
+
+        self.heatmaps = Heatmaps(
+            platform_id=platform_id,
+            period=period,
+            resources=resources,
+            analyzer_config=discord_analyzer_config,
+        )
+
     def create_db_connections(self):
         base_analyzer = AnalyzerDBManager()
-        mongo_creds = get_mongo_credentials()
-        base_analyzer.set_mongo_database_info(
-            mongo_db_user=mongo_creds["user"],
-            mongo_db_password=mongo_creds["password"],
-            mongo_db_host=mongo_creds["host"],
-            mongo_db_port=mongo_creds["port"],
-        )
         base_analyzer.database_connect()
         self.db_connections = base_analyzer.DB_connections

@@ -33,15 +39,16 @@ def heatmaps_analytics(self):
         """
         heatmaps are the input for assess_engagement's interaction matrix
         """
-        heatmaps = Heatmaps(DB_connections=self.db_connections, testing=False)
-        heatmaps_data = heatmaps.analysis_heatmap(guildId=self.guildId, from_start=True)
+        heatmaps_data = self.heatmaps.start(from_start=True)
+
         analytics_data = {}
         analytics_data["heatmaps"] = heatmaps_data
         analytics_data["memberactivities"] = (None, None)
+        graph_schema = GraphSchema(platform=self.heatmaps.analyzer_config.platform)
         self.db_connections.store_analytics_data(
-            guild_id=self.guildId,
+            platform_id=self.heatmaps.platform_id,
             analytics_data=analytics_data,
-            community_id="123",
+            graph_schema=graph_schema,
             remove_memberactivities=False,
             remove_heatmaps=False,
         )

@@ -68,43 +75,64 @@ def
test_single_user_mention(self): } platform_id = "515151515151515151515151" - setup_db_guild( + setup_platform( self.db_access, platform_id, - self.guildId, discordId_list=users_id_list, days_ago_period=35, action=action, ) - self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") + self.db_access.db_mongo_client[platform_id].drop_collection("heatmaps") rawinfo_samples = [] - analyze_dates = set() + analyze_dates = [datetime.now() - timedelta(hours=35 * 24), datetime.now()] for i in range(35 * 24): raw_data_date = datetime.now() - timedelta(hours=i) - sample = { - "type": 0, - "author": "user2", - "content": f"test message {i} @user1", - "user_mentions": ["user1"], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": raw_data_date, - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - analyze_dates.add(raw_data_date.strftime("%Y-%m-%d")) - rawinfo_samples.append(sample) + author = "user1" + mentioned_user = "user2" + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": raw_data_date, + "interactions": [ + { + "name": "mention", + "type": "emitter", + "users_engaged_id": [mentioned_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "123", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": mentioned_user, + "date": raw_data_date, + "interactions": [ + { + "name": "mention", + "type": "receiver", + "users_engaged_id": [author], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "123", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) + self.db_access.db_mongo_client[self.heatmaps.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) self.heatmaps_analytics() activity_dict: dict[str, dict] = { @@ -132,15 +160,16 @@ def test_single_user_mention(self): _, activity_dict = assess_engagement( w_i=0, accounts=users_id_list, + platform_id=self.heatmaps.platform_id, action_params=action, period_size=7, - db_access=self.db_access, - channels=["1020707129214111827"], - analyze_dates=list(analyze_dates), + resources=["123"], + resource_identifier="channel_id", + analyze_dates=analyze_dates, activities_name=list(activity_dict.keys()), activity_dict=activity_dict, - activities_to_analyze=[ - DiscordActivity.Mention, - ], + analyzer_config=self.heatmaps.analyzer_config, ) - self.assertEqual(activity_dict["all_active"], {"0": set(["user2"])}) + # user1 sending a message + # user2 being mentioned in the message + self.assertEqual(activity_dict["all_active"], {"0": set(["user1"])}) diff --git a/tests/integration/test_assess_engagement_reactions.py b/tests/integration/test_assess_engagement_reactions.py index 318ca3d..5e3b34d 100644 --- a/tests/integration/test_assess_engagement_reactions.py +++ b/tests/integration/test_assess_engagement_reactions.py @@ -1,31 +1,37 @@ from datetime import datetime, timedelta from unittest import TestCase -from discord_analyzer.analysis.utils.member_activity_utils import assess_engagement -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps -from discord_analyzer.analyzer.utils.analyzer_db_manager import 
AnalyzerDBManager
-from tc_core_analyzer_lib.utils.activity import DiscordActivity
-from utils.credentials import get_mongo_credentials
+from tc_analyzer_lib.algorithms.utils.member_activity_utils import assess_engagement
+from tc_analyzer_lib.metrics.heatmaps import Heatmaps
+from tc_analyzer_lib.metrics.utils.analyzer_db_manager import AnalyzerDBManager
+from tc_analyzer_lib.schemas import GraphSchema
+from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig

 from .utils.analyzer_setup import launch_db_access
-from .utils.remove_and_setup_guild import setup_db_guild
+from .utils.setup_platform import setup_platform


 class TestAssessEngagementReactions(TestCase):
     def setUp(self) -> None:
-        self.guildId = "1234"
-        self.db_access = launch_db_access(self.guildId)
+        platform_id = "515151515151515151515151"
+        self.db_access = launch_db_access(platform_id)
         self.create_db_connections()

+        period = datetime(2024, 1, 1)
+        resources = ["123", "124", "125"]
+        # using one of the configs we currently have
+        # it could be any other platform's config
+        discord_analyzer_config = DiscordAnalyzerConfig()
+
+        self.heatmaps = Heatmaps(
+            platform_id=platform_id,
+            period=period,
+            resources=resources,
+            analyzer_config=discord_analyzer_config,
+        )
+
     def create_db_connections(self):
         base_analyzer = AnalyzerDBManager()
-        mongo_creds = get_mongo_credentials()
-        base_analyzer.set_mongo_database_info(
-            mongo_db_user=mongo_creds["user"],
-            mongo_db_password=mongo_creds["password"],
-            mongo_db_host=mongo_creds["host"],
-            mongo_db_port=mongo_creds["port"],
-        )
         base_analyzer.database_connect()
         self.db_connections = base_analyzer.DB_connections

@@ -33,15 +39,16 @@ def heatmaps_analytics(self):
         """
         heatmaps are the input for assess_engagement's interaction matrix
         """
-        heatmaps = Heatmaps(DB_connections=self.db_connections, testing=False)
-        heatmaps_data = heatmaps.analysis_heatmap(guildId=self.guildId, from_start=True)
+        heatmaps_data = self.heatmaps.start(from_start=True)
+
         analytics_data = {}
         analytics_data["heatmaps"] = heatmaps_data
         analytics_data["memberactivities"] = (None, None)
+        graph_schema = GraphSchema(platform=self.heatmaps.analyzer_config.platform)
         self.db_connections.store_analytics_data(
-            guild_id=self.guildId,
+            platform_id=self.heatmaps.platform_id,
             analytics_data=analytics_data,
-            community_id="123",
+            graph_schema=graph_schema,
             remove_memberactivities=False,
             remove_heatmaps=False,
         )

@@ -68,43 +75,69 @@ def test_single_user_reaction(self):
         }
         platform_id = "515151515151515151515151"
-        setup_db_guild(
+        setup_platform(
             self.db_access,
             platform_id,
-            self.guildId,
             discordId_list=users_id_list,
             days_ago_period=35,
             action=action,
         )

-        self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({})
-        self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps")
+        self.db_access.db_mongo_client[self.heatmaps.platform_id].drop_collection(
+            "heatmaps"
+        )
+        self.db_access.db_mongo_client[self.heatmaps.platform_id].drop_collection(
+            "rawmemberactivities"
+        )

         rawinfo_samples = []

-        analyze_dates = set()
+        analyze_dates = [datetime.now() - timedelta(hours=35 * 24), datetime.now()]
         for i in range(35 * 24):
             raw_data_date = datetime.now() - timedelta(hours=i)
-            sample = {
-                "type": 0,
-                "author": "user1",
-                "content": f"test message {i}",
-                "user_mentions": [],
-                "role_mentions": [],
-                "reactions": ["user2,👍"],
-                "replied_user": None,
-                "createdDate": raw_data_date,
-                "messageId": f"11188143219343360{i}",
-                "channelId": "1020707129214111827",
-                "channelName": "general",
-
"threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - analyze_dates.add(raw_data_date.strftime("%Y-%m-%d")) - rawinfo_samples.append(sample) + author = "user1" + reacted_user = "user2" + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": raw_data_date, + "interactions": [ + { + "name": "reaction", + "type": "receiver", + "users_engaged_id": [reacted_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "123", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": reacted_user, + "date": raw_data_date, + "interactions": [ + { + "name": "reaction", + "type": "emitter", + "users_engaged_id": [author], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "123", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) + self.db_access.db_mongo_client[self.heatmaps.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) self.heatmaps_analytics() activity_dict: dict[str, dict] = { @@ -132,15 +165,16 @@ def test_single_user_reaction(self): _, activity_dict = assess_engagement( w_i=0, accounts=users_id_list, + platform_id=self.heatmaps.platform_id, action_params=action, period_size=7, - db_access=self.db_access, - channels=["1020707129214111827"], - analyze_dates=list(analyze_dates), + resources=["123"], + resource_identifier="channel_id", + analyze_dates=analyze_dates, activities_name=list(activity_dict.keys()), activity_dict=activity_dict, - activities_to_analyze=[ - DiscordActivity.Reaction, - ], + analyzer_config=self.heatmaps.analyzer_config, ) - self.assertEqual(activity_dict["all_active"], {"0": set(["user2"])}) + # user1 sending a message + # user2 reacting to the message + self.assertEqual(activity_dict["all_active"], {"0": set(["user1", "user2"])}) diff --git a/tests/integration/test_assess_engagement_replies.py b/tests/integration/test_assess_engagement_replies.py index c971085..1182c21 100644 --- a/tests/integration/test_assess_engagement_replies.py +++ b/tests/integration/test_assess_engagement_replies.py @@ -1,31 +1,37 @@ from datetime import datetime, timedelta from unittest import TestCase -from discord_analyzer.analysis.utils.member_activity_utils import assess_engagement -from discord_analyzer.analyzer.analyzer_heatmaps import Heatmaps -from discord_analyzer.analyzer.utils.analyzer_db_manager import AnalyzerDBManager -from tc_core_analyzer_lib.utils.activity import DiscordActivity -from utils.credentials import get_mongo_credentials +from tc_analyzer_lib.algorithms.utils.member_activity_utils import assess_engagement +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.metrics.utils.analyzer_db_manager import AnalyzerDBManager +from tc_analyzer_lib.schemas import GraphSchema +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig from .utils.analyzer_setup import launch_db_access -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform class TestAssessEngagementReplies(TestCase): def setUp(self) -> None: - self.guildId = "1234" - self.db_access = launch_db_access(self.guildId) + platform_id = "515151515151515151515151" + self.db_access = launch_db_access(platform_id) self.create_db_connections() + period = datetime(2024, 1, 1) + resources = ["123", "124", "125"] + # 
using one of the configs we currently have
+        # it could be any other platform's config
+        discord_analyzer_config = DiscordAnalyzerConfig()
+
+        self.heatmaps = Heatmaps(
+            platform_id=platform_id,
+            period=period,
+            resources=resources,
+            analyzer_config=discord_analyzer_config,
+        )
+
     def create_db_connections(self):
         base_analyzer = AnalyzerDBManager()
-        mongo_creds = get_mongo_credentials()
-        base_analyzer.set_mongo_database_info(
-            mongo_db_user=mongo_creds["user"],
-            mongo_db_password=mongo_creds["password"],
-            mongo_db_host=mongo_creds["host"],
-            mongo_db_port=mongo_creds["port"],
-        )
         base_analyzer.database_connect()
         self.db_connections = base_analyzer.DB_connections

@@ -33,15 +39,16 @@ def heatmaps_analytics(self):
         """
         heatmaps are the input for assess_engagement's interaction matrix
         """
-        heatmaps = Heatmaps(DB_connections=self.db_connections, testing=False)
-        heatmaps_data = heatmaps.analysis_heatmap(guildId=self.guildId, from_start=True)
+        heatmaps_data = self.heatmaps.start(from_start=True)
+
         analytics_data = {}
         analytics_data["heatmaps"] = heatmaps_data
         analytics_data["memberactivities"] = (None, None)
+        graph_schema = GraphSchema(platform=self.heatmaps.analyzer_config.platform)
         self.db_connections.store_analytics_data(
-            guild_id=self.guildId,
+            platform_id=self.heatmaps.platform_id,
             analytics_data=analytics_data,
-            community_id="123",
+            graph_schema=graph_schema,
             remove_memberactivities=False,
             remove_heatmaps=False,
         )

@@ -66,44 +73,69 @@ def test_single_user_reply(self):
         "DROP_H_THR": 2,
         "DROP_I_THR": 1,
     }
-        platform_id = "515151515151515151515151"
-        setup_db_guild(
+        setup_platform(
             self.db_access,
-            platform_id,
-            self.guildId,
+            self.heatmaps.platform_id,
             discordId_list=users_id_list,
             days_ago_period=35,
             action=action,
         )

-        self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({})
-        self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps")
+        self.db_access.db_mongo_client[self.heatmaps.platform_id].drop_collection(
+            "heatmaps"
+        )
+        self.db_access.db_mongo_client[self.heatmaps.platform_id].drop_collection(
+            "rawmemberactivities"
+        )

         rawinfo_samples = []

-        analyze_dates = set()
+        analyze_dates = [datetime.now() - timedelta(hours=35 * 24), datetime.now()]
         for i in range(35 * 24):
             raw_data_date = datetime.now() - timedelta(hours=i)
-            sample = {
-                "type": 19,
-                "author": "user1",
-                "content": f"test message {i}",
-                "user_mentions": [],
-                "role_mentions": [],
-                "reactions": [],
-                "replied_user": "user2",
-                "createdDate": raw_data_date,
-                "messageId": f"11188143219343360{i}",
-                "channelId": "1020707129214111827",
-                "channelName": "general",
-                "threadId": None,
-                "threadName": None,
-                "isGeneratedByWebhook": False,
-            }
-            analyze_dates.add(raw_data_date.strftime("%Y-%m-%d"))
-            rawinfo_samples.append(sample)
+            author = "user1"
+            replied_user = "user2"
+            samples = [
+                {
+                    "actions": [{"name": "message", "type": "emitter"}],
+                    "author_id": author,
+                    "date": raw_data_date,
+                    "interactions": [
+                        {
+                            "name": "reply",
+                            "type": "emitter",
+                            "users_engaged_id": [replied_user],
+                        }
+                    ],
+                    "metadata": {
+                        "bot_activity": False,
+                        "channel_id": "123",
+                        "thread_id": None,
+                    },
+                    "source_id": f"11188143219343360{i}",
+                },
+                {
+                    "actions": [],
+                    "author_id": replied_user,
+                    "date": raw_data_date,
+                    "interactions": [
+                        {
+                            "name": "reply",
+                            "type": "receiver",
+                            "users_engaged_id": [author],
+                        }
+                    ],
+                    "metadata": {
+                        "bot_activity": False,
+                        "channel_id": "123",
+                        "thread_id": None,
+                    },
+                    "source_id": f"11188143219343360{i}",
+                },
+            ]
+
rawinfo_samples.extend(samples) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) + self.db_access.db_mongo_client[self.heatmaps.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) self.heatmaps_analytics() activity_dict: dict[str, dict] = { @@ -131,15 +163,14 @@ def test_single_user_reply(self): _, activity_dict = assess_engagement( w_i=0, accounts=users_id_list, + platform_id=self.heatmaps.platform_id, action_params=action, period_size=7, - db_access=self.db_access, - channels=["1020707129214111827"], - analyze_dates=list(analyze_dates), + resources=["123"], + resource_identifier="channel_id", + analyze_dates=analyze_dates, activities_name=list(activity_dict.keys()), activity_dict=activity_dict, - activities_to_analyze=[ - DiscordActivity.Reply, - ], + analyzer_config=self.heatmaps.analyzer_config, ) self.assertEqual(activity_dict["all_active"], {"0": set(["user1"])}) diff --git a/tests/integration/test_automation_base_create_manual_saga.py b/tests/integration/test_automation_base_create_manual_saga.py index 72f1c6b..ab3de7a 100644 --- a/tests/integration/test_automation_base_create_manual_saga.py +++ b/tests/integration/test_automation_base_create_manual_saga.py @@ -1,6 +1,6 @@ import unittest -from automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase from .utils.analyzer_setup import launch_db_access diff --git a/tests/integration/test_automation_base_get_users_from_memberactivities_empty_database.py b/tests/integration/test_automation_base_get_users_from_memberactivities_empty_database.py index d68d294..8a59bfc 100644 --- a/tests/integration/test_automation_base_get_users_from_memberactivities_empty_database.py +++ b/tests/integration/test_automation_base_get_users_from_memberactivities_empty_database.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase from .utils.analyzer_setup import launch_db_access @@ -9,14 +9,14 @@ def test_automation_base_get_users_no_data_new_disengaged(): """ try to get the users in case of no data available """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") automation_base = AutomationBase() users1, users2 = automation_base._get_users_from_memberactivities( - guildId, category="all_new_disengaged" + platform_id, category="all_new_disengaged" ) assert users1 == [] @@ -27,14 +27,14 @@ def test_automation_base_get_users_no_data_new_active(): """ try to get the users in case of no data available """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") automation_base = AutomationBase() users1, users2 = automation_base._get_users_from_memberactivities( - guildId, category="all_new_active" + platform_id, category="all_new_active" ) assert users1 == [] @@ -45,25 +45,21 @@ def test_automation_base_get_users_empty_new_disengaged(): """ get empty users in case of no data available """ - guildId = "1234" 
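# Note: a minimal usage sketch of the API exercised by the tests below, assuming
# a reachable MongoDB behind MongoSingleton and a `memberactivities` collection
# for the platform; the platform_id value here is illustrative only.
from tc_analyzer_lib.automation.utils.automation_base import AutomationBase

automation_base = AutomationBase()
# the tests compare the two returned lists against the users stored in
# yesterday's document and in the document from two days ago, respectively
users1, users2 = automation_base._get_users_from_memberactivities(
    "515151515151515151515151", category="all_new_disengaged"
)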
- db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("memberactivities") - db_access.db_mongo_client[guildId]["memberactivities"].delete_many({}) + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id]["memberactivities"].delete_many({}) - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -116,7 +112,7 @@ def test_automation_base_get_users_empty_new_disengaged(): automation_base = AutomationBase() users1, users2 = automation_base._get_users_from_memberactivities( - guildId, category="all_new_disengaged" + platform_id, category="all_new_disengaged" ) assert users1 == [] @@ -127,25 +123,21 @@ def test_automation_base_get_users_empty_new_active(): """ get empty users in case of no data available """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("memberactivities") - db_access.db_mongo_client[guildId]["memberactivities"].delete_many({}) + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id]["memberactivities"].delete_many({}) - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -198,7 +190,7 @@ def test_automation_base_get_users_empty_new_active(): automation_base = AutomationBase() users1, users2 = automation_base._get_users_from_memberactivities( - guildId, category="all_new_active" + platform_id, category="all_new_active" ) assert users1 == [] diff --git a/tests/integration/test_automation_base_get_users_from_memberactivities_filled_database.py b/tests/integration/test_automation_base_get_users_from_memberactivities_filled_database.py index 6ace0da..8797173 100644 --- a/tests/integration/test_automation_base_get_users_from_memberactivities_filled_database.py +++ b/tests/integration/test_automation_base_get_users_from_memberactivities_filled_database.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase from .utils.analyzer_setup import 
launch_db_access
@@ -9,24 +9,20 @@ def test_automation_base_get_users_one_user_new_disengaged():
     """
     get one user in case data is available
     """
-    guildId = "1234"
-    db_access = launch_db_access(guildId)
+    platform_id = "515151515151515151515151"
+    db_access = launch_db_access(platform_id)

-    db_access.db_mongo_client[guildId].drop_collection("memberactivities")
+    db_access.db_mongo_client[platform_id].drop_collection("memberactivities")

-    date_yesterday = (
-        (datetime.now() - timedelta(days=1))
-        .replace(hour=0, minute=0, second=0)
-        .strftime("%Y-%m-%dT%H:%M:%S")
+    date_yesterday = (datetime.now() - timedelta(days=1)).replace(
+        hour=0, minute=0, second=0, microsecond=0
     )

-    date_two_past_days = (
-        (datetime.now() - timedelta(days=2))
-        .replace(hour=0, minute=0, second=0)
-        .strftime("%Y-%m-%dT%H:%M:%S")
+    date_two_past_days = (datetime.now() - timedelta(days=2)).replace(
+        hour=0, minute=0, second=0, microsecond=0
     )

-    db_access.db_mongo_client[guildId]["memberactivities"].insert_many(
+    db_access.db_mongo_client[platform_id]["memberactivities"].insert_many(
         [
             {
                 "date": date_yesterday,
@@ -79,7 +75,7 @@ def test_automation_base_get_users_one_user_new_disengaged():
     automation_base = AutomationBase()

     users1, users2 = automation_base._get_users_from_memberactivities(
-        guildId, category="all_new_disengaged"
+        platform_id, category="all_new_disengaged"
     )

     assert users1 == ["user1"]
@@ -90,24 +86,20 @@ def test_automation_base_get_users_multiple_user_new_disengaged():
     """
     get multiple users in case data is available
     """
-    guildId = "1234"
-    db_access = launch_db_access(guildId)
+    platform_id = "515151515151515151515151"
+    db_access = launch_db_access(platform_id)

-    db_access.db_mongo_client[guildId].drop_collection("memberactivities")
+    db_access.db_mongo_client[platform_id].drop_collection("memberactivities")

-    date_yesterday = (
-        (datetime.now() - timedelta(days=1))
-        .replace(hour=0, minute=0, second=0)
-        .strftime("%Y-%m-%dT%H:%M:%S")
+    date_yesterday = (datetime.now() - timedelta(days=1)).replace(
+        hour=0, minute=0, second=0, microsecond=0
     )

-    date_two_past_days = (
-        (datetime.now() - timedelta(days=2))
-        .replace(hour=0, minute=0, second=0)
-        .strftime("%Y-%m-%dT%H:%M:%S")
+    date_two_past_days = (datetime.now() - timedelta(days=2)).replace(
+        hour=0, minute=0, second=0, microsecond=0
     )

-    db_access.db_mongo_client[guildId]["memberactivities"].insert_many(
+    db_access.db_mongo_client[platform_id]["memberactivities"].insert_many(
         [
             {
                 "date": date_yesterday,
@@ -160,7 +152,7 @@ def test_automation_base_get_users_multiple_user_new_disengaged():
     automation_base = AutomationBase()

     users1, users2 = automation_base._get_users_from_memberactivities(
-        guildId, category="all_new_disengaged"
+        platform_id, category="all_new_disengaged"
     )

     assert users1 == ["user1", "user2"]
@@ -171,25 +163,21 @@ def test_automation_base_get_users_multiple_user_new_active():
     """
     get multiple users in case data is available
     """
-    guildId = "1234"
-    db_access = launch_db_access(guildId)
+    platform_id = "515151515151515151515151"
+    db_access = launch_db_access(platform_id)

-    db_access.db_mongo_client[guildId].drop_collection("memberactivities")
-    db_access.db_mongo_client[guildId]["memberactivities"].delete_many({})
+    db_access.db_mongo_client[platform_id].drop_collection("memberactivities")
+    db_access.db_mongo_client[platform_id]["memberactivities"].delete_many({})

-    date_yesterday = (
-        (datetime.now() - timedelta(days=1))
-        .replace(hour=0, minute=0, second=0)
-        .strftime("%Y-%m-%dT%H:%M:%S")
+
date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -242,7 +230,7 @@ def test_automation_base_get_users_multiple_user_new_active(): automation_base = AutomationBase() users1, users2 = automation_base._get_users_from_memberactivities( - guildId, category="all_new_active" + platform_id, category="all_new_active" ) assert users1 == ["user5", "user8", "user12"] diff --git a/tests/integration/test_automation_base_prepare_names_globalname.py b/tests/integration/test_automation_base_prepare_names_globalname.py index d0d3831..4a83ed8 100644 --- a/tests/integration/test_automation_base_prepare_names_globalname.py +++ b/tests/integration/test_automation_base_prepare_names_globalname.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase from .utils.analyzer_setup import launch_db_access @@ -10,12 +10,12 @@ def test_prepare_ngu_some_data_globalname_strategy(): test the preparation module in case of some data available the output should be have the names of the field `globalName` """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("guildmembers") + db_access.db_mongo_client[platform_id].drop_collection("guildmembers") - db_access.db_mongo_client[guildId]["guildmembers"].insert_many( + db_access.db_mongo_client[platform_id]["guildmembers"].insert_many( [ { "discordId": "1111", @@ -87,7 +87,7 @@ def test_prepare_ngu_some_data_globalname_strategy(): automation_base = AutomationBase() id_names = automation_base.prepare_names( - guild_id=guildId, + guild_id=platform_id, user_ids=["1111", "1112", "1113", "1116"], user_field="globalName", ) diff --git a/tests/integration/test_automation_base_prepare_names_ngu.py b/tests/integration/test_automation_base_prepare_names_ngu.py index e73af3a..70c695f 100644 --- a/tests/integration/test_automation_base_prepare_names_ngu.py +++ b/tests/integration/test_automation_base_prepare_names_ngu.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase from .utils.analyzer_setup import launch_db_access @@ -10,13 +10,13 @@ def test_prepare_ngu_no_data(): test the ngu preparation module in case of no data available the output should be an empty list """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("guildmembers") + db_access.db_mongo_client[platform_id].drop_collection("guildmembers") automation_base = AutomationBase() - names = automation_base.prepare_names(guild_id=guildId, user_ids=[]) + names = automation_base.prepare_names(guild_id=platform_id, user_ids=[]) assert names == [] @@ -26,12 +26,12 @@ def 
test_prepare_ngu_some_data_ngu_strategy(): test the name preparation module in case of some data available the output should be have the names with the priority of ngu """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("guildmembers") + db_access.db_mongo_client[platform_id].drop_collection("guildmembers") - db_access.db_mongo_client[guildId]["guildmembers"].insert_many( + db_access.db_mongo_client[platform_id]["guildmembers"].insert_many( [ { "discordId": "1111", @@ -103,7 +103,7 @@ def test_prepare_ngu_some_data_ngu_strategy(): automation_base = AutomationBase() id_names = automation_base.prepare_names( - guild_id=guildId, + guild_id=platform_id, user_ids=["1111", "1112", "1113", "1116", "1119"], user_field="ngu", ) diff --git a/tests/integration/test_automation_base_prepare_names_nickname.py b/tests/integration/test_automation_base_prepare_names_nickname.py index c121a18..d516001 100644 --- a/tests/integration/test_automation_base_prepare_names_nickname.py +++ b/tests/integration/test_automation_base_prepare_names_nickname.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase from .utils.analyzer_setup import launch_db_access @@ -10,12 +10,12 @@ def test_prepare_ngu_some_data_nickname_strategy(): test the preparation module in case of some data available the output should be have the names of the field `nickname` """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("guildmembers") + db_access.db_mongo_client[platform_id].drop_collection("guildmembers") - db_access.db_mongo_client[guildId]["guildmembers"].insert_many( + db_access.db_mongo_client[platform_id]["guildmembers"].insert_many( [ { "discordId": "1111", @@ -87,7 +87,7 @@ def test_prepare_ngu_some_data_nickname_strategy(): automation_base = AutomationBase() id_names = automation_base.prepare_names( - guild_id=guildId, + guild_id=platform_id, user_ids=["1111", "1112", "1113", "1116"], user_field="nickname", ) diff --git a/tests/integration/test_automation_base_prepare_names_usernames.py b/tests/integration/test_automation_base_prepare_names_usernames.py index 6c5c0ba..89edf25 100644 --- a/tests/integration/test_automation_base_prepare_names_usernames.py +++ b/tests/integration/test_automation_base_prepare_names_usernames.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from automation.utils.automation_base import AutomationBase +from tc_analyzer_lib.automation.utils.automation_base import AutomationBase from .utils.analyzer_setup import launch_db_access @@ -10,12 +10,12 @@ def test_prepare_ngu_some_data_nickname_strategy(): test the preparation module in case of some data available the output should be have the names of the field `username` """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("guildmembers") + db_access.db_mongo_client[platform_id].drop_collection("guildmembers") - db_access.db_mongo_client[guildId]["guildmembers"].insert_many( + db_access.db_mongo_client[platform_id]["guildmembers"].insert_many( [ { "discordId": 
"1111", @@ -87,7 +87,7 @@ def test_prepare_ngu_some_data_nickname_strategy(): automation_base = AutomationBase() id_names = automation_base.prepare_names( - guild_id=guildId, + guild_id=platform_id, user_ids=["1111", "1112", "1113", "1116"], user_field="username", ) @@ -105,12 +105,12 @@ def test_prepare_ngu_some_data_username_strategy(): test the username preparation module in case of some data available the output should be have the names of the field `username` """ - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "1234" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guildId].drop_collection("guildmembers") + db_access.db_mongo_client[platform_id].drop_collection("guildmembers") - db_access.db_mongo_client[guildId]["guildmembers"].insert_many( + db_access.db_mongo_client[platform_id]["guildmembers"].insert_many( [ { "discordId": "1111", @@ -182,7 +182,7 @@ def test_prepare_ngu_some_data_username_strategy(): automation_base = AutomationBase() id_names = automation_base.prepare_names( - guild_id=guildId, + guild_id=platform_id, user_ids=["1111", "1112", "1113", "1116"], user_field="username", ) diff --git a/tests/integration/test_automation_db_load_from_db.py b/tests/integration/test_automation_db_load_from_db.py index 64fc96a..4951bec 100644 --- a/tests/integration/test_automation_db_load_from_db.py +++ b/tests/integration/test_automation_db_load_from_db.py @@ -2,9 +2,9 @@ import unittest from datetime import datetime, timedelta -from automation.utils.model import AutomationDB from dotenv import load_dotenv -from utils.mongo import MongoSingleton +from tc_analyzer_lib.automation.utils.model import AutomationDB +from tc_analyzer_lib.utils.mongo import MongoSingleton class TestAutomationDBLoadFromDB(unittest.TestCase): diff --git a/tests/integration/test_automation_db_save_to_db.py b/tests/integration/test_automation_db_save_to_db.py index 607e772..f360321 100644 --- a/tests/integration/test_automation_db_save_to_db.py +++ b/tests/integration/test_automation_db_save_to_db.py @@ -2,10 +2,10 @@ import unittest from datetime import datetime, timezone -from automation.utils.interfaces import Automation -from automation.utils.model import AutomationDB from dotenv import load_dotenv -from utils.mongo import MongoSingleton +from tc_analyzer_lib.automation.utils.interfaces import Automation +from tc_analyzer_lib.automation.utils.model import AutomationDB +from tc_analyzer_lib.utils.mongo import MongoSingleton class TestAutomationDBSaveToDB(unittest.TestCase): diff --git a/tests/integration/test_automation_fire_messages_mongo_saga_message_ngu_strategy.py b/tests/integration/test_automation_fire_messages_mongo_saga_message_ngu_strategy.py index 4cfda71..a084620 100644 --- a/tests/integration/test_automation_fire_messages_mongo_saga_message_ngu_strategy.py +++ b/tests/integration/test_automation_fire_messages_mongo_saga_message_ngu_strategy.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta -from automation.automation_workflow import AutomationWorkflow -from automation.utils.interfaces import ( +from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow +from tc_analyzer_lib.automation.utils.interfaces import ( Automation, AutomationAction, AutomationReport, @@ -16,9 +16,10 @@ def test_automation_fire_message_check_mongodb_document_messages_ngu_strategy(): check the created messages in saga """ guild_id = "1234" - db_access = launch_db_access(guild_id) + platform_id = "515151515151515151515151" + db_access = 
launch_db_access(platform_id) - db_access.db_mongo_client[guild_id].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") db_access.db_mongo_client["Saga"].drop_collection("sagas") db_access.db_mongo_client[guild_id].drop_collection("guildmembers") db_access.db_mongo_client["Automation"].drop_collection("automations") @@ -145,19 +146,15 @@ def test_automation_fire_message_check_mongodb_document_messages_ngu_strategy(): automation.to_dict() ) - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guild_id]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -209,7 +206,7 @@ def test_automation_fire_message_check_mongodb_document_messages_ngu_strategy(): ) automation_workflow = AutomationWorkflow() - automation_workflow.start(guild_id) + automation_workflow.start(platform_id, guild_id) count = db_access.db_mongo_client["Saga"]["sagas"].count_documents({}) assert count == 4 diff --git a/tests/integration/test_automation_fire_messages_mongo_saga_message_nickname_strategy.py b/tests/integration/test_automation_fire_messages_mongo_saga_message_nickname_strategy.py index 932e6d6..754a4b1 100644 --- a/tests/integration/test_automation_fire_messages_mongo_saga_message_nickname_strategy.py +++ b/tests/integration/test_automation_fire_messages_mongo_saga_message_nickname_strategy.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta -from automation.automation_workflow import AutomationWorkflow -from automation.utils.interfaces import ( +from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow +from tc_analyzer_lib.automation.utils.interfaces import ( Automation, AutomationAction, AutomationReport, @@ -16,9 +16,10 @@ def test_automation_fire_message_check_mongodb_document_messages_username_strate check the created messages in saga """ guild_id = "1234" - db_access = launch_db_access(guild_id) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guild_id].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") db_access.db_mongo_client["Saga"].drop_collection("sagas") db_access.db_mongo_client[guild_id].drop_collection("guildmembers") db_access.db_mongo_client["Automation"].drop_collection("automations") @@ -145,19 +146,15 @@ def test_automation_fire_message_check_mongodb_document_messages_username_strate automation.to_dict() ) - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - 
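Note the signature change threaded through every fire-messages test: `AutomationWorkflow.start` now takes the platform id (which keys the per-platform Mongo database) alongside the Discord guild id. A sketch of the new call, assuming the argument-free constructor the tests use:

```python
from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow

platform_id = "515151515151515151515151"  # keys the per-platform Mongo database
guild_id = "1234"                         # the Discord guild the automation targets

automation_workflow = AutomationWorkflow()
automation_workflow.start(platform_id, guild_id)  # was: start(guild_id)
```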
db_access.db_mongo_client[guild_id]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -209,7 +206,7 @@ def test_automation_fire_message_check_mongodb_document_messages_username_strate ) automation_workflow = AutomationWorkflow() - automation_workflow.start(guild_id) + automation_workflow.start(platform_id, guild_id) count = db_access.db_mongo_client["Saga"]["sagas"].count_documents({}) assert count == 4 diff --git a/tests/integration/test_automation_fire_messages_mongo_saga_message_no_handlebar.py b/tests/integration/test_automation_fire_messages_mongo_saga_message_no_handlebar.py index cee59c6..3f2715a 100644 --- a/tests/integration/test_automation_fire_messages_mongo_saga_message_no_handlebar.py +++ b/tests/integration/test_automation_fire_messages_mongo_saga_message_no_handlebar.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta -from automation.automation_workflow import AutomationWorkflow -from automation.utils.interfaces import ( +from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow +from tc_analyzer_lib.automation.utils.interfaces import ( Automation, AutomationAction, AutomationReport, @@ -16,9 +16,10 @@ def test_automation_fire_message_check_mongodb_document_messages_ngu_strategy(): check the created messages in saga """ guild_id = "1234" - db_access = launch_db_access(guild_id) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guild_id].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") db_access.db_mongo_client["Saga"].drop_collection("sagas") db_access.db_mongo_client[guild_id].drop_collection("guildmembers") db_access.db_mongo_client["Automation"].drop_collection("automations") @@ -140,19 +141,15 @@ def test_automation_fire_message_check_mongodb_document_messages_ngu_strategy(): automation.to_dict() ) - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guild_id]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -204,7 +201,7 @@ def test_automation_fire_message_check_mongodb_document_messages_ngu_strategy(): ) automation_workflow = AutomationWorkflow() - automation_workflow.start(guild_id) + automation_workflow.start(platform_id, guild_id) count = db_access.db_mongo_client["Saga"]["sagas"].count_documents({}) assert count == 4 diff --git a/tests/integration/test_automation_fire_messages_mongo_saga_message_username_strategy.py b/tests/integration/test_automation_fire_messages_mongo_saga_message_username_strategy.py index 0c7d744..2c6fe1c 100644 --- a/tests/integration/test_automation_fire_messages_mongo_saga_message_username_strategy.py +++ b/tests/integration/test_automation_fire_messages_mongo_saga_message_username_strategy.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta -from automation.automation_workflow import AutomationWorkflow -from automation.utils.interfaces import 
( +from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow +from tc_analyzer_lib.automation.utils.interfaces import ( Automation, AutomationAction, AutomationReport, @@ -16,9 +16,10 @@ def test_automation_fire_message_check_mongodb_document_messages_username_strate check the created messages in saga """ guild_id = "1234" - db_access = launch_db_access(guild_id) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) - db_access.db_mongo_client[guild_id].drop_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") db_access.db_mongo_client["Saga"].drop_collection("sagas") db_access.db_mongo_client[guild_id].drop_collection("guildmembers") db_access.db_mongo_client["Automation"].drop_collection("automations") @@ -145,19 +146,15 @@ def test_automation_fire_message_check_mongodb_document_messages_username_strate automation.to_dict() ) - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guild_id]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -209,7 +206,7 @@ def test_automation_fire_message_check_mongodb_document_messages_username_strate ) automation_workflow = AutomationWorkflow() - automation_workflow.start(guild_id) + automation_workflow.start(platform_id, guild_id) count = db_access.db_mongo_client["Saga"]["sagas"].count_documents({}) assert count == 4 diff --git a/tests/integration/test_decentralization_score.py b/tests/integration/test_decentralization_score.py index 9a49f55..bc722fa 100644 --- a/tests/integration/test_decentralization_score.py +++ b/tests/integration/test_decentralization_score.py @@ -1,5 +1,6 @@ # the nodes of the graph are partially connected -from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.algorithms.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -11,59 +12,65 @@ def test_decentralization_score(): To see more info for this test: https://miro.com/app/board/uXjVM7GdYqo=/?moveToWidget=3458764558210553321&cot=14 """ - guildId = "1234" neo4j_ops = Neo4jOps.get_instance() - centrality = Centerality() # deleting all data neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") # timestamps today = 1689280200.0 yesterday = 1689193800.0 + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + centrality = Centerality(platform_id, graph_schema) + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET 
a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{guildId: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) network_decentrality = centrality.compute_network_decentrality( - guildId=guildId, from_start=True, save=True + from_start=True, save=True ) # because python is not good with equality comparison of float values diff --git a/tests/integration/test_degree_centrality_multiple_guilds.py b/tests/integration/test_degree_centrality_multiple_guilds.py index e819ae0..8dcc8e8 100644 --- a/tests/integration/test_degree_centrality_multiple_guilds.py +++ b/tests/integration/test_degree_centrality_multiple_guilds.py @@ -1,6 +1,7 
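The Neo4j tests now derive every label and relationship type from `GraphSchema` instead of hard-coding `DiscordAccount`, `Guild`, `IS_MEMBER`, and `INTERACTED_WITH`, and the relationship property moves from `guildId` to `platformId`. (Some rewritten hunks keep `guildId:` as the platform node's property while others use `id:`; that inconsistency is in the patch itself.) A minimal sketch of the f-string pattern, using only the schema attributes the hunks reference:

```python
from tc_analyzer_lib.schemas import GraphSchema

graph_schema = GraphSchema(platform="discord")
user_label = graph_schema.user_label               # platform-specific user node label
platform_label = graph_schema.platform_label       # platform node label
interacted_with = graph_schema.interacted_with_rel # interaction relationship type
is_member = graph_schema.member_relation           # membership relationship type

platform_id = "5151515151515"
query = f"""
    CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}})
    CREATE (b:{user_label}) -[:{is_member}]->(g)
    SET a.id = "1000"
    SET b.id = "1001"
    MERGE (a) -[r:{interacted_with} {{date: 1689193800.0, weight: 1}}]->(b)
    SET r.platformId = '{platform_id}'
"""
```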
@@ # we have nodes of a community is connected to another one # meaning we have nodes available in more than one community -from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.algorithms.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -14,7 +15,6 @@ def test_multiple_guilds(): To see more info for this test: https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 """ - guildId = "1234" neo4j_ops = Neo4jOps.get_instance() # deleting all data @@ -24,60 +24,66 @@ def test_multiple_guilds(): today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" - guildId2 = "1235" + graph_schema = GraphSchema(platform="discord") + platform_id1 = "5151515151515" + platform_id2 = "5151515151516" + centrality = Centerality(platform_id2, graph_schema) + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (f2:DiscordAccount) - -[:IS_MEMBER]->(guild2:Guild {{guildId: '{guildId2}'}}) - CREATE (g2:DiscordAccount) -[:IS_MEMBER]->(guild2) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - SET f2.userId = "1005" - SET g2.userId = "1006" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) - MERGE (f2) -[r13:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(g2) - MERGE (g2) -[r14:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(f2) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' - SET r13.guildId = '{guildId2}' - SET r14.guildId = '{guildId2}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id1}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + CREATE (f2:{user_label}) + -[:{is_member}]->(guild2:{platform_label} {{id: '{platform_id2}'}}) + CREATE (g2:{user_label}) 
-[:{is_member}]->(guild2) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + SET f2.id = "1005" + SET g2.id = "1006" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) + MERGE (f2) -[r13:{interacted_with} {{date: {yesterday}, weight: 3}}]->(g2) + MERGE (g2) -[r14:{interacted_with} {{date: {yesterday}, weight: 3}}]->(f2) + SET r.platformId = '{platform_id1}' + SET r2.platformId = '{platform_id1}' + SET r3.platformId = '{platform_id1}' + SET r4.platformId = '{platform_id1}' + SET r5.platformId = '{platform_id1}' + SET r6.platformId = '{platform_id1}' + SET r7.platformId = '{platform_id1}' + SET r8.platformId = '{platform_id1}' + SET r9.platformId = '{platform_id1}' + SET r10.platformId = '{platform_id1}' + SET r11.platformId = '{platform_id1}' + SET r12.platformId = '{platform_id1}' + SET r13.platformId = '{platform_id2}' + SET r14.platformId = '{platform_id2}' """ ) - centrality = Centerality() + centrality = Centerality(platform_id2, graph_schema) degree_centrality = centrality.compute_degree_centerality( - guildId=guildId2, direction="undirected", normalize=True, weighted=False, diff --git a/tests/integration/test_degree_centrality_multiple_guilds_preserve_parallel.py b/tests/integration/test_degree_centrality_multiple_guilds_preserve_parallel.py index 1955e7f..27915ec 100644 --- a/tests/integration/test_degree_centrality_multiple_guilds_preserve_parallel.py +++ b/tests/integration/test_degree_centrality_multiple_guilds_preserve_parallel.py @@ -1,6 +1,7 @@ # we have nodes of a community is connected to another one # meaning we have nodes available in more than one community -from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.algorithms.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -14,10 +15,8 @@ def test_multiple_guilds_preserve_parallel(): To see more info for this test: https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 """ - guildId = "1234" neo4j_ops = Neo4jOps.get_instance() - centrality = Centerality() # deleting all data neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") @@ -25,59 +24,65 @@ def test_multiple_guilds_preserve_parallel(): today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" - guildId2 = "1235" + graph_schema = GraphSchema(platform="discord") + platform_id1 = "5151515151515" + platform_id2 = "5151515151516" + centrality = Centerality(platform_id2, graph_schema) + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # 
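With `Centerality` bound to one platform at construction time, `compute_degree_centerality` loses its `guildId` kwarg and keeps only the analysis options. A sketch of the new call; the class name `Centerality` is spelled as in the library, and the hunks truncate the call's remaining keyword arguments:

```python
from tc_analyzer_lib.algorithms.neo4j_analysis.centrality import Centerality
from tc_analyzer_lib.schemas import GraphSchema

graph_schema = GraphSchema(platform="discord")
centrality = Centerality("5151515151516", graph_schema)  # scoped to one platform

degree_centrality = centrality.compute_degree_centerality(
    direction="undirected",
    normalize=True,
    weighted=False,
    # ...the remaining keyword arguments are elided in the diff hunks
)
```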
creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (f2:DiscordAccount) - -[:IS_MEMBER]->(guild2:Guild {{guildId: '{guildId2}'}}) - CREATE (g2:DiscordAccount) -[:IS_MEMBER]->(guild2) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - SET f2.userId = "1005" - SET g2.userId = "1006" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) - MERGE (f2) -[r13:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(g2) - MERGE (g2) -[r14:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(f2) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' - SET r13.guildId = '{guildId2}' - SET r14.guildId = '{guildId2}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id1}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + CREATE (f2:{user_label}) + -[:{is_member}]->(guild2:{platform_label} {{id: '{platform_id2}'}}) + CREATE (g2:{user_label}) -[:{is_member}]->(guild2) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + SET f2.id = "1005" + SET g2.id = "1006" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) + MERGE (f2) -[r13:{interacted_with} {{date: {yesterday}, 
weight: 3}}]->(g2) + MERGE (g2) -[r14:{interacted_with} {{date: {yesterday}, weight: 3}}]->(f2) + SET r.platformId = '{platform_id1}' + SET r2.platformId = '{platform_id1}' + SET r3.platformId = '{platform_id1}' + SET r4.platformId = '{platform_id1}' + SET r5.platformId = '{platform_id1}' + SET r6.platformId = '{platform_id1}' + SET r7.platformId = '{platform_id1}' + SET r8.platformId = '{platform_id1}' + SET r9.platformId = '{platform_id1}' + SET r10.platformId = '{platform_id1}' + SET r11.platformId = '{platform_id1}' + SET r12.platformId = '{platform_id1}' + SET r13.platformId = '{platform_id2}' + SET r14.platformId = '{platform_id2}' """ ) degree_centrality = centrality.compute_degree_centerality( - guildId=guildId2, direction="undirected", normalize=False, weighted=False, diff --git a/tests/integration/test_degree_centrality_parallel_preservation.py b/tests/integration/test_degree_centrality_parallel_preservation.py index b43e09b..5e5e286 100644 --- a/tests/integration/test_degree_centrality_parallel_preservation.py +++ b/tests/integration/test_degree_centrality_parallel_preservation.py @@ -1,5 +1,6 @@ # the nodes of the graph are partially connected -from discord_analyzer.analysis.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.algorithms.neo4j_analysis.centrality import Centerality +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -11,7 +12,6 @@ def test_partially_connected_coeffs(): To see more info for this test: https://miro.com/app/board/uXjVM7GdYqo=/?share_link_id=105382864070 """ - guildId = "1234" neo4j_ops = Neo4jOps.get_instance() # deleting all data @@ -20,52 +20,56 @@ def test_partially_connected_coeffs(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" - guildId = "1234" + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{guildId: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE 
(d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - centrality = Centerality() + centrality = Centerality(platform_id, graph_schema) degree_centrality = centrality.compute_degree_centerality( - guildId=guildId, direction="undirected", normalize=True, weighted=False, diff --git a/tests/integration/test_discord_heatmaps_config.py b/tests/integration/test_discord_heatmaps_config.py new file mode 100644 index 0000000..0773474 --- /dev/null +++ b/tests/integration/test_discord_heatmaps_config.py @@ -0,0 +1,89 @@ +from unittest import TestCase + +from tc_analyzer_lib.schemas import ActivityDirection, ActivityType +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig + + +class TestDiscordAnalyzerConfig(TestCase): + def test_discord_schema_overview(self): + # checking the analyzer schema for discord platform + config = DiscordAnalyzerConfig() + self.assertEqual(config.platform, "discord") + self.assertEqual(config.resource_identifier, "channel_id") + + # we have 8 hourly analytics + self.assertEqual(len(config.hourly_analytics), 8) + + # we have 3 raw analytics + self.assertEqual(len(config.raw_analytics), 3) + + def test_discord_schema_hourly_analytics(self): + hourly_analytics = DiscordAnalyzerConfig().hourly_analytics + for anlaytics in hourly_analytics: + if anlaytics.name == "thr_messages": + self.assertEqual(anlaytics.type, ActivityType.ACTION) + self.assertEqual(anlaytics.direction, ActivityDirection.EMITTER) + self.assertEqual(anlaytics.member_activities_used, True) + self.assertEqual( + anlaytics.rawmemberactivities_condition, + {"metadata.thread_id": {"$ne": None}}, + ) + elif anlaytics.name == "lone_messages": + self.assertEqual(anlaytics.type, ActivityType.ACTION) 
+ self.assertEqual(anlaytics.direction, ActivityDirection.EMITTER) + self.assertEqual(anlaytics.member_activities_used, True) + self.assertEqual( + anlaytics.rawmemberactivities_condition, + {"metadata.thread_id": None}, + ) + elif anlaytics.name == "replier": + self.assertEqual(anlaytics.type, ActivityType.INTERACTION) + self.assertEqual(anlaytics.direction, ActivityDirection.RECEIVER) + self.assertEqual(anlaytics.member_activities_used, False) + self.assertIsNone(anlaytics.rawmemberactivities_condition) + elif anlaytics.name == "replied": + self.assertEqual(anlaytics.type, ActivityType.INTERACTION) + self.assertEqual(anlaytics.direction, ActivityDirection.EMITTER) + self.assertEqual(anlaytics.member_activities_used, False) + self.assertIsNone(anlaytics.rawmemberactivities_condition) + elif anlaytics.name == "mentioner": + self.assertEqual(anlaytics.type, ActivityType.INTERACTION) + self.assertEqual(anlaytics.direction, ActivityDirection.EMITTER) + self.assertEqual(anlaytics.member_activities_used, False) + self.assertIsNone(anlaytics.rawmemberactivities_condition) + elif anlaytics.name == "mentioned": + self.assertEqual(anlaytics.type, ActivityType.INTERACTION) + self.assertEqual(anlaytics.direction, ActivityDirection.RECEIVER) + self.assertEqual(anlaytics.member_activities_used, False) + self.assertIsNone(anlaytics.rawmemberactivities_condition) + elif anlaytics.name == "reacter": + self.assertEqual(anlaytics.type, ActivityType.INTERACTION) + self.assertEqual(anlaytics.direction, ActivityDirection.RECEIVER) + self.assertEqual(anlaytics.member_activities_used, False) + self.assertIsNone(anlaytics.rawmemberactivities_condition) + elif anlaytics.name == "reacted": + self.assertEqual(anlaytics.type, ActivityType.INTERACTION) + self.assertEqual(anlaytics.direction, ActivityDirection.EMITTER) + self.assertEqual(anlaytics.member_activities_used, False) + self.assertIsNone(anlaytics.rawmemberactivities_condition) + else: + raise ValueError("No more hourly analytics for discord be available!") + + def test_discord_schema_raw_analytics(self): + raw_analytics = DiscordAnalyzerConfig().raw_analytics + for analytics in raw_analytics: + if analytics.name == "replied_per_acc": + self.assertTrue(analytics.member_activities_used) + self.assertEqual(analytics.type, ActivityType.INTERACTION) + self.assertEqual(analytics.direction, ActivityDirection.EMITTER) + elif analytics.name == "mentioner_per_acc": + self.assertTrue(analytics.member_activities_used) + self.assertEqual(analytics.type, ActivityType.INTERACTION) + self.assertEqual(analytics.direction, ActivityDirection.EMITTER) + elif analytics.name == "reacted_per_acc": + self.assertTrue(analytics.member_activities_used) + self.assertEqual(analytics.type, ActivityType.INTERACTION) + else: + raise ValueError( + "No more raw analytics for discord should be available!" 
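The new config test enumerates Discord's analytics as (type, direction) pairs: the action analytics `thr_messages` and `lone_messages` are emitter-side `ActivityType.ACTION`s distinguished only by a `metadata.thread_id` condition, while the interaction pairs (`replier`/`replied`, `mentioner`/`mentioned`, `reacter`/`reacted`) differ by `ActivityDirection`. A small inspection loop over the same config, as a usage sketch:

```python
from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig

config = DiscordAnalyzerConfig()  # platform="discord", resource_identifier="channel_id"

for analytic in config.hourly_analytics:  # 8 entries
    print(
        analytic.name,
        analytic.type,                           # ActivityType.ACTION / .INTERACTION
        analytic.direction,                      # ActivityDirection.EMITTER / .RECEIVER
        analytic.rawmemberactivities_condition,  # e.g. {"metadata.thread_id": None}
    )

for analytic in config.raw_analytics:  # 3 entries, all with member_activities_used=True
    print(analytic.name, analytic.type, analytic.direction)
```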
+ ) diff --git a/tests/integration/test_exclude_bots.py b/tests/integration/test_exclude_bots.py index e2c606b..7da1969 100644 --- a/tests/integration/test_exclude_bots.py +++ b/tests/integration/test_exclude_bots.py @@ -2,8 +2,8 @@ import numpy as np -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_excluding_bots_heatmaps(): @@ -11,8 +11,7 @@ def test_excluding_bots_heatmaps(): test if we're excluding bots from analyzer pipeline """ platform_id = "515151515151515151515151" - guildId = "1234567" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "user0", @@ -27,18 +26,17 @@ def test_excluding_bots_heatmaps(): # A guild connected at 35 days ago connected_days_before = 35 - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, discordId_isbot=acc_isbots, days_ago_period=connected_days_before, ) window_start_date = datetime.now() - timedelta(days=connected_days_before) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -47,35 +45,56 @@ def test_excluding_bots_heatmaps(): # 30 days # 24 * 30 for i in range(720): - sample = { - "type": 19, - "author": acc_id[i % len(acc_id)], - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) + author = acc_id[i % len(acc_id)] + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - analyzer = setup_analyzer(guildId) analyzer.run_once() - db_access.db_mongo_client[guildId] + db_access.db_mongo_client[platform_id] pipeline = [ # Filter documents based on date - {"$match": {"date": {"$gte": window_start_date.strftime("%Y-%m-%d")}}}, - {"$group": {"_id": "$account_name"}}, + {"$match": {"date": {"$gte": window_start_date}}}, + {"$group": {"_id": "$user"}}, { "$group": { "_id": None, @@ -83,10 +102,12 @@ def 
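The bot-exclusion test also migrates its fixtures from the old `rawinfos` message documents to the new `rawmemberactivities` shape, where a single reply yields two mirrored documents: the author's emitter side and the replied user's receiver side. A hypothetical helper condensing the pattern the test builds inline:

```python
from datetime import datetime


def make_reply_pair(author: str, replied_user: str, when: datetime, source_id: str) -> list[dict]:
    """Hypothetical helper: one reply becomes two mirrored rawmemberactivities docs."""
    metadata = {
        "bot_activity": False,
        "channel_id": "1020707129214111827",
        "thread_id": None,
    }
    return [
        {   # author's side: sent a message and emitted a reply interaction
            "actions": [{"name": "message", "type": "emitter"}],
            "author_id": author,
            "date": when,
            "interactions": [
                {"name": "reply", "type": "emitter", "users_engaged_id": [replied_user]}
            ],
            "metadata": metadata,
            "source_id": source_id,
        },
        {   # replied user's side: no action, only the received reply interaction
            "actions": [],
            "author_id": replied_user,
            "date": when,
            "interactions": [
                {"name": "reply", "type": "receiver", "users_engaged_id": [author]}
            ],
            "metadata": metadata,
            "source_id": source_id,
        },
    ]
```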
test_excluding_bots_heatmaps(): } }, ] - result = list(db_access.db_mongo_client[guildId]["heatmaps"].aggregate(pipeline)) + result = list( + db_access.db_mongo_client[platform_id]["heatmaps"].aggregate(pipeline) + ) - print(result[0]["uniqueAccounts"]) - print(f"np.array(acc_id)[acc_isbots]: {np.array(acc_id)[acc_isbots]}") + # print(result[0]["uniqueAccounts"]) + # print(f"np.array(acc_id)[acc_isbots]: {np.array(acc_id)[acc_isbots]}") # checking if the bots are not included in heatmaps for account_name in result[0]["uniqueAccounts"]: diff --git a/tests/integration/test_fragmentation_score.py b/tests/integration/test_fragmentation_score.py index 2957c91..cdbf69e 100644 --- a/tests/integration/test_fragmentation_score.py +++ b/tests/integration/test_fragmentation_score.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -10,7 +11,6 @@ def test_avg_clustering_coeff(): """ neo4j_ops = Neo4jOps.get_instance() - neo4j_analytics = Neo4JAnalytics() # deleting all data neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") @@ -21,54 +21,58 @@ def test_avg_clustering_coeff(): datetime.fromtimestamp(yesterday) - timedelta(days=1) ).timestamp() - guildId = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + neo4j_analytics = Neo4JAnalytics(platform_id, graph_schema) + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{guildId: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + 
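The heatmaps assertion in the exclude-bots test above likewise moves off string dates: `$match` compares a real datetime, and grouping keys on `$user` rather than `$account_name`. A sketch of the pipeline; the final accumulator's body is elided in the hunk, so `$addToSet` here is an assumption:

```python
from datetime import datetime, timedelta

window_start_date = datetime.now() - timedelta(days=35)

pipeline = [
    {"$match": {"date": {"$gte": window_start_date}}},  # BSON-date comparison
    {"$group": {"_id": "$user"}},                       # was "$account_name"
    # accumulator assumed; the diff elides this stage's body
    {"$group": {"_id": None, "uniqueAccounts": {"$addToSet": "$_id"}}},
]
```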
MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - neo4j_analytics.compute_local_clustering_coefficient( - guildId=guildId, from_start=False - ) + neo4j_analytics.compute_local_clustering_coefficient(from_start=False) fragmentation_score = neo4j_analytics.compute_fragmentation_score( - guildId=guildId, past_window_date=past_window_date, scale_fragmentation_score=200, ) diff --git a/tests/integration/test_fragmentation_score_exclude_past.py b/tests/integration/test_fragmentation_score_exclude_past.py index 0f03a29..39dde9f 100644 --- a/tests/integration/test_fragmentation_score_exclude_past.py +++ b/tests/integration/test_fragmentation_score_exclude_past.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -10,7 +11,6 @@ def test_avg_clustering_exclude_past(): """ neo4j_ops = Neo4jOps.get_instance() - neo4j_analytics = Neo4JAnalytics() # deleting all data neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") @@ -23,63 +23,66 @@ def test_avg_clustering_exclude_past(): past_window_date = ( datetime.fromtimestamp(yesterday) - timedelta(days=1) ).timestamp() + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + neo4j_analytics = Neo4JAnalytics(platform_id, graph_schema) - guildId = "1234" + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId 
= "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) - MERGE (a) -[r13:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(d) - MERGE (d) -[r14:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(b) - MERGE (b) -[r15:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(e) - MERGE (e) -[r16:INTERACTED_WITH {{date: {past_three_days}, weight: 3}}]->(c) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{guildId: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) + MERGE (a) -[r13:{interacted_with} {{date: {past_three_days}, weight: 3}}]->(d) + MERGE (d) -[r14:{interacted_with} {{date: {past_three_days}, weight: 3}}]->(b) + MERGE (b) -[r15:{interacted_with} {{date: {past_three_days}, weight: 3}}]->(e) + MERGE (e) -[r16:{interacted_with} {{date: {past_three_days}, weight: 3}}]->(c) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' - SET r13.guildId = '{guildId}' - SET r14.guildId = '{guildId}' - SET r15.guildId = '{guildId}' - SET r16.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET 
r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' + SET r13.platformId = '{platform_id}' + SET r14.platformId = '{platform_id}' + SET r15.platformId = '{platform_id}' + SET r16.platformId = '{platform_id}' """ ) - neo4j_analytics.compute_local_clustering_coefficient( - guildId=guildId, from_start=False - ) + neo4j_analytics.compute_local_clustering_coefficient(from_start=False) fragmentation_score = neo4j_analytics.compute_fragmentation_score( - guildId=guildId, past_window_date=past_window_date, scale_fragmentation_score=200, ) diff --git a/tests/integration/test_fragmentation_score_from_start.py b/tests/integration/test_fragmentation_score_from_start.py index 1f11ca8..b1c0e90 100644 --- a/tests/integration/test_fragmentation_score_from_start.py +++ b/tests/integration/test_fragmentation_score_from_start.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -10,7 +11,6 @@ def test_avg_clustering_coeff_from_start(): """ neo4j_ops = Neo4jOps.get_instance() - neo4j_analytics = Neo4JAnalytics() # deleting all data neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") @@ -20,54 +20,59 @@ def test_avg_clustering_coeff_from_start(): past_window_date = ( datetime.fromtimestamp(yesterday) - timedelta(days=1) ).timestamp() - guildId = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + neo4j_analytics = Neo4JAnalytics(platform_id, graph_schema) + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{guildId: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) 
-[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - neo4j_analytics.compute_local_clustering_coefficient( - guildId=guildId, from_start=True - ) + neo4j_analytics.compute_local_clustering_coefficient(from_start=True) fragmentation_score = neo4j_analytics.compute_fragmentation_score( - guildId=guildId, past_window_date=past_window_date, scale_fragmentation_score=200, ) diff --git a/tests/integration/test_fragmentation_score_rescaling.py b/tests/integration/test_fragmentation_score_rescaling.py index 020f6c6..0273537 100644 --- a/tests/integration/test_fragmentation_score_rescaling.py +++ b/tests/integration/test_fragmentation_score_rescaling.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -10,7 +11,6 @@ def test_avg_clustering_coeff_scaling(): """ neo4j_ops = Neo4jOps.get_instance() - neo4j_analytics = Neo4JAnalytics() # deleting all data neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") @@ -21,54 +21,59 @@ def test_avg_clustering_coeff_scaling(): datetime.fromtimestamp(yesterday) - timedelta(days=1) ).timestamp() - guildId = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + neo4j_analytics = Neo4JAnalytics(platform_id, graph_schema) + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE 
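# Hedged sketch of the pattern used throughout these tests: Cypher is built
# from GraphSchema attributes (user_label, platform_label, member_relation,
# interacted_with_rel) instead of hard-coded Discord labels, and edges carry
# platformId instead of guildId.
from datetime import datetime, timedelta

from tc_analyzer_lib.schemas import GraphSchema
from tc_neo4j_lib.neo4j_ops import Neo4jOps

graph_schema = GraphSchema(platform="discord")
platform_id = "5151515151515"
yesterday = (datetime.now() - timedelta(days=1)).timestamp()

neo4j_ops = Neo4jOps.get_instance()
neo4j_ops.gds.run_cypher(
    f"""
    CREATE (a:{graph_schema.user_label})
        -[:{graph_schema.member_relation}]->
        (g:{graph_schema.platform_label} {{guildId: '{platform_id}'}})
    CREATE (b:{graph_schema.user_label}) -[:{graph_schema.member_relation}]->(g)
    MERGE (a) -[r:{graph_schema.interacted_with_rel} {{date: {yesterday}, weight: 1}}]->(b)
    SET r.platformId = '{platform_id}'
    """
)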
(b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{guildId: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - neo4j_analytics.compute_local_clustering_coefficient( - guildId=guildId, from_start=True - ) + neo4j_analytics.compute_local_clustering_coefficient(from_start=True) fragmentation_score = neo4j_analytics.compute_fragmentation_score( - guildId=guildId, past_window_date=past_window_date, scale_fragmentation_score=100, ) diff --git 
a/tests/integration/test_generated_graph_period_1_year_run_once.py b/tests/integration/test_generated_graph_period_1_year_run_once.py index 31fdc2e..9e87207 100644 --- a/tests/integration/test_generated_graph_period_1_year_run_once.py +++ b/tests/integration/test_generated_graph_period_1_year_run_once.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_one_year_period_run_once_available_analytics(): @@ -19,7 +19,7 @@ def test_networkgraph_one_year_period_run_once_available_analytics(): guildId = "1234" community_id = "aabbccddeeff001122334455" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) neo4j_ops = Neo4jOps.get_instance() neo4j_ops.gds.run_cypher( @@ -33,17 +33,16 @@ def test_networkgraph_one_year_period_run_once_available_analytics(): "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=360, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -51,7 +50,7 @@ def test_networkgraph_one_year_period_run_once_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=354), count=350 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -61,7 +60,7 @@ def test_networkgraph_one_year_period_run_once_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=360), count=356 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -70,32 +69,56 @@ def test_networkgraph_one_year_period_run_once_available_analytics(): # 24 hours # 360 days for i in range(24 * 360): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + 
"users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label + results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -116,13 +139,13 @@ def test_networkgraph_one_year_period_run_once_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{guildId: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_generated_graph_period_1year.py b/tests/integration/test_generated_graph_period_1year.py index bfe2911..e30cbe7 100644 --- a/tests/integration/test_generated_graph_period_1year.py +++ b/tests/integration/test_generated_graph_period_1year.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_one_year_period_recompute_available_analytics(): @@ -33,17 +33,16 @@ def test_networkgraph_one_year_period_recompute_available_analytics(): "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=360, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -51,7 +50,7 @@ def test_networkgraph_one_year_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=354), count=353 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + 
db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -61,7 +60,7 @@ def test_networkgraph_one_year_period_recompute_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=360), count=359 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -70,32 +69,56 @@ def test_networkgraph_one_year_period_recompute_available_analytics(): # 24 hours # 360 days for i in range(24 * 360): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() + + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -114,13 +137,13 @@ def test_networkgraph_one_year_period_recompute_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{guildId: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_generated_graph_period_35_days.py b/tests/integration/test_generated_graph_period_35_days.py index 1878ba3..3e1722b 100644 --- a/tests/integration/test_generated_graph_period_35_days.py 
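# Hedged sketch of the new raw-data shape: the old single `rawinfos` message
# document becomes an emitter/receiver pair in `rawmemberactivities`. The
# helper below is illustrative (not a function from the codebase); the field
# names are copied from this patch.
from datetime import datetime

def make_reply_pair(
    author: str, replied_user: str, source_id: str, date: datetime
) -> list[dict]:
    metadata = {
        "bot_activity": False,
        "channel_id": "1020707129214111827",
        "thread_id": None,
    }
    return [
        {
            # the author emits a message plus a reply interaction
            "actions": [{"name": "message", "type": "emitter"}],
            "author_id": author,
            "date": date,
            "interactions": [
                {
                    "name": "reply",
                    "type": "emitter",
                    "users_engaged_id": [replied_user],
                }
            ],
            "metadata": metadata,
            "source_id": source_id,
        },
        {
            # the replied user only receives the interaction; no own action
            "actions": [],
            "author_id": replied_user,
            "date": date,
            "interactions": [
                {"name": "reply", "type": "receiver", "users_engaged_id": [author]}
            ],
            "metadata": metadata,
            "source_id": source_id,
        },
    ]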
+++ b/tests/integration/test_generated_graph_period_35_days.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_35_days_period_recompute_available_analytics(): @@ -33,17 +33,16 @@ def test_networkgraph_35_days_period_recompute_available_analytics(): "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=35, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -51,7 +50,7 @@ def test_networkgraph_35_days_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=28), count=27 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -61,7 +60,7 @@ def test_networkgraph_35_days_period_recompute_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=35), count=34 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -70,32 +69,56 @@ def test_networkgraph_35_days_period_recompute_available_analytics(): # 24 hours # 35 days for i in range(24 * 35): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + 
rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() + + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -114,13 +137,13 @@ def test_networkgraph_35_days_period_recompute_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{guildId: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_generated_graph_period_35_days_run_once.py b/tests/integration/test_generated_graph_period_35_days_run_once.py index ed7109e..10f56a0 100644 --- a/tests/integration/test_generated_graph_period_35_days_run_once.py +++ b/tests/integration/test_generated_graph_period_35_days_run_once.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_35_days_period_run_once_available_analytics(): @@ -33,17 +33,16 @@ def test_networkgraph_35_days_period_run_once_available_analytics(): "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=35, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -51,7 +50,7 @@ def test_networkgraph_35_days_period_run_once_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=28), count=24 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -61,7 +60,7 @@ def test_networkgraph_35_days_period_run_once_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=35), count=31 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -70,32 +69,56 @@ def test_networkgraph_35_days_period_run_once_available_analytics(): # 
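# Hedged sketch of the two analyzer entry points these tests exercise;
# setup_platform now returns the analyzer and recompute_analytics() is
# renamed. db_access, platform_id, acc_id and community_id are the fixtures
# from the surrounding test:
# from .utils.setup_platform import setup_platform  (as imported in these tests)
analyzer = setup_platform(
    db_access,
    platform_id,
    discordId_list=acc_id,
    days_ago_period=35,
    community_id=community_id,
)
analyzer.recompute()  # recompute analytics over the whole configured period
analyzer.run_once()   # continue from already-available analytics, as the
                      # *_run_once_available_analytics test names suggest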
24 hours # 35 days for i in range(24 * 35): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label + results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -115,13 +138,13 @@ def test_networkgraph_35_days_period_run_once_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{guildId: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_generated_graph_period_3_months.py b/tests/integration/test_generated_graph_period_3_months.py index 365f55f..d39b965 100644 --- a/tests/integration/test_generated_graph_period_3_months.py +++ b/tests/integration/test_generated_graph_period_3_months.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_three_months_period_recompute_available_analytics(): @@ -32,17 +32,16 @@ def 
test_networkgraph_three_months_period_recompute_available_analytics(): "973993299281076285", "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=90, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -50,7 +49,7 @@ def test_networkgraph_three_months_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=84), count=83 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -60,7 +59,7 @@ def test_networkgraph_three_months_period_recompute_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=90), count=89 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -69,32 +68,56 @@ def test_networkgraph_three_months_period_recompute_available_analytics(): # 24 hours # 90 days for i in range(24 * 90): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() + + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -113,13 +136,13 @@ def 
test_networkgraph_three_months_period_recompute_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{guildId: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_generated_graph_period_3_months_run_once.py b/tests/integration/test_generated_graph_period_3_months_run_once.py index 34764fd..2947722 100644 --- a/tests/integration/test_generated_graph_period_3_months_run_once.py +++ b/tests/integration/test_generated_graph_period_3_months_run_once.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_three_months_period_run_once_available_analytics(): @@ -33,17 +33,16 @@ def test_networkgraph_three_months_period_run_once_available_analytics(): "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=90, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -51,7 +50,7 @@ def test_networkgraph_three_months_period_run_once_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=84), count=80 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -61,7 +60,7 @@ def test_networkgraph_three_months_period_run_once_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=90), count=86 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -70,32 +69,56 @@ def test_networkgraph_three_months_period_run_once_available_analytics(): # 24 hours # 90 days for i in range(24 * 90): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": 
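# Hedged sketch of the migrated assertion: analytics dates live on a
# self-referencing HAVE_METRICS relationship, matched via the schema's
# platform label and the platform's `id` property (run_cypher is assumed to
# return a pandas DataFrame, as the other assertions in this patch imply):
platform_label = analyzer.graph_schema.platform_label
results = neo4j_ops.gds.run_cypher(
    f"""
    MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]->(g)
    RETURN DISTINCT r.date as dates ORDER BY dates DESC
    """
)
dates = results["dates"].values
# the tests compare against millisecond epoch values:
# assert dates[0] == end_analytics_date.timestamp() * 1000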
False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label + results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -115,13 +138,13 @@ def test_networkgraph_three_months_period_run_once_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{guildId: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_generated_graph_period_6_months.py b/tests/integration/test_generated_graph_period_6_months.py index 3434bda..91f6402 100644 --- a/tests/integration/test_generated_graph_period_6_months.py +++ b/tests/integration/test_generated_graph_period_6_months.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_six_months_period_recompute_available_analytics(): @@ -33,17 +33,16 @@ def test_networkgraph_six_months_period_recompute_available_analytics(): "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=180, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + 
db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -51,7 +50,7 @@ def test_networkgraph_six_months_period_recompute_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=174), count=173 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -61,7 +60,7 @@ def test_networkgraph_six_months_period_recompute_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=180), count=179 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -70,32 +69,56 @@ def test_networkgraph_six_months_period_recompute_available_analytics(): # 24 hours # 180 days for i in range(24 * 180): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + + analyzer.recompute() + + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -114,13 +137,15 @@ def test_networkgraph_six_months_period_recompute_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # for now we've dropped the support for community node creation + # was not required + # 
results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{id: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_generated_graph_period_6_months_run_once.py b/tests/integration/test_generated_graph_period_6_months_run_once.py index 4865eeb..4531220 100644 --- a/tests/integration/test_generated_graph_period_6_months_run_once.py +++ b/tests/integration/test_generated_graph_period_6_months_run_once.py @@ -3,10 +3,10 @@ import numpy as np from tc_neo4j_lib.neo4j_ops import Neo4jOps -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_networkgraph_six_months_period_run_once_available_analytics(): @@ -34,17 +34,16 @@ def test_networkgraph_six_months_period_run_once_available_analytics(): "973993299281076286", ] - setup_db_guild( + analyzer = setup_platform( db_access, platform_id, - guildId, discordId_list=acc_id, days_ago_period=90, community_id=community_id, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data # filling heatmaps with some data @@ -52,7 +51,7 @@ def test_networkgraph_six_months_period_run_once_available_analytics(): memberactivity_data = create_empty_memberactivities_data( datetime.now() - timedelta(days=174), count=170 ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) @@ -62,7 +61,7 @@ def test_networkgraph_six_months_period_run_once_available_analytics(): heatmaps_data = create_empty_heatmaps_data( datetime.now() - timedelta(days=180), count=176 ) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) # generating rawinfo samples rawinfo_samples = [] @@ -71,32 +70,56 @@ def test_networkgraph_six_months_period_run_once_available_analytics(): # 24 hours # 180 days for i in range(24 * 180): - sample = { - "type": 19, - "author": np.random.choice(acc_id), - "content": f"test{i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": np.random.choice(acc_id), - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) - - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) - - analyzer = setup_analyzer(guildId) + author = np.random.choice(acc_id) + replied_user = np.random.choice(acc_id) + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + 
"users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + {"name": "reply", "type": "receiver", "users_engaged_id": [author]} + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) + + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) + analyzer.run_once() + graph_schema = analyzer.graph_schema + platform_label = graph_schema.platform_label + results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]-> (g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]-> (g) RETURN DISTINCT r.date as dates ORDER BY dates DESC """ @@ -117,13 +140,14 @@ def test_networkgraph_six_months_period_run_once_available_analytics(): assert dates[-1] == start_analytics_date.timestamp() * 1000 assert dates[0] == end_analytics_date.timestamp() * 1000 - results = neo4j_ops.gds.run_cypher( - f""" - MATCH - (g:Guild {{guildId: '{guildId}'}}) - -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) - RETURN c.id as cid - """ - ) - assert len(results.values) == 1 - assert results["cid"].values == [community_id] + # connection to community is deleted for now + # results = neo4j_ops.gds.run_cypher( + # f""" + # MATCH + # (g:{platform_label} {{id: '{platform_id}'}}) + # -[r:IS_WITHIN]-> (c:Community {{id: '{community_id}'}}) + # RETURN c.id as cid + # """ + # ) + # assert len(results.values) == 1 + # assert results["cid"].values == [community_id] diff --git a/tests/integration/test_get_guild_community_ids.py b/tests/integration/test_get_guild_community_ids.py index c85168e..b0c0b1e 100644 --- a/tests/integration/test_get_guild_community_ids.py +++ b/tests/integration/test_get_guild_community_ids.py @@ -2,8 +2,8 @@ from unittest import TestCase from bson.objectid import ObjectId -from utils.get_guild_utils import get_guild_community_ids -from utils.mongo import MongoSingleton +from tc_analyzer_lib.utils.get_guild_utils import get_platform_guild_id +from tc_analyzer_lib.utils.mongo import MongoSingleton class TestGetGuildId(TestCase): @@ -20,7 +20,7 @@ def test_get_avalable_guild(self): "id": "999888877766655", "icon": "111111111111111111111111", "name": "A guild", - "selectedChannels": [ + "resources": [ "11111111", "22222222", "33333333", @@ -41,7 +41,7 @@ def test_get_avalable_guild(self): } ) - guild_id = get_guild_community_ids(str(platform_id)) + guild_id = get_platform_guild_id(str(platform_id)) self.assertEqual(guild_id, "999888877766655") def test_no_document_raise_error(self): @@ -51,4 +51,4 @@ def test_no_document_raise_error(self): client.drop_database("Core") with self.assertRaises(AttributeError): - get_guild_community_ids(str(platform_id)) + get_platform_guild_id(str(platform_id)) diff --git a/tests/integration/test_get_guild_owner.py b/tests/integration/test_get_guild_owner.py new file mode 100644 index 0000000..844beab --- /dev/null +++ b/tests/integration/test_get_guild_owner.py @@ -0,0 +1,215 @@ +from datetime import datetime, timedelta +from unittest import TestCase + +from bson import ObjectId +from tc_analyzer_lib.utils.get_guild_utils import get_platform_community_owner +from tc_analyzer_lib.utils.mongo 
import MongoSingleton + + +class TestGetGuildOwner(TestCase): + def setUp(self) -> None: + self.client = MongoSingleton.get_instance().get_client() + self.client["Core"].drop_collection("platforms") + self.client["Core"].drop_collection("users") + self.platform_id = "515151515151515151515151" + self.community_id = "aabbccddeeff001122334455" + self.guild_id = "1234" + self.client.drop_database(self.guild_id) + self.client.drop_database(self.platform_id) + + def tearDown(self) -> None: + self.client["Core"].drop_collection("platforms") + self.client["Core"].drop_collection("users") + self.client.drop_database(self.guild_id) + self.client.drop_database(self.platform_id) + + def test_no_platform_available(self): + with self.assertRaises(AttributeError): + _ = get_platform_community_owner(self.platform_id) + + def test_no_community_available(self): + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(self.platform_id), + "name": "discord", + "metadata": { + "id": self.guild_id, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": {"some_Values": 1}, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId(self.community_id), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + } + ) + with self.assertRaises(AttributeError): + _ = get_platform_community_owner(self.platform_id) + + def test_single_platform(self): + expected_owner_discord_id = "1234567890" + + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(self.platform_id), + "name": "discord", + "metadata": { + "id": self.guild_id, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": {"some_Values": 1}, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId(self.community_id), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + } + ) + self.client["Core"]["users"].insert_one( + { + "_id": ObjectId(self.platform_id), + "discordId": expected_owner_discord_id, + "communities": [ObjectId(self.community_id)], + "createdAt": datetime(2023, 12, 1), + "updatedAt": datetime(2023, 12, 1), + "tcaAt": datetime(2023, 12, 2), + } + ) + + owner = get_platform_community_owner(platform_id=self.platform_id) + + self.assertEqual( + owner, + expected_owner_discord_id, + ) + + def test_multiple_platforms(self): + expected_owner_discord_id = "1234567890" + + self.client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(self.platform_id), + "name": "discord", + "metadata": { + "id": self.guild_id, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": {"some_Values": 1}, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId(self.community_id), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + } + ) + self.client["Core"]["users"].insert_one( + { + "_id": ObjectId(self.platform_id), + "discordId": expected_owner_discord_id, + "communities": 
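# Hedged usage sketch for the lookup pinned down by this test file: the
# platform document's `community` ObjectId is matched against a user's
# `communities` array, and that user's `discordId` is returned as the owner.
from tc_analyzer_lib.utils.get_guild_utils import get_platform_community_owner

owner = get_platform_community_owner(platform_id="515151515151515151515151")
# raises AttributeError when the platform document or the owning user is
# missing, per test_no_platform_available / test_no_community_available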
[ObjectId(self.community_id)], + "createdAt": datetime(2023, 12, 1), + "updatedAt": datetime(2023, 12, 1), + "tcaAt": datetime(2023, 12, 2), + } + ) + + owner = get_platform_community_owner(platform_id=self.platform_id) + + self.assertEqual(owner, expected_owner_discord_id) + + def test_multiple_platforms_available(self): + expected_owner_discord_id = "1234567891" + platform_id2 = "515151515151515151515152" + platform_id3 = "515151515151515151515153" + + self.client["Core"]["platforms"].insert_many( + [ + { + "_id": ObjectId(self.platform_id), + "name": "discord", + "metadata": { + "id": self.guild_id, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": {"some_Values": 1}, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId(self.community_id), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + }, + { + "_id": ObjectId(platform_id2), + "name": "discord", + "metadata": { + "id": self.guild_id, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": {"some_Values": 1}, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId("aabbccddeeff001122334456"), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + }, + { + "_id": ObjectId(platform_id3), + "name": "discord", + "metadata": { + "id": self.guild_id, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["1020707129214111827"], + "window": {"period_size": 7, "step_size": 1}, + "action": {"some_Values": 1}, + "period": datetime.now() - timedelta(days=30), + }, + "community": ObjectId("aabbccddeeff001122334457"), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=40)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + }, + ] + ) + self.client["Core"]["users"].insert_one( + { + "_id": ObjectId(self.platform_id), + "discordId": expected_owner_discord_id, + "communities": [ObjectId(self.community_id)], + "createdAt": datetime(2023, 12, 1), + "updatedAt": datetime(2023, 12, 1), + "tcaAt": datetime(2023, 12, 2), + } + ) + + owner = get_platform_community_owner(platform_id=self.platform_id) + + self.assertEqual(owner, expected_owner_discord_id) diff --git a/tests/integration/test_get_guild_platform_id.py b/tests/integration/test_get_guild_platform_id.py index 5542639..bb125b7 100644 --- a/tests/integration/test_get_guild_platform_id.py +++ b/tests/integration/test_get_guild_platform_id.py @@ -2,8 +2,8 @@ from unittest import TestCase from bson.objectid import ObjectId -from utils.get_guild_utils import get_guild_platform_id -from utils.mongo import MongoSingleton +from tc_analyzer_lib.utils.get_guild_utils import get_guild_platform_id +from tc_analyzer_lib.utils.mongo import MongoSingleton class TestGetGuildId(TestCase): @@ -21,7 +21,7 @@ def test_get_avalable_guild(self): "id": guild_id, "icon": "111111111111111111111111", "name": "A guild", - "selectedChannels": [ + "resources": [ "11111111", "22222222", "33333333", diff --git a/tests/integration/test_get_past_7_days_heatmaps.py b/tests/integration/test_get_past_7_days_heatmaps.py index 
2e35b46..a1c4686 100644 --- a/tests/integration/test_get_past_7_days_heatmaps.py +++ b/tests/integration/test_get_past_7_days_heatmaps.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta import numpy as np -from discord_analyzer.analysis.utils.member_activity_utils import get_users_past_window +from tc_analyzer_lib.algorithms.utils.member_activity_utils import get_users_past_window from .utils.analyzer_setup import launch_db_access @@ -11,23 +11,23 @@ def test_get_past_7_days_heatmap_users_available_users(): test if we're getting the right heatmap users """ # first create the collections - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) start_date = datetime(2023, 1, 1) - db_access.db_mongo_client[guildId].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[platform_id].create_collection("heatmaps") heatmaps_data = [] acc_names = [] for i in range(250): date = start_date + timedelta(days=i) - account = f"9739932992810762{i}" + account = f"user{i}" document = { - "date": date.strftime("%Y-%m-%d"), - "channelId": "1020707129214111827", + "date": date, + "channel_id": "1020707129214111827", "thr_messages": list(np.zeros(24)), "lone_messages": list(np.zeros(24)), "replier": list(np.zeros(24)), @@ -39,20 +39,20 @@ def test_get_past_7_days_heatmap_users_available_users(): "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], - "account_name": account, + "user": account, } heatmaps_data.append(document) acc_names.append(account) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) start_date = datetime(2023, 1, 1) + timedelta(days=243) user_names = get_users_past_window( - start_date.strftime("%Y-%m-%d"), - (start_date + timedelta(days=250)).strftime("%Y-%m-%d"), - db_access.db_mongo_client[guildId]["heatmaps"], + start_date, + start_date + timedelta(days=250), + db_access.db_mongo_client[platform_id]["heatmaps"], ) assert set(user_names) == set(acc_names[-7:]) @@ -63,23 +63,23 @@ def test_get_all_days_heatmap_users_available_users(): test if we're getting the right heatmap users """ # first create the collections - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) start_date = datetime(2023, 1, 1) - db_access.db_mongo_client[guildId].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[platform_id].create_collection("heatmaps") heatmaps_data = [] acc_names = [] for i in range(250): date = start_date + timedelta(days=i) - account = f"9739932992810762{i}" + account = f"user{i}" document = { - "date": date.strftime("%Y-%m-%d"), - "channelId": "1020707129214111827", + "date": date, + "channel_id": "1020707129214111827", "thr_messages": list(np.zeros(24)), "lone_messages": list(np.zeros(24)), "replier": list(np.zeros(24)), @@ -91,18 +91,18 @@ def test_get_all_days_heatmap_users_available_users(): "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], - "account_name": account, + "user": account, } heatmaps_data.append(document) acc_names.append(account) - 
db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) user_names = get_users_past_window( - window_start_date=datetime(2023, 1, 1).strftime("%Y-%m-%d"), - window_end_date=(start_date + timedelta(days=250)).strftime("%Y-%m-%d"), - collection=db_access.db_mongo_client[guildId]["heatmaps"], + window_start_date=datetime(2023, 1, 1), + window_end_date=(start_date + timedelta(days=250)), + collection=db_access.db_mongo_client[platform_id]["heatmaps"], ) assert set(user_names) == set(acc_names) @@ -113,23 +113,21 @@ def test_get_just_7_days_heatmap_users_available_users(): test if we're getting the right heatmap users """ # first create the collections - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) start_date = datetime(2023, 1, 1) - db_access.db_mongo_client[guildId].drop_collection("heatmaps") - - db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") heatmaps_data = [] acc_names = [] for i in range(250): date = start_date + timedelta(days=i) - account = f"9739932992810762{i}" + account = f"user{i}" document = { - "date": date.strftime("%Y-%m-%d"), - "channelId": "1020707129214111827", + "date": date, + "channel_id": "1020707129214111827", "thr_messages": list(np.zeros(24)), "lone_messages": list(np.zeros(24)), "replier": list(np.zeros(24)), @@ -141,33 +139,33 @@ def test_get_just_7_days_heatmap_users_available_users(): "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], - "account_name": account, + "user": account, } heatmaps_data.append(document) acc_names.append(account) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) start_date = datetime(2023, 1, 1) end_date = start_date + timedelta(days=7) user_names = get_users_past_window( - start_date.strftime("%Y-%m-%d"), - end_date.strftime("%Y-%m-%d"), - db_access.db_mongo_client[guildId]["heatmaps"], + start_date, + end_date, + db_access.db_mongo_client[platform_id]["heatmaps"], ) + print("user_names", user_names) assert set(user_names) == set( [ - "97399329928107620", - "97399329928107621", - "97399329928107622", - "97399329928107623", - "97399329928107624", - "97399329928107625", - "97399329928107626", - "97399329928107627", + "user0", + "user1", + "user2", + "user3", + "user4", + "user5", + "user6", ] ) @@ -177,22 +175,22 @@ def test_get_past_7_days_heatmap_users_no_users(): test if we're getting the right heatmap users """ # first create the collections - guildId = "1234" - db_access = launch_db_access(guildId) + platform_id = "515151515151515151515151" + db_access = launch_db_access(platform_id) start_date = datetime(2023, 1, 1) - db_access.db_mongo_client[guildId].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("heatmaps") + db_access.db_mongo_client[platform_id].create_collection("heatmaps") start_date = datetime(2023, 1, 1) end_date = start_date + timedelta(days=243) user_names = get_users_past_window( - window_start_date=start_date.strftime("%Y-%m-%d"), - window_end_date=end_date.strftime("%Y-%m-%d"), - collection=db_access.db_mongo_client[guildId]["heatmaps"], + window_start_date=start_date, + window_end_date=end_date, + 
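# note: the window bounds are now passed as datetime objects; no strftime formatting is needed anymore +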
collection=db_access.db_mongo_client[platform_id]["heatmaps"], ) assert user_names == [] diff --git a/tests/integration/test_heatmaps_analytics.py b/tests/integration/test_heatmaps_analytics.py new file mode 100644 index 0000000..e7a434b --- /dev/null +++ b/tests/integration/test_heatmaps_analytics.py @@ -0,0 +1,197 @@ +from datetime import datetime, timedelta +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsAnalytics(TestCase): + def setUp(self) -> None: + platform_id = "1234567890" + period = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + resources = ["123", "124", "125"] + # using one of the configs we currently have + # it could be any other platform's config + discord_analyzer_config = DiscordAnalyzerConfig() + + self.heatmaps = Heatmaps( + platform_id=platform_id, + period=period, + resources=resources, + analyzer_config=discord_analyzer_config, + ) + self.mongo_client = MongoSingleton.get_instance().get_client() + self.mongo_client[platform_id].drop_collection("rawmemberactivities") + self.mongo_client[platform_id].drop_collection("rawmembers") + + def tearDown(self) -> None: + self.mongo_client[self.heatmaps.platform_id].drop_collection( + "rawmemberactivities" + ) + self.mongo_client[self.heatmaps.platform_id].drop_collection("rawmembers") + + def test_heatmaps_single_day_from_start(self): + platform_id = self.heatmaps.platform_id + day = (datetime.now() - timedelta(days=1)).replace(hour=0, minute=0, second=0) + + self.mongo_client[platform_id]["rawmembers"].insert_one( + { + "id": 9001, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, 1, 1), + "options": {}, + }, + ) + + sample_raw_data = [ + { + "author_id": 9001, + "date": day + timedelta(hours=1), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + }, + ], + }, + { + "author_id": 9001, + "date": day + timedelta(hours=1), + "source_id": "10005", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [], + "interactions": [ + { + "name": "reaction", + "users_engaged_id": [ + 9009, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": day + timedelta(hours=2), + "source_id": "10003", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": day + timedelta(hours=4), + "source_id": "10004", + "metadata": {"thread_id": None, "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + }, + { + "name": "mention", + "users_engaged_id": [9008, 9007], + "type": "emitter", + }, + ], + }, + ] + 
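# five raw activities for author 9001, all in channel "124": thread messages at hours 0-2, a lone message at hour 4, plus reply, reaction, and mention interactions +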
self.mongo_client[platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics = self.heatmaps.start(from_start=True) + + self.assertIsInstance(analytics, list) + + # 3 iterations for heatmaps analytics + # (3 resources) * (1 rawmember) * (1 day) + self.assertEqual(len(analytics), 3) + + for i in range(3): + # the second resource "124" + if i == 1: + self.assertEqual(sum(analytics[i]["thr_messages"]), 3) + self.assertEqual(sum(analytics[i]["lone_messages"]), 1) + self.assertEqual(sum(analytics[i]["replier"]), 1) + self.assertEqual(sum(analytics[i]["replied"]), 1) + # 4 people + self.assertEqual(sum(analytics[i]["mentioner"]), 4) + self.assertEqual(sum(analytics[i]["mentioned"]), 2) + self.assertEqual(sum(analytics[i]["reacter"]), 0) + self.assertEqual(sum(analytics[i]["reacted"]), 1) + + self.assertEqual( + analytics[i]["replied_per_acc"], + [ + { + "account": 9003, + "count": 1, + } + ], + ) + self.assertIn( + {"account": 9003, "count": 1}, + analytics[i]["mentioner_per_acc"], + ) + self.assertIn( + {"account": 9002, "count": 1}, + analytics[i]["mentioner_per_acc"], + ) + + self.assertEqual( + analytics[i]["reacted_per_acc"], + [ + { + "account": 9009, + "count": 1, + } + ], + ) + else: + self.assertEqual(sum(analytics[i]["thr_messages"]), 0) + self.assertEqual(sum(analytics[i]["lone_messages"]), 0) + self.assertEqual(sum(analytics[i]["replier"]), 0) + self.assertEqual(sum(analytics[i]["replied"]), 0) + self.assertEqual(sum(analytics[i]["mentioner"]), 0) + self.assertEqual(sum(analytics[i]["mentioned"]), 0) + self.assertEqual(sum(analytics[i]["reacter"]), 0) + self.assertEqual(sum(analytics[i]["reacted"]), 0) + + self.assertEqual(analytics[i]["replied_per_acc"], []) + self.assertEqual(analytics[i]["mentioner_per_acc"], []) + self.assertEqual(analytics[i]["reacted_per_acc"], []) diff --git a/tests/integration/test_heatmaps_analytics_base.py b/tests/integration/test_heatmaps_analytics_base.py new file mode 100644 index 0000000..ed47f74 --- /dev/null +++ b/tests/integration/test_heatmaps_analytics_base.py @@ -0,0 +1,134 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.analytics_hourly import AnalyticsHourly +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsAnalyticsBaseWithFilter(TestCase): + def setUp(self) -> None: + self.platform_id = "3456789" + self.raw_data_model = AnalyticsHourly(self.platform_id) + self.mongo_client = MongoSingleton.get_instance().get_client() + self.mongo_client[self.platform_id].drop_collection("rawmemberactivities") + + def tearDown(self) -> None: + # cleanup + self.mongo_client.drop_database(self.platform_id) + + def test_get_hourly_analytics_single_date(self): + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": {"threadId": 7000, "channelId": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + } + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + hourly_analytics = self.raw_data_model.get_hourly_analytics( + day=datetime(2023, 1, 1).date(), + activity="interactions", + filters={"interactions.name": "mention"}, + author_id=9000, + ) + + expected_analytics = [ + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] +
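# the single mention at hour 0 engaged two users, so index 0 holds 2 and the other 23 hours stay 0 +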
self.assertIsInstance(hourly_analytics, list) + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) + + def test_get_hourly_analytics_single_date_irrelevant_filter(self): + """ + test the hourly analytics with a filter that all data will be skipped + """ + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": {"threadId": 7000, "channelId": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + } + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + hourly_analytics = self.raw_data_model.get_hourly_analytics( + day=datetime(2023, 1, 1).date(), + activity="interactions", + filters={"interactions.name": "reply"}, + author_id=9000, + ) + + expected_analytics = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + self.assertIsInstance(hourly_analytics, list) + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) diff --git a/tests/integration/test_heatmaps_analytics_different_source.py b/tests/integration/test_heatmaps_analytics_different_source.py new file mode 100644 index 0000000..3d66b9c --- /dev/null +++ b/tests/integration/test_heatmaps_analytics_different_source.py @@ -0,0 +1,179 @@ +from datetime import datetime, timedelta +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsAnalyticsSingleDay(TestCase): + def setUp(self) -> None: + platform_id = "1234567890" + period = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + resources = ["111", "222", "333"] + # using one of the configs we currently have + # it could be any other platform's config + discord_analyzer_config = DiscordAnalyzerConfig() + + self.heatmaps = Heatmaps( + platform_id=platform_id, + period=period, + resources=resources, + analyzer_config=discord_analyzer_config, + ) + self.mongo_client = MongoSingleton.get_instance().get_client() + self.mongo_client[platform_id].drop_collection("rawmemberactivities") + self.mongo_client[platform_id].drop_collection("rawmembers") + + def test_heatmaps_single_day_from_start(self): + platform_id = self.heatmaps.platform_id + day = (datetime.now() - timedelta(days=1)).replace(hour=0, minute=0, second=0) + + self.mongo_client[platform_id]["rawmembers"].insert_one( + { + "id": 9001, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, 1, 1), + "options": {}, + }, + ) + + sample_raw_data = [ + { + "author_id": 9001, + "date": day + timedelta(hours=1), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + }, + ], + }, + { + "author_id": 9001, + "date": day + timedelta(hours=1), + "source_id": "10005", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [], + "interactions": [ + { + "name": "reaction", + "users_engaged_id": [ + 9009, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": 
{"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": day + timedelta(hours=2), + "source_id": "10003", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": day + timedelta(hours=4), + "source_id": "10004", + "metadata": {"thread_id": None, "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + }, + { + "name": "mention", + "users_engaged_id": [9008, 9007], + "type": "emitter", + }, + ], + }, + ] + self.mongo_client[platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics = self.heatmaps.start(from_start=True) + + self.assertIsInstance(analytics, list) + + # 3 iteration for heatmaps analytics + # (3 resources) * (1 rawmember) * (1 day) + self.assertEqual(len(analytics), 3) + + for i in range(3): + self.assertEqual(sum(analytics[i]["thr_messages"]), 0) + self.assertEqual(sum(analytics[i]["lone_messages"]), 0) + self.assertEqual(sum(analytics[i]["replier"]), 0) + self.assertEqual(sum(analytics[i]["replied"]), 0) + self.assertEqual(sum(analytics[i]["mentioner"]), 0) + self.assertEqual(sum(analytics[i]["mentioned"]), 0) + self.assertEqual(sum(analytics[i]["reacter"]), 0) + self.assertEqual(sum(analytics[i]["reacted"]), 0) + + self.assertEqual(analytics[i]["mentioner_per_acc"], []) + self.assertEqual(analytics[i]["reacted_per_acc"], []) + + def test_heatmaps_analytics_pre_filled(self): + platform_id = self.heatmaps.platform_id + day = (datetime.now() - timedelta(days=1)).replace(hour=0, minute=0, second=0) + + self.mongo_client[platform_id].drop_collection("heatmaps") + + self.mongo_client[platform_id]["heatmaps"].insert_one( + { + "user": 9000, + "channel_id": "124", + "date": day, + "thr_messages": [0] * 24, + "lone_messages": [0] * 24, + "replier": [0] * 24, + "replied": [0] * 24, + "mentioner": [0] * 24, + "mentioned": [0] * 24, + "reacter": [0] * 24, + "reacted": [0] * 24, + "mentioner_per_acc": [], + "reacted_per_acc": [], + } + ) + + analytics = self.heatmaps.start(from_start=False) + # the day was pre-filled before + # and the period was exactly yesterday + self.assertEqual(analytics, []) diff --git a/tests/integration/test_heatmaps_analytics_hourly_no_filter.py b/tests/integration/test_heatmaps_analytics_hourly_no_filter.py new file mode 100644 index 0000000..f2f8bad --- /dev/null +++ b/tests/integration/test_heatmaps_analytics_hourly_no_filter.py @@ -0,0 +1,308 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.analytics_hourly import AnalyticsHourly +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsAnalyticsBaseNoFilter(TestCase): + def setUp(self) -> None: + self.platform_id = "3456789" + self.raw_data_model = AnalyticsHourly(self.platform_id) + self.mongo_client = MongoSingleton.get_instance().get_client() + self.mongo_client[self.platform_id].drop_collection("rawmemberactivities") + + def test_get_hourly_analytics_single_date(self): + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": 
{"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + } + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + hourly_analytics = self.raw_data_model.get_hourly_analytics( + day=datetime(2023, 1, 1).date(), + activity="interactions", + author_id=9000, + ) + + # mentioning 2 people at hour 0 + expected_analytics = [ + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + self.assertIsInstance(hourly_analytics, list) + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) + + def test_get_hourly_analytics_multiple_date(self): + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + hourly_analytics = self.raw_data_model.get_hourly_analytics( + day=datetime(2023, 1, 1).date(), + activity="interactions", + author_id=9000, + ) + + expected_analytics = [ + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + self.assertIsInstance(hourly_analytics, list) + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) + + def test_get_hourly_analytics_multiple_date_multiple_authors(self): + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9000, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + hourly_analytics = self.raw_data_model.get_hourly_analytics( + day=datetime(2023, 1, 1).date(), + activity="interactions", + author_id=9000, + ) + + expected_analytics = [ + 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + self.assertIsInstance(hourly_analytics, list) + self.assertEqual(len(hourly_analytics), 24) + 
self.assertEqual(hourly_analytics, expected_analytics) + + def test_get_hourly_analytics_multiple_date_multiple_data(self): + sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + hourly_analytics = self.raw_data_model.get_hourly_analytics( + day=datetime(2023, 1, 2).date(), + activity="interactions", + author_id=9001, + ) + + expected_analytics = [ + 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + self.assertIsInstance(hourly_analytics, list) + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) diff --git a/tests/integration/test_heatmaps_analytics_raw.py b/tests/integration/test_heatmaps_analytics_raw.py new file mode 100644 index 0000000..4c73e29 --- /dev/null +++ b/tests/integration/test_heatmaps_analytics_raw.py @@ -0,0 +1,286 @@ +from datetime import datetime, timedelta +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.analytics_raw import AnalyticsRaw +from tc_analyzer_lib.schemas import ActivityDirection, RawAnalyticsItem +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsRawAnalytics(TestCase): + def setUp(self) -> None: + self.platform_id = "3456789" + self.analytics_raw = AnalyticsRaw(self.platform_id) + self.mongo_client = MongoSingleton.get_instance().get_client() + self.mongo_client[self.platform_id].drop_collection("rawmemberactivities") + + def test_raw_analytics_single_user(self): + day = datetime(2023, 1, 1).date() + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics_result = self.analytics_raw.analyze( + day=day, + activity="interactions", + activity_name="reply", + activity_direction=ActivityDirection.RECEIVER.value, + author_id=9000, + ) + + self.assertIsInstance(analytics_result, list) + self.assertEqual(len(analytics_result), 1) + self.assertIsInstance(analytics_result[0], RawAnalyticsItem) + self.assertEqual(analytics_result[0].account, 9003) + self.assertEqual(analytics_result[0].count, 1) + + def test_raw_analytics_wrong_user(self): + """ + asking for another user's analytics + results should be empty + """ + day = datetime(2023, 1, 
1).date() + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics_result = self.analytics_raw.analyze( + day=day, + activity="interactions", + activity_name="reply", + activity_direction=ActivityDirection.RECEIVER.value, + author_id=9003, + ) + + self.assertEqual(analytics_result, []) + + def test_raw_analytics_wrong_activity_direction(self): + """ + asking for another activity direction analytics + results should be empty + """ + day = datetime(2023, 1, 1).date() + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics_result = self.analytics_raw.analyze( + day=day, + activity="interactions", + activity_name="reply", + activity_direction=ActivityDirection.EMITTER.value, + author_id=9000, + ) + + self.assertEqual(analytics_result, []) + + def test_raw_analytics_wrong_day(self): + """ + asking for another day analytics + results should be empty + """ + day = datetime(2023, 1, 1).date() + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics_result = self.analytics_raw.analyze( + day=day + timedelta(days=1), + activity="interactions", + activity_name="reply", + activity_direction=ActivityDirection.RECEIVER.value, + author_id=9000, + ) + self.assertEqual(analytics_result, []) + + def test_raw_analytics_wrong_activity(self): + """ + asking for another activity analytics + results should be empty + """ + day = datetime(2023, 1, 1).date() + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics_result = self.analytics_raw.analyze( + day=day, + activity="interactions", + activity_name="mention", + activity_direction=ActivityDirection.RECEIVER.value, + author_id=9000, + ) + + self.assertEqual(analytics_result, []) + + def test_raw_analytics_multiple_users(self): + """ + multiple users are engaged on the same day + counts should be aggregated per engaged account + """ + day = datetime(2023, 1, 1).date() + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9005, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 4), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [9006, 9005], + "type": "receiver", + } + ], + }, + { + "author_id": 9000, + "date": datetime(2023, 1, 2, 4), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9001, + ], + "type": "receiver", + } + ], + }, + ] + self.mongo_client[self.platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics_result = self.analytics_raw.analyze( + day=day, + activity="interactions", + activity_name="reply", + activity_direction=ActivityDirection.RECEIVER.value, + author_id=9000, + ) + + self.assertIsInstance(analytics_result, list) + self.assertEqual(len(analytics_result), 2) + + for analytics in analytics_result: + self.assertIsInstance(analytics, RawAnalyticsItem) + if analytics.account == 9006: + self.assertEqual(analytics.count, 1) + elif analytics.account == 9005: + self.assertEqual(analytics.count, 2) + else: + # raising with values for debug purposes + raise ValueError( + "Never reaches here! " + f"analytics.account: {analytics.account} " + f"| analytics.count: {analytics.count}" + ) diff --git a/tests/integration/test_heatmaps_hourly_analytics_actions_vectors.py b/tests/integration/test_heatmaps_hourly_analytics_actions_vectors.py new file mode 100644 index 0000000..5f5d6a7 --- /dev/null +++ b/tests/integration/test_heatmaps_hourly_analytics_actions_vectors.py @@ -0,0 +1,260 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.analytics_hourly import AnalyticsHourly +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsRawAnalyticsVectorsActions(TestCase): + """ + test the 24 hour vector + """ + + def setUp(self) -> None: + client = MongoSingleton.get_instance().get_client() + platform_id = "781298" + database = client[platform_id] + database.drop_collection("rawmemberactivities") + self.database = database + + self.analytics = AnalyticsHourly(platform_id) + + def test_empty_data(self): + day = datetime(2023, 1, 1) + activity_vector = self.analytics.analyze( + day, + activity="actions", + activity_name="reply", + author_id=9000, + activity_direction="emitter", + additional_filters={"metadata.channel_id": 123}, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual(sum(activity_vector), 0) + + def test_no_relevant_data(self): + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 6), + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + "actions": [], + }, + ] +
self.database["rawmemberactivities"].insert_many(sample_raw_data) + + activity_vector = self.analytics.analyze( + day, + activity="actions", + activity_name="message", + author_id=9002, + activity_direction="emitter", + additional_filters={"metadata.channel_id": 2000}, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual(sum(activity_vector), 0) + + def test_single_relevant_data(self): + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.database["rawmemberactivities"].insert_many(sample_raw_data) + + activity_vector = self.analytics.analyze( + day, + activity="actions", + activity_name="message", + author_id=9000, + activity_direction="emitter", + additional_filters={ + "metadata.channel_id": 2000, + }, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual( + activity_vector, + [ + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + ) + + def test_multiple_relevant_data(self): + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 5), + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + }, + {"name": "reply", "users_engaged_id": [9003], "type": "emitter"}, + ], + }, + ] + self.database["rawmemberactivities"].insert_many(sample_raw_data) + + activity_vector = self.analytics.analyze( + day, + author_id=9001, + activity="actions", + activity_name="message", + activity_direction="emitter", + additional_filters={"metadata.channel_id": 2000}, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual( + activity_vector, + [ + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + ) + + def test_wrong_activity_type(self): + day = datetime(2023, 1, 1) + + with self.assertRaises(AttributeError): + self.analytics.analyze( + activity="interactions", + activity_name="reply", + day=day, + author_id=9000, + activity_direction="wrong_type", + ) + + def test_wrong_activity(self): + day = datetime(2023, 1, 1) + + with self.assertRaises(AttributeError): + self.analytics.analyze( + activity="activity1", + activity_name="reply", + day=day, + author_id=9000, + activity_direction="emitter", + ) diff --git a/tests/integration/test_heatmaps_hourly_analytics_interaction_vectors.py 
b/tests/integration/test_heatmaps_hourly_analytics_interaction_vectors.py new file mode 100644 index 0000000..2587e16 --- /dev/null +++ b/tests/integration/test_heatmaps_hourly_analytics_interaction_vectors.py @@ -0,0 +1,376 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.analytics_hourly import AnalyticsHourly +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsRawAnalyticsVectorsInteractions(TestCase): + """ + test the 24 hour vector + """ + + def setUp(self) -> None: + client = MongoSingleton.get_instance().get_client() + platform_id = "781298" + database = client[platform_id] + database.drop_collection("rawmemberactivities") + self.database = database + + self.analytics = AnalyticsHourly(platform_id) + + def test_empty_data(self): + day = datetime(2023, 1, 1) + activity_vector = self.analytics.analyze( + day, + activity="interactions", + activity_name="reply", + author_id=9000, + activity_direction="emitter", + additional_filters={"metadata.channel_id": 123}, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual(sum(activity_vector), 0) + + def test_no_relevant_data(self): + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 6), + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.database["rawmemberactivities"].insert_many(sample_raw_data) + + activity_vector = self.analytics.analyze( + day, + activity="interactions", + activity_name="reply", + author_id=9000, + activity_direction="emitter", + additional_filters={"metadata.channel_id": 2000}, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual(sum(activity_vector), 0) + + def test_single_relevant_data_type_receiver(self): + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 4), + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + 
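# author 9001's mention below is noise here; the query targets author 9000's received replies +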
"interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.database["rawmemberactivities"].insert_many(sample_raw_data) + + activity_vector = self.analytics.analyze( + day, + activity="interactions", + activity_name="reply", + author_id=9000, + activity_direction="receiver", + additional_filters={"metadata.channel_id": 2000}, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual( + activity_vector, + [ + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + ) + + def test_single_relevant_data(self): + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9000, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.database["rawmemberactivities"].insert_many(sample_raw_data) + + activity_vector = self.analytics.analyze( + day, + activity="interactions", + activity_name="reply", + author_id=9000, + activity_direction="emitter", + additional_filters={ + "metadata.channel_id": 2000, + }, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual( + activity_vector, + [ + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + ) + + def test_multiple_relevant_data(self): + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 5), + "source_id": "10001", + "metadata": {"thread_id": 7000, "channel_id": 2000}, + "actions": [{"name": "message", "type": "receiver"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + }, + {"name": "reply", "users_engaged_id": [9003], "type": "emitter"}, + ], + }, + ] + self.database["rawmemberactivities"].insert_many(sample_raw_data) + + activity_vector = self.analytics.analyze( + day, + author_id=9001, + activity="interactions", + activity_name="reply", + activity_direction="emitter", + additional_filters={"metadata.channel_id": 2000}, + ) + + self.assertIsInstance(activity_vector, list) + self.assertEqual(len(activity_vector), 24) + self.assertEqual( + activity_vector, + [ + 0, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ], + ) + + def test_replier_wrong_activity_type(self): + day = datetime(2023, 1, 1) + + with self.assertRaises(AttributeError): + self.analytics.analyze( + activity="interactions", + activity_name="reply", + day=day, + author_id=9000, + activity_direction="wrong_type", + ) + + def 
test_replier_wrong_activity(self): + day = datetime(2023, 1, 1) + + with self.assertRaises(AttributeError): + self.analytics.analyze( + activity="activity1", + activity_name="reply", + day=day, + author_id=9000, + activity_direction="emitter", + ) diff --git a/tests/integration/test_heatmaps_hourly_lone_message.py b/tests/integration/test_heatmaps_hourly_lone_message.py new file mode 100644 index 0000000..2ae5167 --- /dev/null +++ b/tests/integration/test_heatmaps_hourly_lone_message.py @@ -0,0 +1,91 @@ +from datetime import datetime, timedelta + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +def test_lone_messages(): + platform_id = "1122334455" + mongo_client = MongoSingleton.get_instance().get_client() + + database = mongo_client[platform_id] + + database.drop_collection("rawmemberactivities") + database.drop_collection("rawmembers") + + # data preparation + DAY_COUNT = 2 + day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta( + days=DAY_COUNT + ) + # hours to include interactions + hours_to_include = [2, 4, 19] + + acc_names = [] + prepared_rawmembers = [] + for i in range(3): + acc = f"user_{i}" + acc_names.append(acc) + + prepared_member = { + "id": acc, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, i + 1, 1), + "options": {}, + } + prepared_rawmembers.append(prepared_member) + + prepared_rawmemberactivities = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = "channel_0" + prepared_rawdata = { + "author_id": acc, + "date": data_date, + "source_id": f"9999{i}{hour}{acc}", # message id it was + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + } + prepared_rawmemberactivities.append(prepared_rawdata) + channelIds.add(chId) + dates.add(data_date.replace(hour=0, minute=0, second=0, microsecond=0)) + + database["rawmemberactivities"].insert_many(prepared_rawmemberactivities) + database["rawmembers"].insert_many(prepared_rawmembers) + + analyzer_heatmaps = Heatmaps( + platform_id=platform_id, + period=day, + resources=list(channelIds), + analyzer_config=DiscordAnalyzerConfig(), + ) + results = analyzer_heatmaps.start(from_start=True) + + assert len(results) == len(acc_names) * DAY_COUNT * len(channelIds) + for document in results: + assert document["date"] in dates + assert document["user"] in acc_names + assert document["channel_id"] in channelIds + assert document["reacted_per_acc"] == [] + assert document["mentioner_per_acc"] == [] + assert document["replied_per_acc"] == [] + assert sum(document["thr_messages"]) == 0 + assert sum(document["mentioner"]) == 0 + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == 0 + assert sum(document["mentioned"]) == 0 + assert sum(document["reacter"]) == 0 + + # the only document we have + assert sum(document["lone_messages"]) == len(hours_to_include) diff --git a/tests/integration/test_heatmaps_hourly_mentions.py b/tests/integration/test_heatmaps_hourly_mentions.py new file mode 100644 index 0000000..4a56143 --- /dev/null +++ b/tests/integration/test_heatmaps_hourly_mentions.py @@ -0,0 +1,146 @@ +from datetime import datetime, timedelta + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from 
tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +def test_mentioned_messages(): + platform_id = "1122334455" + mongo_client = MongoSingleton.get_instance().get_client() + database = mongo_client[platform_id] + + database.drop_collection("rawmemberactivities") + database.drop_collection("rawmembers") + # data preparation + DAY_COUNT = 2 + day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta( + days=DAY_COUNT + ) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + channels = ["channel_0", "channel_1"] + + acc_names = [] + prepared_rawmembers = [] + for i in range(3): + acc = f"user_{i}" + acc_names.append(acc) + + prepared_member = { + "id": acc, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, i + 1, 1), + "options": {}, + } + prepared_rawmembers.append(prepared_member) + + prepared_rawmemberactivities = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + for chId in channels: + data_date = (day + timedelta(days=i)).replace(hour=hour) + prepared_rawdata = { + "author_id": acc, + "date": data_date, + "source_id": f"9999{i}{hour}{acc}", # message id it was + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": ["user_0", "user_1"], + "type": "emitter", + } + ], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + } + prepared_rawmemberactivities.append(prepared_rawdata) + + # user just interacting with themselves + rawdata_self_interaction = { + "author_id": acc, + "date": data_date, + "source_id": f"1111{i}{hour}{acc}", # message id it was + "actions": [], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [acc], + "type": "receiver", + } + ], + "metadata": { + "channel_id": chId, + "thread_id": chId + "AAA", # could be thr_message + }, + } + prepared_rawmemberactivities.append(rawdata_self_interaction) + + channelIds.add(chId) + dates.add( + data_date.replace(hour=0, minute=0, second=0, microsecond=0) + ) + + database["rawmemberactivities"].insert_many(prepared_rawmemberactivities) + database["rawmembers"].insert_many(prepared_rawmembers) + + accs_mentioned = ["user_0", "user_1"] + + analyzer_heatmaps = Heatmaps( + platform_id=platform_id, + period=day, + resources=list(channelIds), + analyzer_config=DiscordAnalyzerConfig(), + ) + results = analyzer_heatmaps.start(from_start=True) + + assert len(results) == len(acc_names) * DAY_COUNT * len(channelIds) + for document in results: + assert document["user"] in acc_names + assert document["date"] in dates + assert document["channel_id"] in channelIds + assert document["reacted_per_acc"] == [] + assert sum(document["thr_messages"]) == 0 + assert sum(document["reacter"]) == 0 + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == 0 + assert document["replied_per_acc"] == [] + assert sum(document["lone_messages"]) == len(hours_to_include) + + if document["user"] == "user_0": + assert document["mentioner_per_acc"] == [ + { + "account": "user_1", + "count": (len(acc_names) - 2) * len(hours_to_include), + } + ] + assert sum(document["mentioner"]) == len(hours_to_include) + assert sum(document["mentioned"]) == 0.0 + + elif document["user"] == "user_1": + assert document["mentioner_per_acc"] == [ + { + "account": "user_0", + "count": (len(acc_names) - 2) * len(hours_to_include), + } + ] + assert 
sum(document["mentioner"]) == len(hours_to_include) + assert sum(document["mentioned"]) == 0.0 + elif document["user"] == "user_2": + assert len(document["mentioner_per_acc"]) == 2 + assert {"account": "user_0", "count": 9} in document["mentioner_per_acc"] + assert {"account": "user_1", "count": 9} in document["mentioner_per_acc"] + assert sum(document["mentioner"]) == len(hours_to_include) * len( + accs_mentioned + ) + assert sum(document["mentioned"]) == 0 + else: + raise ValueError("No more users! should never reach here.") diff --git a/tests/integration/test_heatmaps_process_hourly_single_day.py b/tests/integration/test_heatmaps_process_hourly_single_day.py new file mode 100644 index 0000000..2a47834 --- /dev/null +++ b/tests/integration/test_heatmaps_process_hourly_single_day.py @@ -0,0 +1,341 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsProcessHourlySingleDay(TestCase): + def setUp(self) -> None: + platform_id = "1234567890" + period = datetime(2024, 1, 1) + resources = list["123", "124", "125"] + # using one of the configs we currently have + # it could be any other platform's config + discord_analyzer_config = DiscordAnalyzerConfig() + + self.heatmaps = Heatmaps( + platform_id=platform_id, + period=period, + resources=resources, + analyzer_config=discord_analyzer_config, + ) + self.mongo_client = MongoSingleton.get_instance().get_client() + self.mongo_client[platform_id].drop_collection("rawmemberactivities") + self.mongo_client[platform_id].drop_collection("heatmaps") + + def test_process_hourly_check_return_type(self): + day = datetime(2023, 1, 1) + + hourly_analytics = self.heatmaps._process_hourly_analytics( + day, + resource="124", + author_id=9001, + ) + + self.assertIsInstance(hourly_analytics, dict) + # the config was discord analyzer + self.assertIn("replied", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["replied"], list) + self.assertEqual(len(hourly_analytics["replied"]), 24) + + self.assertIn("replier", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["replier"], list) + self.assertEqual(len(hourly_analytics["replier"]), 24) + + self.assertIn("mentioned", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["mentioned"], list) + self.assertEqual(len(hourly_analytics["mentioned"]), 24) + + self.assertIn("mentioner", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["mentioner"], list) + self.assertEqual(len(hourly_analytics["mentioner"]), 24) + + self.assertIn("reacter", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["reacter"], list) + self.assertEqual(len(hourly_analytics["reacter"]), 24) + + self.assertIn("reacted", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["reacted"], list) + self.assertEqual(len(hourly_analytics["reacted"]), 24) + + self.assertIn("thr_messages", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["thr_messages"], list) + self.assertEqual(len(hourly_analytics["thr_messages"]), 24) + + self.assertIn("lone_messages", hourly_analytics.keys()) + self.assertIsInstance(hourly_analytics["lone_messages"], list) + self.assertEqual(len(hourly_analytics["lone_messages"]), 24) + + def test_process_hourly_single_author(self): + platform_id = self.heatmaps.platform_id + day = datetime(2023, 1, 1) + + 
sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 4), + "source_id": "10001", + "metadata": {"thread_id": None, "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + hourly_analytics = self.heatmaps._process_hourly_analytics( + day, + resource="124", + author_id=9001, + ) + + self.assertEqual(hourly_analytics["mentioner"][0], 2) + self.assertEqual(hourly_analytics["mentioner"][4], 2) + self.assertEqual(sum(hourly_analytics["mentioner"]), 4) + self.assertEqual(sum(hourly_analytics["mentioned"]), 0) + self.assertEqual(sum(hourly_analytics["reacter"]), 0) + self.assertEqual(sum(hourly_analytics["reacted"]), 0) + + self.assertEqual(hourly_analytics["replied"][2], 1) + self.assertEqual(sum(hourly_analytics["replied"]), 1) + + self.assertEqual(hourly_analytics["replier"][2], 1) + self.assertEqual(sum(hourly_analytics["replier"]), 1) + + self.assertEqual(hourly_analytics["thr_messages"][0], 1) + self.assertEqual(hourly_analytics["thr_messages"][2], 2) + self.assertEqual(sum(hourly_analytics["thr_messages"]), 3) + + self.assertEqual(hourly_analytics["lone_messages"][4], 1) + self.assertEqual(sum(hourly_analytics["lone_messages"]), 1) + + def test_process_hourly_wrong_channel(self): + """ + running the hourly process for a channel that has no data available + """ + platform_id = self.heatmaps.platform_id + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 4), + "source_id": 
"10001", + "metadata": {"thread_id": None, "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + hourly_analytics = self.heatmaps._process_hourly_analytics( + day, + resource="125", + author_id=9001, + ) + + self.assertEqual(sum(hourly_analytics["mentioned"]), 0) + self.assertEqual(sum(hourly_analytics["mentioner"]), 0) + self.assertEqual(sum(hourly_analytics["reacter"]), 0) + self.assertEqual(sum(hourly_analytics["reacted"]), 0) + self.assertEqual(sum(hourly_analytics["replied"]), 0) + self.assertEqual(sum(hourly_analytics["replier"]), 0) + self.assertEqual(sum(hourly_analytics["thr_messages"]), 0) + self.assertEqual(sum(hourly_analytics["lone_messages"]), 0) + + def test_process_hourly_wrong_author(self): + """ + running the hourly process for an author that has no data available + """ + platform_id = self.heatmaps.platform_id + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 4), + "source_id": "10001", + "metadata": {"thread_id": None, "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + hourly_analytics = self.heatmaps._process_hourly_analytics( + day, + resource="124", + author_id=9005, + ) + + self.assertEqual(sum(hourly_analytics["mentioned"]), 0) + self.assertEqual(sum(hourly_analytics["mentioner"]), 0) + self.assertEqual(sum(hourly_analytics["reacter"]), 0) + self.assertEqual(sum(hourly_analytics["reacted"]), 0) + self.assertEqual(sum(hourly_analytics["replied"]), 0) + self.assertEqual(sum(hourly_analytics["replier"]), 0) + self.assertEqual(sum(hourly_analytics["thr_messages"]), 0) + self.assertEqual(sum(hourly_analytics["lone_messages"]), 0) diff --git a/tests/integration/test_heatmaps_process_raw_analytics.py b/tests/integration/test_heatmaps_process_raw_analytics.py new file mode 100644 index 0000000..18eeb69 --- /dev/null +++ b/tests/integration/test_heatmaps_process_raw_analytics.py @@ -0,0 +1,243 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import 
MongoSingleton + + + class TestHeatmapsProcessRawAnalyticsSingleDay(TestCase): + def setUp(self) -> None: + self.platform_id = "1234567890" + period = datetime(2024, 1, 1) + resources = ["123", "124", "125"] + # using one of the configs we currently have + # it could be any other platform's config + discord_analyzer_config = DiscordAnalyzerConfig() + + self.heatmaps = Heatmaps( + platform_id=self.platform_id, + period=period, + resources=resources, + analyzer_config=discord_analyzer_config, + ) + self.mongo_client = MongoSingleton.get_instance().get_client() + self.mongo_client[self.platform_id].drop_collection("rawmemberactivities") + + def tearDown(self) -> None: + self.mongo_client.drop_database(self.platform_id) + + def test_empty_data(self): + day = datetime(2023, 1, 1) + + analytics = self.heatmaps._process_raw_analytics( + day=day, + resource="124", + author_id=9000, + ) + self.assertIn("replied_per_acc", analytics.keys()) + self.assertIn("mentioner_per_acc", analytics.keys()) + self.assertIn("reacted_per_acc", analytics.keys()) + + self.assertIsInstance(analytics["replied_per_acc"], list) + self.assertEqual(len(analytics["replied_per_acc"]), 0) + + self.assertIsInstance(analytics["mentioner_per_acc"], list) + self.assertEqual(len(analytics["mentioner_per_acc"]), 0) + + self.assertIsInstance(analytics["reacted_per_acc"], list) + self.assertEqual(len(analytics["reacted_per_acc"]), 0) + + def test_single_author(self): + platform_id = self.heatmaps.platform_id + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "emitter", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 4), + "source_id": "10001", + "metadata": {"thread_id": None, "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9002], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics = self.heatmaps._process_raw_analytics( + day=day, + resource="124", + author_id=9001, + ) + + self.assertIsInstance(analytics["replied_per_acc"], list) + self.assertIsInstance(analytics["mentioner_per_acc"], list) + self.assertIsInstance(analytics["reacted_per_acc"], list) + + self.assertEqual(len(analytics["replied_per_acc"]), 1) + self.assertEqual(analytics["replied_per_acc"][0]["account"], 9003) + self.assertEqual(analytics["replied_per_acc"][0]["count"], 1) + + self.assertEqual(len(analytics["mentioner_per_acc"]), 2) + self.assertIn(analytics["mentioner_per_acc"][0]["account"], [9002, 9003]) + self.assertEqual(analytics["mentioner_per_acc"][0]["count"], 
1) + self.assertIn(analytics["mentioner_per_acc"][1]["account"], [9002, 9003]) + self.assertEqual(analytics["mentioner_per_acc"][1]["count"], 1) + + self.assertEqual(analytics["reacted_per_acc"], []) + + def test_multiple_authors(self): + platform_id = self.heatmaps.platform_id + day = datetime(2023, 1, 1) + + sample_raw_data = [ + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [ + 9003, + ], + "type": "receiver", + } + ], + }, + { + "author_id": 9001, + "date": day, + "source_id": "10001", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9005], + "type": "receiver", + }, + { + "name": "reaction", + "users_engaged_id": [9003, 9008], + "type": "emitter", + }, + ], + }, + { + "author_id": 9002, + "date": datetime(2023, 1, 1, 2), + "source_id": "10000", + "metadata": {"thread_id": "7000", "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reaction", + "users_engaged_id": [9003, 9008], + "type": "emitter", + }, + { + "name": "mention", + "users_engaged_id": [9003, 9005], + "type": "receiver", + }, + ], + }, + { + "author_id": 9001, + "date": datetime(2023, 1, 1, 4), + "source_id": "10001", + "metadata": {"thread_id": None, "channel_id": "124"}, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "mention", + "users_engaged_id": [9003, 9005], + "type": "emitter", + } + ], + }, + ] + self.mongo_client[platform_id]["rawmemberactivities"].insert_many( + sample_raw_data + ) + + analytics = self.heatmaps._process_raw_analytics( + day=day, + resource="124", + author_id=9001, + ) + + self.assertIsInstance(analytics["replied_per_acc"], list) + self.assertIsInstance(analytics["mentioner_per_acc"], list) + self.assertIsInstance(analytics["reacted_per_acc"], list) + + self.assertEqual(analytics["replied_per_acc"], []) + + self.assertEqual(len(analytics["mentioner_per_acc"]), 2) + self.assertIn(analytics["mentioner_per_acc"][0]["account"], [9003, 9005]) + self.assertEqual(analytics["mentioner_per_acc"][0]["count"], 1) + self.assertIn(analytics["mentioner_per_acc"][1]["account"], [9003, 9005]) + self.assertEqual(analytics["mentioner_per_acc"][1]["count"], 1) + + self.assertEqual(len(analytics["reacted_per_acc"]), 2) + self.assertIn(analytics["reacted_per_acc"][0]["account"], [9003, 9008]) + self.assertEqual(analytics["reacted_per_acc"][0]["count"], 1) + self.assertIn(analytics["reacted_per_acc"][1]["account"], [9003, 9008]) + self.assertEqual(analytics["reacted_per_acc"][1]["count"], 1) diff --git a/tests/integration/test_heatmaps_reactions.py b/tests/integration/test_heatmaps_reactions.py new file mode 100644 index 0000000..09cd53b --- /dev/null +++ b/tests/integration/test_heatmaps_reactions.py @@ -0,0 +1,178 @@ +from datetime import datetime, timedelta + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +def test_reacted_messages(): + platform_id = "1122334455" + mongo_client = MongoSingleton.get_instance().get_client() + + database = mongo_client[platform_id] + + database.drop_collection("rawmemberactivities") + 
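Note on the _process_raw_analytics assertions above: they fix the expected shape as one {"account", "count"} entry per engaged user, split by interaction name and direction. A rough sketch of that per-account tallying, with an illustrative helper name (the real logic lives in tc_analyzer_lib.metrics.heatmaps and may differ):

from collections import Counter

def count_interactions_per_account(raw_docs, interaction_name, direction):
    # tally how often each engaged account appears for one interaction
    # name ("reply", "mention", "reaction") and direction ("emitter"/"receiver")
    counter = Counter()
    for doc in raw_docs:
        for interaction in doc["interactions"]:
            if (
                interaction["name"] == interaction_name
                and interaction["type"] == direction
            ):
                counter.update(interaction["users_engaged_id"])
    # the tests expect a list of {"account": ..., "count": ...} entries
    return [{"account": acc, "count": cnt} for acc, cnt in counter.items()]

Applied to test_single_author's emitter mention of [9003, 9002], this yields two entries with count 1 each, which is the shape the mentioner_per_acc assertions check.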
database.drop_collection("rawmembers") + + # data preparation + DAY_COUNT = 2 + day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta( + days=DAY_COUNT + ) + + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + + acc_names = [] + prepared_rawmembers = [] + for i in range(5): + acc = f"user_{i}" + acc_names.append(acc) + + prepared_member = { + "id": acc, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, i + 1, 1), + "options": {}, + } + prepared_rawmembers.append(prepared_member) + + prepared_rawmemberactivities = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for author in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = "channel_0" + source_id = f"9999{i}{hour}{author}" # message id it was + + prepared_rawdata = [ + { + "author_id": author, + "date": data_date, + "source_id": source_id, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reaction", + "users_engaged_id": ["user_0", "user_1", "user_2"], + "type": "receiver", + } + ], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + }, + { + "author_id": "user_0", + "date": data_date, + "source_id": source_id, + "actions": [], + "interactions": [ + { + "name": "reaction", + "users_engaged_id": [author], + "type": "emitter", + }, + ], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + }, + { + "author_id": "user_1", + "date": data_date, + "source_id": source_id, + "actions": [], + "interactions": [ + { + "name": "reaction", + "users_engaged_id": [author], + "type": "emitter", + }, + ], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + }, + { + "author_id": "user_2", + "date": data_date, + "source_id": source_id, + "actions": [], + "interactions": [ + { + "name": "reaction", + "users_engaged_id": [author], + "type": "emitter", + }, + ], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + }, + ] + prepared_rawmemberactivities.extend(prepared_rawdata) + channelIds.add(chId) + dates.add(data_date.replace(hour=0, minute=0, second=0, microsecond=0)) + + database["rawmemberactivities"].insert_many(prepared_rawmemberactivities) + database["rawmembers"].insert_many(prepared_rawmembers) + + reacted_accs = set(["user_0", "user_1", "user_2"]) + + analyzer_heatmaps = Heatmaps( + platform_id=platform_id, + period=day, + resources=list(channelIds), + analyzer_config=DiscordAnalyzerConfig(), + ) + results = analyzer_heatmaps.start(from_start=True) + + assert len(results) == len(acc_names) * DAY_COUNT * len(channelIds) + for document in results: + assert document["user"] in acc_names + assert document["date"] in dates + assert document["user"] in acc_names + assert document["channel_id"] in channelIds + assert sum(document["thr_messages"]) == 0 + assert sum(document["mentioner"]) == 0 + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == 0 + assert sum(document["mentioned"]) == 0 + assert document["mentioner_per_acc"] == [] + assert document["replied_per_acc"] == [] + assert sum(document["lone_messages"]) == len(hours_to_include) + + if document["user"] not in reacted_accs: + assert document["reacted_per_acc"] == [] + + # the only document we have + # 3 is the emoji count + assert sum(document["reacter"]) == len(hours_to_include) * len(reacted_accs) + assert sum(document["reacted"]) == 0 + else: + user = document["user"] + + for acc in set(acc_names) - set([user]): + 
expected_raw_analytics_item = { + "account": acc, + "count": len(hours_to_include), + } + assert expected_raw_analytics_item in document["reacted_per_acc"] + + # the minus operations on acc_names + # is for ignoring the self interaction + assert sum(document["reacter"]) == len(hours_to_include) * ( + len(acc_names) - 2 - 1 + ) + assert sum(document["reacted"]) == len(hours_to_include) * ( + len(acc_names) - 1 + ) diff --git a/tests/integration/test_heatmaps_replier.py b/tests/integration/test_heatmaps_replier.py new file mode 100644 index 0000000..a716060 --- /dev/null +++ b/tests/integration/test_heatmaps_replier.py @@ -0,0 +1,126 @@ +from datetime import datetime, timedelta + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +def test_reply_messages(): + platform_id = "1122334455" + mongo_client = MongoSingleton.get_instance().get_client() + database = mongo_client[platform_id] + + database.drop_collection("rawmemberactivities") + database.drop_collection("rawmembers") + # data preparation + DAY_COUNT = 3 + day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta( + days=DAY_COUNT + ) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + + acc_names = [] + prepared_rawmembers = [] + for i in range(5): + acc = f"user_{i}" + acc_names.append(acc) + + prepared_member = { + "id": acc, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, i + 1, 1), + "options": {}, + } + prepared_rawmembers.append(prepared_member) + + prepared_rawmemberactivities = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + data_date = (day + timedelta(days=i)).replace(hour=hour) + chId = "channel_0" + source_id = f"9999{i}{hour}{acc}" # message id it was + prepared_rawdata = [ + { + "author_id": acc, + "date": data_date, + "source_id": source_id, + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [ + { + "name": "reply", + "users_engaged_id": ["user_1"], + "type": "emitter", + } + ], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + }, + { + "author_id": "user_1", + "date": data_date, + "source_id": source_id, + "actions": [], + "interactions": [ + { + "name": "reply", + "users_engaged_id": [acc], + "type": "receiver", + } + ], + "metadata": { + "channel_id": chId, + "thread_id": None, + }, + }, + ] + prepared_rawmemberactivities.extend(prepared_rawdata) + + channelIds.add(chId) + dates.add(data_date.replace(hour=0, minute=0, second=0, microsecond=0)) + + database["rawmemberactivities"].insert_many(prepared_rawmemberactivities) + database["rawmembers"].insert_many(prepared_rawmembers) + + analyzer_heatmaps = Heatmaps( + platform_id=platform_id, + period=day, + resources=list(channelIds), + analyzer_config=DiscordAnalyzerConfig(), + ) + results = analyzer_heatmaps.start(from_start=True) + + assert len(results) == len(acc_names) * DAY_COUNT * len(channelIds) + for document in results: + assert document["user"] in acc_names + assert document["date"] in dates + assert document["user"] in acc_names + assert document["channel_id"] in channelIds + assert document["reacted_per_acc"] == [] + assert document["mentioner_per_acc"] == [] + # the message action + assert sum(document["lone_messages"]) == len(hours_to_include) + assert sum(document["thr_messages"]) == 0 + assert 
sum(document["mentioner"]) == 0 + assert sum(document["mentioned"]) == 0 + assert sum(document["reacter"]) == 0 + + if document["user"] == "user_1": + assert document["replied_per_acc"] == [] + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == len(hours_to_include) * ( + len(acc_names) - 1 + ) + else: + assert document["replied_per_acc"] == [ + {"account": "user_1", "count": len(hours_to_include)} + ] + assert sum(document["replied"]) == len(hours_to_include) + assert sum(document["replier"]) == 0 diff --git a/tests/integration/test_heatmaps_thread_msg.py b/tests/integration/test_heatmaps_thread_msg.py new file mode 100644 index 0000000..09fa994 --- /dev/null +++ b/tests/integration/test_heatmaps_thread_msg.py @@ -0,0 +1,88 @@ +from datetime import datetime, timedelta + +from tc_analyzer_lib.metrics.heatmaps import Heatmaps +from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +def test_thread_messages(): + platform_id = "1122334455" + mongo_client = MongoSingleton.get_instance().get_client() + database = mongo_client[platform_id] + + database.drop_collection("rawmemberactivities") + database.drop_collection("rawmembers") + # data preparation + DAY_COUNT = 2 + day = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) - timedelta( + days=DAY_COUNT + ) + # hours to include interactions + hours_to_include = [2, 4, 5, 13, 16, 18, 19, 20, 21] + + acc_names = [] + prepared_rawmembers = [] + for i in range(3): + acc = f"user_{i}" + acc_names.append(acc) + + prepared_member = { + "id": acc, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, i + 1, 1), + "options": {}, + } + prepared_rawmembers.append(prepared_member) + + prepared_rawmemberactivities = [] + channelIds = set() + dates = set() + + for i in range(DAY_COUNT): + for hour in hours_to_include: + for acc in acc_names: + chId = "channel_0" + data_date = (day + timedelta(days=i)).replace(hour=hour) + prepared_rawdata = { + "author_id": acc, + "date": data_date, + "source_id": f"9999{i}{hour}{acc}", # message id it was + "actions": [{"name": "message", "type": "emitter"}], + "interactions": [], + "metadata": { + "channel_id": chId, + "thread_id": chId + "THREAD", + }, + } + prepared_rawmemberactivities.append(prepared_rawdata) + + channelIds.add(chId) + dates.add(data_date.replace(hour=0, minute=0, second=0, microsecond=0)) + + database["rawmemberactivities"].insert_many(prepared_rawmemberactivities) + database["rawmembers"].insert_many(prepared_rawmembers) + + analyzer_heatmaps = Heatmaps( + platform_id=platform_id, + period=day, + resources=list(channelIds), + analyzer_config=DiscordAnalyzerConfig(), + ) + results = analyzer_heatmaps.start(from_start=True) + + assert len(results) == len(acc_names) * DAY_COUNT + for document in results: + assert document["user"] in acc_names + assert document["date"] in dates + assert document["channel_id"] in channelIds + assert document["reacted_per_acc"] == [] + assert document["mentioner_per_acc"] == [] + assert document["replied_per_acc"] == [] + assert sum(document["thr_messages"]) == len(hours_to_include) + assert sum(document["mentioner"]) == 0 + assert sum(document["replied"]) == 0 + assert sum(document["replier"]) == 0 + assert sum(document["mentioned"]) == 0 + assert sum(document["reacter"]) == 0 + assert sum(document["lone_messages"]) == 0 diff --git a/tests/integration/test_heatmaps_utils.py b/tests/integration/test_heatmaps_utils.py new file mode 100644 index 
0000000..a9cb018 --- /dev/null +++ b/tests/integration/test_heatmaps_utils.py @@ -0,0 +1,211 @@ +from datetime import datetime +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.heatmaps_utils import HeatmapsUtils +from tc_analyzer_lib.utils.mongo import MongoSingleton + + +class TestHeatmapsUtils(TestCase): + def setUp(self) -> None: + client = MongoSingleton.get_instance().get_client() + self.platform_id = "1234567890" + self.database = client[self.platform_id] + self.database.drop_collection("rawmembers") + + self.utils = HeatmapsUtils(self.platform_id) + + def test_get_users_empty_collection(self): + users = self.utils.get_users() + self.assertEqual(list(users), []) + + def test_get_real_users(self): + sample_users = [ + { + "id": 9000, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, 6, 1), + "options": {}, + }, + { + "id": 9001, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, 6, 1), + "options": {}, + }, + { + "id": 9002, + "is_bot": True, + "left_at": None, + "joined_at": datetime(2024, 1, 1), + "options": {}, + }, + ] + self.database["rawmembers"].insert_many(sample_users) + + users = self.utils.get_users() + + self.assertEqual(list(users), [{"id": 9000}, {"id": 9001}]) + + def test_get_bots(self): + sample_users = [ + { + "id": 9000, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, 6, 2), + "options": {}, + }, + { + "id": 9001, + "is_bot": True, + "left_at": None, + "joined_at": datetime(2023, 6, 1), + "options": {}, + }, + { + "id": 9002, + "is_bot": True, + "left_at": None, + "joined_at": datetime(2024, 1, 1), + "options": {}, + }, + ] + self.database["rawmembers"].insert_many(sample_users) + + users = self.utils.get_users(is_bot=True) + + self.assertEqual(list(users), [{"id": 9001}, {"id": 9002}]) + + def test_get_users_count_empty_data(self): + count = self.utils.get_users_count() + self.assertIsInstance(count, int) + self.assertEqual(count, 0) + + def test_get_users_count_real_users(self): + sample_users = [ + { + "id": 9000, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2023, 6, 2), + "options": {}, + }, + { + "id": 9001, + "is_bot": True, + "left_at": None, + "joined_at": datetime(2023, 6, 1), + "options": {}, + }, + { + "id": 9002, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2024, 1, 1), + "options": {}, + }, + ] + self.database["rawmembers"].insert_many(sample_users) + + count = self.utils.get_users_count() + self.assertIsInstance(count, int) + self.assertEqual(count, 2) + + def test_get_users_count_bots(self): + sample_users = [ + { + "id": 9000, + "is_bot": True, + "left_at": None, + "joined_at": datetime(2023, 6, 2), + "options": {}, + }, + { + "id": 9001, + "is_bot": True, + "left_at": None, + "joined_at": datetime(2023, 6, 1), + "options": {}, + }, + { + "id": 9002, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2024, 1, 1), + "options": {}, + }, + { + "id": 9003, + "is_bot": False, + "left_at": None, + "joined_at": datetime(2024, 2, 1), + "options": {}, + }, + { + "id": 9004, + "is_bot": True, + "left_at": None, + "joined_at": datetime(2024, 2, 3), + "options": {}, + }, + ] + self.database["rawmembers"].insert_many(sample_users) + + count = self.utils.get_users_count(is_bot=True) + self.assertIsInstance(count, int) + self.assertEqual(count, 3) + + def test_get_last_date_no_document(self): + self.database.drop_collection("heatmaps") + + last_date = self.utils.get_last_date() + + self.assertIsNone(last_date) + + def 
test_get_last_date_single_document(self): + self.database.drop_collection("heatmaps") + + document = { + "user": 9000, + "channel_id": "124", + "date": datetime(2023, 1, 1), + "hourly_analytics": [], + "raw_analytics": [], + } + self.database["heatmaps"].insert_one(document) + + last_date = self.utils.get_last_date() + self.assertEqual(last_date, datetime(2023, 1, 1)) + + def test_get_last_date_multiple_documents(self): + self.database.drop_collection("heatmaps") + + documents = [ + { + "user": 9000, + "channel_id": "124", + "date": datetime(2023, 1, 1), + "hourly_analytics": [], + "raw_analytics": [], + }, + { + "user": 9000, + "channel_id": "124", + "date": datetime(2023, 1, 2), + "hourly_analytics": [], + "raw_analytics": [], + }, + { + "user": 9001, + "channel_id": "126", + "date": datetime(2023, 1, 3), + "hourly_analytics": [], + "raw_analytics": [], + }, + ] + self.database["heatmaps"].insert_many(documents) + + last_date = self.utils.get_last_date() + self.assertEqual(last_date, datetime(2023, 1, 3)) diff --git a/tests/integration/test_interacted_in_deletion.py b/tests/integration/test_interacted_in_deletion.py index 04b7d49..6c3227d 100644 --- a/tests/integration/test_interacted_in_deletion.py +++ b/tests/integration/test_interacted_in_deletion.py @@ -1,4 +1,5 @@ -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -8,66 +9,72 @@ def test_interacted_in_deletion(): """ neo4j_ops = Neo4jOps.get_instance() - neo4j_analytics = Neo4JAnalytics() - neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + neo4j_analytics = Neo4JAnalytics(platform_id, graph_schema) + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE 
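Note on the get_last_date tests above: they pin the expected behaviour to the newest date across all heatmaps documents, or None when the collection is empty. A sketch of one way to satisfy that with plain pymongo (assuming a reachable client; HeatmapsUtils' real implementation may differ, e.g. an aggregation pipeline):

from pymongo import MongoClient, DESCENDING

def get_last_date(client: MongoClient, platform_id: str):
    # newest document wins; returns None for an empty collection
    doc = client[platform_id]["heatmaps"].find_one(
        sort=[("date", DESCENDING)], projection={"date": 1}
    )
    return doc["date"] if doc else None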
(b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - MERGE (a)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) - MERGE (b)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) - MERGE (c)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) - MERGE (d)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) + MERGE (a)-[:{interacted_in} {{date: {yesterday}}}]->(g) + MERGE (b)-[:{interacted_in} {{date: {yesterday}}}]->(g) + MERGE (c)-[:{interacted_in} {{date: {yesterday}}}]->(g) + MERGE (d)-[:{interacted_in} {{date: {yesterday}}}]->(g) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - neo4j_analytics._remove_analytics_interacted_in(guildId) + neo4j_analytics._remove_analytics_interacted_in() results = neo4j_ops.gds.run_cypher( f""" - MATCH (:DiscordAccount) -[r:INTERACTED_IN]->(:Guild {{guildId : '{guildId}'}}) + MATCH (:{user_label}) -[r:{interacted_in}]->(:{platform_label} {{id: '{platform_id}'}}) RETURN r """ ) diff --git a/tests/integration/test_lcc_all_connected.py b/tests/integration/test_lcc_all_connected.py index 9c24599..1b2f172 100644 --- a/tests/integration/test_lcc_all_connected.py +++ b/tests/integration/test_lcc_all_connected.py @@ -1,7 +1,8 @@ # test out local clustering coefficient with all nodes connected -from discord_analyzer.analysis.neo4j_analysis.local_clustering_coefficient import ( +from tc_analyzer_lib.algorithms.neo4j_analysis.local_clustering_coefficient import ( LocalClusteringCoeff, ) +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -22,36 +23,42 @@ def test_all_connected_coeffs(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" + graph_schema = 
GraphSchema(platform="discord") + platform_id = "5151515151515" + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - MERGE (a) -[r:INTERACTED_WITH {{weight: 1, date: {yesterday}}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{weight: 2, date: {today}}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{weight: 3, date: {yesterday}}}]->(c) - MERGE (b) -[r4:INTERACTED_WITH {{weight: 2, date: {yesterday}}}]->(c) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + MERGE (a) -[r:{interacted_with} {{weight: 1, date: {yesterday}}}]->(b) + MERGE (a) -[r2:{interacted_with} {{weight: 2, date: {today}}}]->(b) + MERGE (a) -[r3:{interacted_with} {{weight: 3, date: {yesterday}}}]->(c) + MERGE (b) -[r4:{interacted_with} {{weight: 2, date: {yesterday}}}]->(c) + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' """ ) - lcc = LocalClusteringCoeff() - lcc.compute(guildId=guildId, from_start=True) + lcc = LocalClusteringCoeff(platform_id, graph_schema) + lcc.compute(from_start=True) # getting the results results = neo4j_ops.gds.run_cypher( f""" - MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (:Guild {{guildId: '{guildId}'}}) + MATCH (a:{user_label}) -[r:{interacted_in}]-> (:{platform_label} {{id: '{platform_id}'}}) RETURN - a.userId as userId, + a.id as userId, r.date as date, r.localClusteringCoefficient as lcc """ diff --git a/tests/integration/test_lcc_partially_connected.py b/tests/integration/test_lcc_partially_connected.py index 73e6b5b..a00ea58 100644 --- a/tests/integration/test_lcc_partially_connected.py +++ b/tests/integration/test_lcc_partially_connected.py @@ -1,7 +1,8 @@ # the nodes of the graph are partially connected -from discord_analyzer.analysis.neo4j_analysis.local_clustering_coefficient import ( +from tc_analyzer_lib.algorithms.neo4j_analysis.local_clustering_coefficient import ( LocalClusteringCoeff, ) +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -20,57 +21,64 @@ def test_partially_connected_coeffs(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - 
CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - lcc = LocalClusteringCoeff() - lcc.compute(guildId=guildId) + lcc = LocalClusteringCoeff(platform_id, graph_schema) + lcc.compute() # getting the results results = neo4j_ops.gds.run_cypher( f""" - MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (:Guild {{guildId: '{guildId}'}}) + MATCH (a:{user_label}) -[r:{interacted_in}]-> (:{platform_label} {{id: '{platform_id}'}}) RETURN - a.userId as userId, + a.id as userId, r.date as date, r.localClusteringCoefficient as lcc """ 
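Note on the fully connected case above: its expected coefficients can be sanity-checked off-Neo4j, since users 1000, 1001 and 1002 form a triangle and every node in a triangle has a local clustering coefficient of 1.0. A quick cross-check with networkx (illustrative only; the analyzer computes this through Neo4j GDS):

import networkx as nx

# the same three users and their INTERACTED_WITH edges, direction ignored
graph = nx.Graph([("1000", "1001"), ("1000", "1002"), ("1001", "1002")])

# a triangle gives every node a local clustering coefficient of 1.0
assert nx.clustering(graph) == {"1000": 1.0, "1001": 1.0, "1002": 1.0}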
diff --git a/tests/integration/test_louvain_algorithm_computation.py b/tests/integration/test_louvain_algorithm_computation.py index 2f6bef0..7a336b3 100644 --- a/tests/integration/test_louvain_algorithm_computation.py +++ b/tests/integration/test_louvain_algorithm_computation.py @@ -1,4 +1,5 @@ -from discord_analyzer.analysis.neo4j_analysis.louvain import Louvain +from tc_analyzer_lib.algorithms.neo4j_analysis.louvain import Louvain +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -13,34 +14,40 @@ def test_louvain_algorithm_available_data(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guild_id = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guild_id}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - MERGE (a) -[r:INTERACTED_WITH {{weight: 1, date: {yesterday}}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{weight: 2, date: {today}}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{weight: 3, date: {yesterday}}}]->(c) - MERGE (b) -[r4:INTERACTED_WITH {{weight: 2, date: {yesterday}}}]->(c) - SET r.guildId = '{guild_id}' - SET r2.guildId = '{guild_id}' - SET r3.guildId = '{guild_id}' - SET r4.guildId = '{guild_id}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + MERGE (a) -[r:{interacted_with} {{weight: 1, date: {yesterday}}}]->(b) + MERGE (a) -[r2:{interacted_with} {{weight: 2, date: {today}}}]->(b) + MERGE (a) -[r3:{interacted_with} {{weight: 3, date: {yesterday}}}]->(c) + MERGE (b) -[r4:{interacted_with} {{weight: 2, date: {yesterday}}}]->(c) + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' """ ) - louvain = Louvain() - - louvain.compute(guild_id=guild_id, from_start=False) + louvain = Louvain(platform_id, graph_schema) + louvain.compute(from_start=False) results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guild_id}'}})-[r:HAVE_METRICS]->(g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]->(g) RETURN r.date as date, r.louvainModularityScore as modularityScore """ ) @@ -61,55 +68,62 @@ def test_louvain_algorithm_more_available_data(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guild_id = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guild_id}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) 
-[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guild_id}' - SET r2.guildId = '{guild_id}' - SET r3.guildId = '{guild_id}' - SET r4.guildId = '{guild_id}' - SET r5.guildId = '{guild_id}' - SET r6.guildId = '{guild_id}' - SET r7.guildId = '{guild_id}' - SET r8.guildId = '{guild_id}' - SET r9.guildId = '{guild_id}' - SET r10.guildId = '{guild_id}' - SET r11.guildId = '{guild_id}' - SET r12.guildId = '{guild_id}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - louvain = Louvain() + louvain = Louvain(platform_id, graph_schema) - louvain.compute(guild_id=guild_id, from_start=False) + louvain.compute(from_start=False) results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guild_id}'}})-[r:HAVE_METRICS]->(g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]->(g) RETURN r.date as date, r.louvainModularityScore as modularityScore """ ) diff --git 
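Note on the louvainModularityScore these tests read back: the score can be reproduced outside Neo4j as a sanity check. A sketch with networkx's Louvain implementation (an assumption for illustration; the analyzer's Louvain class drives Neo4j GDS, and exact scores can differ between implementations and runs):

import networkx as nx

# weighted graph mirroring the first louvain test's INTERACTED_WITH edges
graph = nx.Graph()
graph.add_weighted_edges_from(
    [("1000", "1001", 2), ("1000", "1002", 3), ("1001", "1002", 2)]
)

communities = nx.community.louvain_communities(graph, weight="weight", seed=1)
score = nx.community.modularity(graph, communities, weight="weight")
print(score)  # modularity of the detected partition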
a/tests/integration/test_louvain_algorithm_get_computed_dates.py b/tests/integration/test_louvain_algorithm_get_computed_dates.py index 01114c2..e76fb6b 100644 --- a/tests/integration/test_louvain_algorithm_get_computed_dates.py +++ b/tests/integration/test_louvain_algorithm_get_computed_dates.py @@ -1,5 +1,5 @@ -from discord_analyzer.analysis.neo4j_analysis.louvain import Louvain -from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_analyzer_lib.algorithms.neo4j_analysis.louvain import Louvain +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -14,34 +14,42 @@ def test_louvain_get_computed_dates_empty_data(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guild_id = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guild_id}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - MERGE (a) -[r:INTERACTED_WITH {{weight: 1, date: {yesterday}}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{weight: 2, date: {today}}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{weight: 3, date: {yesterday}}}]->(c) - MERGE (b) -[r4:INTERACTED_WITH {{weight: 2, date: {yesterday}}}]->(c) - SET r.guildId = '{guild_id}' - SET r2.guildId = '{guild_id}' - SET r3.guildId = '{guild_id}' - SET r4.guildId = '{guild_id}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + MERGE (a) -[r:{interacted_with} {{weight: 1, date: {yesterday}}}]->(b) + MERGE (a) -[r2:{interacted_with} {{weight: 2, date: {today}}}]->(b) + MERGE (a) -[r3:{interacted_with} {{weight: 3, date: {yesterday}}}]->(c) + MERGE (b) -[r4:{interacted_with} {{weight: 2, date: {yesterday}}}]->(c) + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' """ ) - louvain = Louvain() - projection_utils = ProjectionUtils(guildId=guild_id) + louvain = Louvain(platform_id, graph_schema) - computed_dates = louvain.get_computed_dates(projection_utils, guildId=guild_id) + computed_dates = louvain.get_computed_dates() assert computed_dates == set() + # clean-up + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + def test_louvain_get_computed_dates_empty_data_with_have_metrics_relation(): """ @@ -54,34 +62,40 @@ def test_louvain_get_computed_dates_empty_data_with_have_metrics_relation(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guild_id = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guild_id}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) 
-[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - MERGE (a) -[r:INTERACTED_WITH {{weight: 1, date: {yesterday}}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{weight: 2, date: {today}}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{weight: 3, date: {yesterday}}}]->(c) - MERGE (b) -[r4:INTERACTED_WITH {{weight: 2, date: {yesterday}}}]->(c) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + MERGE (a) -[r:{interacted_with} {{weight: 1, date: {yesterday}}}]->(b) + MERGE (a) -[r2:{interacted_with} {{weight: 2, date: {today}}}]->(b) + MERGE (a) -[r3:{interacted_with} {{weight: 3, date: {yesterday}}}]->(c) + MERGE (b) -[r4:{interacted_with} {{weight: 2, date: {yesterday}}}]->(c) MERGE (g)-[:HAVE_METRICS {{date: {yesterday}}}]->(g) - SET r.guildId = '{guild_id}' - SET r2.guildId = '{guild_id}' - SET r3.guildId = '{guild_id}' - SET r4.guildId = '{guild_id}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' """ ) - louvain = Louvain() - projection_utils = ProjectionUtils(guildId=guild_id) - - computed_dates = louvain.get_computed_dates(projection_utils, guildId=guild_id) + louvain = Louvain(platform_id, graph_schema) + computed_dates = louvain.get_computed_dates() assert computed_dates == set() + # clean-up + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") def test_louvain_get_computed_dates_one_data(): @@ -95,31 +109,37 @@ def test_louvain_get_computed_dates_one_data(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guild_id = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guild_id}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - MERGE (a) -[r:INTERACTED_WITH {{weight: 1, date: {yesterday}}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{weight: 2, date: {today}}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{weight: 3, date: {yesterday}}}]->(c) - MERGE (b) -[r4:INTERACTED_WITH {{weight: 2, date: {yesterday}}}]->(c) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + MERGE (a) -[r:{interacted_with} {{weight: 1, date: {yesterday}}}]->(b) + MERGE (a) -[r2:{interacted_with} {{weight: 2, date: {today}}}]->(b) + MERGE (a) -[r3:{interacted_with} {{weight: 3, date: {yesterday}}}]->(c) + MERGE (b) -[r4:{interacted_with} {{weight: 2, date: {yesterday}}}]->(c) MERGE (g)-[:HAVE_METRICS {{date: {yesterday}, louvainModularityScore: 0.0}}]->(g) - SET r.guildId = '{guild_id}' - SET r2.guildId = '{guild_id}' - SET r3.guildId = '{guild_id}' - SET r4.guildId = '{guild_id}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' """ ) - 
louvain = Louvain() - projection_utils = ProjectionUtils(guildId=guild_id) - - computed_dates = louvain.get_computed_dates(projection_utils, guildId=guild_id) + louvain = Louvain(platform_id, graph_schema) + computed_dates = louvain.get_computed_dates() assert computed_dates == {yesterday} + # clean-up + neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") diff --git a/tests/integration/test_member_activities_action_all_active.py b/tests/integration/test_member_activities_action_all_active.py index 0a791d7..ec87057 100644 --- a/tests/integration/test_member_activities_action_all_active.py +++ b/tests/integration/test_member_activities_action_all_active.py @@ -1,56 +1,50 @@ from datetime import datetime, timedelta from unittest import TestCase -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform class TestMemberActivitiesActionsAllActive(TestCase): def setUp(self) -> None: - self.guildId = "1234" - self.db_access = launch_db_access(self.guildId) + self.platform_id = "60d5ec44f9a3c2b6d7e2d11a" + self.db_access = launch_db_access(self.platform_id) def test_single_user_action(self): - platform_id = "515151515151515151515151" + self.db_access.db_mongo_client[self.platform_id].drop_collection("heatmaps") users_id_list = ["user1"] - setup_db_guild( + analyzer = setup_platform( self.db_access, - platform_id, - self.guildId, + self.platform_id, discordId_list=users_id_list, days_ago_period=35, ) - self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") rawinfo_samples = [] for i in range(35 * 24): + author = "user1" sample = { - "type": 0, - "author": "user1", - "content": f"test message {i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) - analyzer = setup_analyzer(self.guildId) - analyzer.recompute_analytics() - cursor = self.db_access.db_mongo_client[self.guildId]["memberactivities"].find( - {}, {"_id": 0, "all_active": 1} - ) + self.db_access.db_mongo_client[self.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) + + analyzer.recompute() + cursor = self.db_access.db_mongo_client[self.platform_id][ + "memberactivities" + ].find({}, {"_id": 0, "all_active": 1}) # memberactivities computed_analytics = list(cursor) @@ -60,47 +54,41 @@ def test_single_user_action(self): def test_lone_msg_action(self): users_id_list = ["user1", "user2", "user3"] - platform_id = "515151515151515151515151" - setup_db_guild( + analyzer = setup_platform( self.db_access, - platform_id, - self.guildId, + self.platform_id, discordId_list=users_id_list, days_ago_period=35, ) - 
self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") + self.db_access.db_mongo_client[self.platform_id].drop_collection("heatmaps") rawinfo_samples = [] active_users = ["user1", "user2"] for i in range(35 * 24): + author = active_users[i % len(active_users)] sample = { - "type": 0, - "author": active_users[i % len(active_users)], - "content": f"test message {i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) - analyzer = setup_analyzer(self.guildId) - analyzer.recompute_analytics() - cursor = self.db_access.db_mongo_client[self.guildId]["memberactivities"].find( - {}, {"_id": 0, "all_active": 1} - ) + self.db_access.db_mongo_client[self.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) + + analyzer.recompute() + cursor = self.db_access.db_mongo_client[self.platform_id][ + "memberactivities" + ].find({}, {"_id": 0, "all_active": 1}) # memberactivities computed_analytics = list(cursor) @@ -109,47 +97,41 @@ def test_lone_msg_action(self): self.assertEqual(set(document["all_active"]), set(["user1", "user2"])) def test_thr_message_action(self): - platform_id = "515151515151515151515151" users_id_list = ["user1", "user2", "user3", "user4"] - setup_db_guild( + analyzer = setup_platform( self.db_access, - platform_id, - self.guildId, + self.platform_id, discordId_list=users_id_list, days_ago_period=35, ) - self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") + self.db_access.db_mongo_client[self.platform_id].drop_collection("heatmaps") rawinfo_samples = [] active_users = ["user1", "user2"] for i in range(35 * 24): + author = active_users[i % len(active_users)] sample = { - "type": 0, - "author": active_users[i % len(active_users)], - "content": f"test message {i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": f"19191{i % 5}", - "threadName": f"Thread_test_{i % 5}", - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": f"19191{i % 5}", + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) - analyzer = setup_analyzer(self.guildId) - analyzer.recompute_analytics() - cursor = self.db_access.db_mongo_client[self.guildId]["memberactivities"].find( - {}, 
{"_id": 0, "all_active": 1, "date": 1} - ) + self.db_access.db_mongo_client[self.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) + + analyzer.recompute() + cursor = self.db_access.db_mongo_client[self.platform_id][ + "memberactivities" + ].find({}, {"_id": 0, "all_active": 1, "date": 1}) # memberactivities computed_analytics = list(cursor) diff --git a/tests/integration/test_member_activities_action_all_activities.py b/tests/integration/test_member_activities_action_all_activities.py index 51b6dec..08bc936 100644 --- a/tests/integration/test_member_activities_action_all_activities.py +++ b/tests/integration/test_member_activities_action_all_activities.py @@ -1,14 +1,14 @@ from datetime import datetime, timedelta from unittest import TestCase -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform class TestMemberActivitiesActionsAllActivities(TestCase): def setUp(self) -> None: - self.guildId = "1234" - self.db_access = launch_db_access(self.guildId) + self.platformId = "1234" + self.db_access = launch_db_access(self.platformId) def test_single_user_action(self): """ @@ -32,43 +32,39 @@ def test_single_user_action(self): } platform_id = "515151515151515151515151" - setup_db_guild( + analyzer = setup_platform( self.db_access, platform_id, - self.guildId, discordId_list=users_id_list, days_ago_period=35, action=action, + resources=["123"], ) - self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") + self.db_access.db_mongo_client[platform_id].drop_collection("heatmaps") rawinfo_samples = [] for i in range(35 * 24): + author = "user1" sample = { - "type": 0, - "author": "user1", - "content": f"test message {i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "123", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( + self.db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( rawinfo_samples ) - analyzer = setup_analyzer(self.guildId) - analyzer.recompute_analytics() - cursor = self.db_access.db_mongo_client[self.guildId]["memberactivities"].find( + + analyzer.recompute() + cursor = self.db_access.db_mongo_client[platform_id]["memberactivities"].find( {}, { "_id": 0, diff --git a/tests/integration/test_member_activity_from_start_no_past_data.py b/tests/integration/test_member_activity_from_start_no_past_data.py index 2c886da..17c9f2f 100644 --- a/tests/integration/test_member_activity_from_start_no_past_data.py +++ b/tests/integration/test_member_activity_from_start_no_past_data.py @@ -3,7 +3,8 @@ from bson.objectid import ObjectId -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import 
setup_platform def test_analyzer_member_activities_from_start_empty_memberactivities(): @@ -14,93 +15,61 @@ def test_analyzer_member_activities_from_start_empty_memberactivities(): # first create the collections guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) - db_access.db_mongo_client["Core"]["platforms"].delete_one({"metadata.id": guildId}) - db_access.db_mongo_client.drop_database(guildId) + db_access.db_mongo_client["Core"].drop_collection("platforms") + db_access.db_mongo_client.drop_database(platform_id) - action = { - "INT_THR": 1, - "UW_DEG_THR": 1, - "PAUSED_T_THR": 1, - "CON_T_THR": 4, - "CON_O_THR": 3, - "EDGE_STR_THR": 5, - "UW_THR_DEG_THR": 5, - "VITAL_T_THR": 4, - "VITAL_O_THR": 3, - "STILL_T_THR": 2, - "STILL_O_THR": 2, - "DROP_H_THR": 2, - "DROP_I_THR": 1, - } - - db_access.db_mongo_client["Core"]["platforms"].insert_one( - { - "_id": ObjectId(platform_id), - "name": "discord", - "metadata": { - "id": guildId, - "icon": "111111111111111111111111", - "name": "A guild", - "selectedChannels": ["1020707129214111827"], - "window": {"period_size": 7, "step_size": 1}, - "action": action, - "period": datetime.now() - timedelta(days=30), - }, - "community": ObjectId("aabbccddeeff001122334455"), - "disconnectedAt": None, - "connectedAt": (datetime.now() - timedelta(days=40)), - "isInProgress": True, - "createdAt": datetime(2023, 11, 1), - "updatedAt": datetime(2023, 11, 1), - } + analyzer = setup_platform( + db_access, + platform_id, + discordId_list=["3451791"], + days_ago_period=30, + community_id="aabbccddeeff001122334455", ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") - db_access.db_mongo_client[guildId]["guildmembers"].insert_one( + db_access.db_mongo_client[platform_id]["rawmembers"].insert_one( { - "discordId": "3451791", - "username": "sample_user", - "roles": ["99909821"], - "joinedAt": (datetime.now() - timedelta(days=10)), - "avatar": "3ddd6e429f75d6a711d0a58ba3060694", - "isBot": False, - "discriminator": "0", + "id": "3451791", + "joined_at": (datetime.now() - timedelta(days=10)), + "left_at": None, + "is_bot": False, + "options": {}, } ) rawinfo_samples = [] for i in range(150): - sample = { - "type": 0, - "author": "3451791", - "content": "test10", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"77776325{i}", - "channelId": "41414262", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) + author = "3451791" + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + analyzer.recompute() - 
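
The fixture change above is representative: the verbose guildmembers document is replaced by a much slimmer rawmembers one. For anyone migrating other fixtures, the field mapping is roughly the following; the converter itself is illustrative and not something this PR ships, with the legacy field names taken from the removed document:

def guildmember_to_rawmember(doc: dict) -> dict:
    # Maps a legacy `guildmembers` document onto the new `rawmembers` shape.
    # `left_at` has no counterpart in the old fixture, so it defaults to None.
    return {
        "id": doc["discordId"],
        "joined_at": doc["joinedAt"],
        "left_at": None,
        "is_bot": doc.get("isBot", False),
        "options": {},  # platform-specific extras; empty in these fixtures
    }

Fields like username, roles, and avatar are simply dropped; they no longer appear anywhere in the migrated tests.
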
memberactivities_data = db_access.db_mongo_client[guildId][ + memberactivities_data = db_access.db_mongo_client[platform_id][ "memberactivities" ].find_one({}) - heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + heatmaps_data = db_access.db_mongo_client[platform_id]["heatmaps"].find_one({}) guild_document = db_access.db_mongo_client["Core"]["platforms"].find_one( {"metadata.id": guildId} ) diff --git a/tests/integration/test_member_activity_from_start_with_guild_heatmaps_available.py b/tests/integration/test_member_activity_from_start_with_guild_heatmaps_available.py index e237bae..081f39d 100644 --- a/tests/integration/test_member_activity_from_start_with_guild_heatmaps_available.py +++ b/tests/integration/test_member_activity_from_start_with_guild_heatmaps_available.py @@ -1,10 +1,10 @@ # test analyzing memberactivities from datetime import datetime, timedelta -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_heatmaps import create_empty_heatmaps_data from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_member_activities_from_start_available_heatmaps(): @@ -15,57 +15,53 @@ def test_analyzer_member_activities_from_start_available_heatmaps(): # first create the collections guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) - setup_db_guild( - db_access, platform_id, guildId, discordId_list=["973993299281076285"] - ) + analyzer = setup_platform(db_access, platform_id, discordId_list=["user_0"]) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") # filling memberactivities with some data memberactivity_data = create_empty_memberactivities_data( datetime(year=2023, month=6, day=5) ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) # filling heatmaps with some data heatmaps_data = create_empty_heatmaps_data(datetime(year=2023, month=6, day=5)) - db_access.db_mongo_client[guildId]["heatmaps"].insert_many(heatmaps_data) + db_access.db_mongo_client[platform_id]["heatmaps"].insert_many(heatmaps_data) rawinfo_samples = [] for i in range(150): + author = "user_0" sample = { - "type": 0, - "author": "973993299281076285", - "content": "test10", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + 
db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + analyzer.recompute() - memberactivities_data = db_access.db_mongo_client[guildId][ + memberactivities_data = db_access.db_mongo_client[platform_id][ "memberactivities" ].find_one({}) - heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + heatmaps_data = db_access.db_mongo_client[platform_id]["heatmaps"].find_one({}) guild_document = db_access.db_mongo_client["Core"]["platforms"].find_one( {"metadata.id": guildId} ) diff --git a/tests/integration/test_member_activity_from_start_with_guild_memberactivities_available.py b/tests/integration/test_member_activity_from_start_with_guild_memberactivities_available.py index 21a614b..0614da6 100644 --- a/tests/integration/test_member_activity_from_start_with_guild_memberactivities_available.py +++ b/tests/integration/test_member_activity_from_start_with_guild_memberactivities_available.py @@ -1,9 +1,9 @@ # test analyzing memberactivities from datetime import datetime, timedelta -from .utils.analyzer_setup import launch_db_access, setup_analyzer +from .utils.analyzer_setup import launch_db_access from .utils.mock_memberactivities import create_empty_memberactivities_data -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.setup_platform import setup_platform def test_analyzer_member_activities_from_start_available_member_activity(): @@ -14,52 +14,48 @@ def test_analyzer_member_activities_from_start_available_member_activity(): # first create the collections guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) - setup_db_guild( - db_access, platform_id, guildId, discordId_list=["973993299281076285"] - ) + analyzer = setup_platform(db_access, platform_id, discordId_list=["user_0"]) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") memberactivity_data = create_empty_memberactivities_data( datetime(year=2023, month=6, day=5) ) - db_access.db_mongo_client[guildId]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( memberactivity_data ) rawinfo_samples = [] for i in range(150): + author = "user_0" sample = { - "type": 0, - "author": "973993299281076285", - "content": "test10", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + 
analyzer.recompute() - memberactivities_data = db_access.db_mongo_client[guildId][ + memberactivities_data = db_access.db_mongo_client[platform_id][ "memberactivities" ].find_one({}) - heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + heatmaps_data = db_access.db_mongo_client[platform_id]["heatmaps"].find_one({}) guild_document = db_access.db_mongo_client["Core"]["platforms"].find_one( {"metadata.id": guildId} ) diff --git a/tests/integration/test_member_activity_from_start_with_one_interval.py b/tests/integration/test_member_activity_from_start_with_one_interval.py index 1a35c2b..55e95a7 100644 --- a/tests/integration/test_member_activity_from_start_with_one_interval.py +++ b/tests/integration/test_member_activity_from_start_with_one_interval.py @@ -1,8 +1,8 @@ # test analyzing memberactivities from datetime import datetime, timedelta -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_analyzer_from_start_one_interval(): @@ -12,45 +12,41 @@ def test_analyzer_from_start_one_interval(): # first create the collections platform_id = "515151515151515151515151" guildId = "1234" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) - setup_db_guild( - db_access, platform_id, guildId, discordId_list=["973993299281076285"] - ) + analyzer = setup_platform(db_access, platform_id, discordId_list=["user_0"]) rawinfo_samples = [] for i in range(150): + author = "user_0" sample = { - "type": 0, - "author": "973993299281076285", - "content": "test10", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") - analyzer = setup_analyzer(guildId) - analyzer.recompute_analytics() + analyzer.recompute() - memberactivities_data = db_access.db_mongo_client[guildId][ + memberactivities_data = db_access.db_mongo_client[platform_id][ "memberactivities" ].find_one({}) - heatmaps_data = db_access.db_mongo_client[guildId]["heatmaps"].find_one({}) + heatmaps_data = db_access.db_mongo_client[platform_id]["heatmaps"].find_one({}) guild_document = db_access.db_mongo_client["Core"]["platforms"].find_one( {"metadata.id": guildId} ) diff --git a/tests/integration/test_member_activity_utils.py b/tests/integration/test_member_activity_utils.py index 5023847..bcf8d69 100644 --- a/tests/integration/test_member_activity_utils.py +++ 
b/tests/integration/test_member_activity_utils.py @@ -1,51 +1,44 @@ from datetime import datetime, timedelta -from discord_analyzer.analyzer.memberactivity_utils import MemberActivityUtils +from tc_analyzer_lib.metrics.memberactivity_utils import MemberActivityUtils -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_utils_get_members(): platform_id = "515151515151515151515151" - guildId = "1012430565959553145" - users = ["973993299281076285"] - db_access = launch_db_access(guildId) - setup_db_guild( - db_access, platform_id, guildId, discordId_list=users, days_ago_period=7 - ) - analyzer = setup_analyzer(guildId) - - setup_db_guild(db_access, platform_id, guildId, discordId_list=users) + users = ["user_0"] + db_access = launch_db_access(platform_id) + _ = setup_platform(db_access, platform_id, discordId_list=users, days_ago_period=7) rawinfo_samples = [] for i in range(150): + author = "user_0" sample = { - "type": 0, - "author": "973993299281076285", - "content": "test10", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", } rawinfo_samples.append(sample) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].drop_collection("heatmaps") + db_access.db_mongo_client[platform_id].drop_collection("memberactivities") - memberactivities_utils = MemberActivityUtils(analyzer.DB_connections) + memberactivities_utils = MemberActivityUtils() - database_users = memberactivities_utils.get_all_users(guildId=guildId) + database_users = memberactivities_utils.get_all_users(guildId=platform_id) print(f"database_users: {database_users}") assert database_users == users diff --git a/tests/integration/test_memberactivities_mentions.py b/tests/integration/test_memberactivities_mentions.py index f31f816..96eceb6 100644 --- a/tests/integration/test_memberactivities_mentions.py +++ b/tests/integration/test_memberactivities_mentions.py @@ -1,14 +1,14 @@ from datetime import datetime, timedelta from unittest import TestCase -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform class TestMemberActivitiesReply(TestCase): def setUp(self) -> None: - self.guildId = "1234" - self.db_access = launch_db_access(self.guildId) + self.platform_id = "515151515151515151515151" + self.db_access = launch_db_access(self.platform_id) def test_single_user_interaction(self): users_id_list = ["user1", "user2"] @@ -27,45 +27,68 
@@ def test_single_user_interaction(self): "DROP_H_THR": 2, "DROP_I_THR": 1, } - platform_id = "515151515151515151515151" - setup_db_guild( + analyzer = setup_platform( self.db_access, - platform_id, - self.guildId, + self.platform_id, discordId_list=users_id_list, days_ago_period=35, action=action, ) - self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") + self.db_access.db_mongo_client[self.platform_id]["heatmaps"].delete_many({}) + self.db_access.db_mongo_client[self.platform_id].create_collection("heatmaps") rawinfo_samples = [] for i in range(35 * 24): - sample = { - "type": 0, - "author": "user1", - "content": f"test message {i} @user2", - "user_mentions": ["user2"], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) + author = "user1" + mentioned_user = "user2" + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "mention", + "type": "emitter", + "users_engaged_id": [mentioned_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": mentioned_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "mention", + "type": "receiver", + "users_engaged_id": [author], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) - analyzer = setup_analyzer(self.guildId) - analyzer.recompute_analytics() - cursor = self.db_access.db_mongo_client[self.guildId]["memberactivities"].find( + self.db_access.db_mongo_client[self.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) + analyzer.recompute() + cursor = self.db_access.db_mongo_client[self.platform_id][ + "memberactivities" + ].find( {}, { "_id": 0, diff --git a/tests/integration/test_memberactivities_reaction.py b/tests/integration/test_memberactivities_reaction.py index e4b95db..da47a47 100644 --- a/tests/integration/test_memberactivities_reaction.py +++ b/tests/integration/test_memberactivities_reaction.py @@ -1,14 +1,14 @@ from datetime import datetime, timedelta from unittest import TestCase -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform class TestMemberActivitiesReactions(TestCase): def setUp(self) -> None: - self.guildId = "1234" - self.db_access = launch_db_access(self.guildId) + self.platform_id = "60d5ec44f9a3c2b6d7e2d11a" + self.db_access = launch_db_access(self.platform_id) def test_single_user_action(self): """ @@ -30,49 +30,76 @@ def test_single_user_action(self): "DROP_H_THR": 2, "DROP_I_THR": 1, } - platform_id = "515151515151515151515151" - setup_db_guild( + analyzer = setup_platform( self.db_access, - platform_id, - 
self.guildId, + self.platform_id, discordId_list=users_id_list, days_ago_period=35, action=action, ) - self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") + self.db_access.db_mongo_client[self.platform_id].drop_collection("heatmaps") + self.db_access.db_mongo_client[self.platform_id].drop_collection( + "rawmemberactivities" + ) rawinfo_samples = [] for i in range(35 * 24): - sample = { - "type": 0, - "author": "user1", - "content": f"test message {i}", - "user_mentions": [], - "role_mentions": [], - "reactions": ["user2,👍"], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) + author = "user1" + reacted_user = "user2" + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reaction", + "type": "receiver", + "users_engaged_id": [reacted_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": reacted_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reaction", + "type": "emitter", + "users_engaged_id": [author], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) - analyzer = setup_analyzer(self.guildId) - analyzer.recompute_analytics() - cursor = self.db_access.db_mongo_client[self.guildId]["memberactivities"].find( + self.db_access.db_mongo_client[self.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) + + analyzer.recompute() + cursor = self.db_access.db_mongo_client[self.platform_id][ + "memberactivities" + ].find( {}, { "_id": 0, "all_active": 1, + "date": 1, }, ) diff --git a/tests/integration/test_memberactivities_reply.py b/tests/integration/test_memberactivities_reply.py index e18c0f0..ff5a400 100644 --- a/tests/integration/test_memberactivities_reply.py +++ b/tests/integration/test_memberactivities_reply.py @@ -1,18 +1,17 @@ from datetime import datetime, timedelta from unittest import TestCase -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform class TestMemberActivitiesReply(TestCase): def setUp(self) -> None: - self.guildId = "1234" - self.db_access = launch_db_access(self.guildId) + self.platform_id = "515151515151515151515151" + self.db_access = launch_db_access(self.platform_id) def test_single_user_interaction(self): users_id_list = ["user1", "user2"] - platform_id = "515151515151515151515151" action = { "INT_THR": 1, @@ -29,43 +28,70 @@ def test_single_user_interaction(self): "DROP_H_THR": 2, "DROP_I_THR": 1, } - setup_db_guild( + analyzer = setup_platform( self.db_access, - platform_id, - self.guildId, + self.platform_id, discordId_list=users_id_list, days_ago_period=35, action=action, ) - 
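
The interaction fixtures in the mention, reaction, and reply tests all follow one mirrored pattern: each interaction is stored twice, once from the emitter's side and once from the receiver's, with users_engaged_id pointing at the counterpart. For mentions and replies the message author is the emitter; for reactions the author sits on the receiver side, since someone else did the reacting. A hypothetical helper (not part of this PR) makes the symmetry explicit:

from datetime import datetime, timedelta

def make_interaction_pair(
    name: str, emitter: str, receiver: str, hours_ago: int
) -> list[dict]:
    # Two mirrored `rawmemberactivities` documents describing one interaction.
    # The "message" action itself stays on whichever document belongs to the
    # message author; it is omitted here for brevity.
    common = {
        "date": datetime.now() - timedelta(hours=hours_ago),
        "metadata": {
            "bot_activity": False,
            "channel_id": "1020707129214111827",
            "thread_id": None,
        },
        "source_id": f"11188143219343360{hours_ago}",
    }
    return [
        {
            **common,
            "actions": [],
            "author_id": emitter,
            "interactions": [
                {"name": name, "type": "emitter", "users_engaged_id": [receiver]}
            ],
        },
        {
            **common,
            "actions": [],
            "author_id": receiver,
            "interactions": [
                {"name": name, "type": "receiver", "users_engaged_id": [emitter]}
            ],
        },
    ]
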
self.db_access.db_mongo_client[self.guildId]["heatmaps"].delete_many({}) - self.db_access.db_mongo_client[self.guildId].create_collection("heatmaps") + + self.db_access.db_mongo_client[self.platform_id].drop_collection( + "rawmemberactivities" + ) + self.db_access.db_mongo_client[self.platform_id].drop_collection("heatmaps") rawinfo_samples = [] for i in range(35 * 24): - sample = { - "type": 19, - "author": "user1", - "content": f"test message {i}", - "user_mentions": [], - "role_mentions": [], - "reactions": [], - "replied_user": "user2", - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) + author = "user1" + replied_user = "user2" + samples = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "emitter", + "users_engaged_id": [replied_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": replied_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "reply", + "type": "receiver", + "users_engaged_id": [author], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(samples) - self.db_access.db_mongo_client[self.guildId]["rawinfos"].insert_many( - rawinfo_samples - ) - analyzer = setup_analyzer(self.guildId) - analyzer.recompute_analytics() - cursor = self.db_access.db_mongo_client[self.guildId]["memberactivities"].find( + self.db_access.db_mongo_client[self.platform_id][ + "rawmemberactivities" + ].insert_many(rawinfo_samples) + analyzer.recompute() + cursor = self.db_access.db_mongo_client[self.platform_id][ + "memberactivities" + ].find( {}, { "_id": 0, diff --git a/tests/integration/test_mentioned_active_members_from_message.py b/tests/integration/test_mentioned_active_members_from_message.py index f14db77..046db0f 100644 --- a/tests/integration/test_mentioned_active_members_from_message.py +++ b/tests/integration/test_mentioned_active_members_from_message.py @@ -1,30 +1,32 @@ from datetime import datetime, timedelta -from .utils.analyzer_setup import launch_db_access, setup_analyzer -from .utils.remove_and_setup_guild import setup_db_guild +from .utils.analyzer_setup import launch_db_access +from .utils.setup_platform import setup_platform def test_mention_active_members_from_rawinfo(): """ test whether the people are being mentioned are active or not the shouldn't considered as active as we're not counting them - the rawinfos is used + the rawmemberactivities is used """ # first create the collections - guildId = "1234" platform_id = "515151515151515151515151" - db_access = launch_db_access(guildId) + db_access = launch_db_access(platform_id) acc_id = [ "user1", "user2", ] - setup_db_guild( - db_access, platform_id, guildId, discordId_list=acc_id, days_ago_period=7 + analyzer = setup_platform( + db_access=db_access, + platform_id=platform_id, + discordId_list=acc_id, + days_ago_period=7, ) - db_access.db_mongo_client[guildId].create_collection("heatmaps") - 
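
Worth flagging before the next file: test_mention_active_members_from_rawinfo drives the analyzer with run_once() rather than the recompute() used everywhere above. As exercised by this suite, the two entry points look like this; the comments describe semantics inferred from the test names, not documented behaviour, and the snippet assumes it lives inside this test package:

from .utils.analyzer_setup import launch_db_access
from .utils.setup_platform import setup_platform

platform_id = "515151515151515151515151"
db_access = launch_db_access(platform_id)
analyzer = setup_platform(
    db_access, platform_id, discordId_list=["user1", "user2"], days_ago_period=7
)

analyzer.recompute()  # assumed: rebuild all analytics from the platform period start
analyzer.run_once()   # assumed: process only the not-yet-analyzed window
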
db_access.db_mongo_client[guildId].create_collection("memberactivities") + db_access.db_mongo_client[platform_id].create_collection("heatmaps") + db_access.db_mongo_client[platform_id].create_collection("memberactivities") # generating rawinfo samples rawinfo_samples = [] @@ -32,27 +34,52 @@ def test_mention_active_members_from_rawinfo(): # generating random rawinfo data # all user1 mentioning user2 for i in range(150): - sample = { - "type": 0, - "author": "user1", - "content": f"test{i}", - "user_mentions": ["user2"], - "role_mentions": [], - "reactions": [], - "replied_user": None, - "createdDate": (datetime.now() - timedelta(hours=i)), - "messageId": f"11188143219343360{i}", - "channelId": "1020707129214111827", - "channelName": "general", - "threadId": None, - "threadName": None, - "isGeneratedByWebhook": False, - } - rawinfo_samples.append(sample) + author = "user1" + mentioned_user = "user2" + sample = [ + { + "actions": [{"name": "message", "type": "emitter"}], + "author_id": author, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "mention", + "type": "emitter", + "users_engaged_id": [mentioned_user], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + { + "actions": [], + "author_id": mentioned_user, + "date": datetime.now() - timedelta(hours=i), + "interactions": [ + { + "name": "mention", + "type": "receiver", + "users_engaged_id": [author], + } + ], + "metadata": { + "bot_activity": False, + "channel_id": "1020707129214111827", + "thread_id": None, + }, + "source_id": f"11188143219343360{i}", + }, + ] + rawinfo_samples.extend(sample) - db_access.db_mongo_client[guildId]["rawinfos"].insert_many(rawinfo_samples) + db_access.db_mongo_client[platform_id]["rawmemberactivities"].insert_many( + rawinfo_samples + ) - analyzer = setup_analyzer(guildId) analyzer.run_once() memberactivities_cursor = db_access.query_db_find( diff --git a/tests/integration/test_neo4j_compute_metrics.py b/tests/integration/test_neo4j_compute_metrics.py index 0cd203c..b94d825 100644 --- a/tests/integration/test_neo4j_compute_metrics.py +++ b/tests/integration/test_neo4j_compute_metrics.py @@ -1,5 +1,6 @@ import numpy as np -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -18,59 +19,66 @@ def test_guild_results_available(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - 
MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - analytics = Neo4JAnalytics() + analytics = Neo4JAnalytics(platform_id, graph_schema) - analytics.compute_metrics(guildId=guildId, from_start=False) + analytics.compute_metrics(from_start=False) accounts_result = neo4j_ops.gds.run_cypher( f""" - MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (g:Guild {{guildId: '{guildId}'}}) + MATCH (a:{user_label}) -[r:{interacted_in}]-> (g:{platform_label} {{id: '{platform_id}'}}) MATCH (g) -[r2:HAVE_METRICS]->(g) RETURN - a.userId AS userId, + a.id AS userId, r.date AS date, r.localClusteringCoefficient AS localClusteringCoefficient, r.status AS status @@ -85,22 +93,22 @@ def test_guild_results_available(): guild_results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}}) -[r:HAVE_METRICS]->(g) + MATCH 
(g:{platform_label} {{id: '{platform_id}'}}) -[r:HAVE_METRICS]->(g) RETURN r.date as date, - g.guildId as guildId, + g.id as platformId, r.decentralizationScore as decentralizationScore """ ) for _, row in guild_results.iterrows(): print(row) assert row["date"] in [yesterday, today] - assert row["guildId"] == guildId + assert row["platformId"] == platform_id assert bool(np.isnan(row["decentralizationScore"])) is False results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}})-[r:HAVE_METRICS]->(g) + MATCH (g:{platform_label} {{id: '{platform_id}'}})-[r:HAVE_METRICS]->(g) RETURN r.date as date, r.louvainModularityScore as modularityScore """ ) diff --git a/tests/integration/test_neo4j_compute_metrics_from_start.py b/tests/integration/test_neo4j_compute_metrics_from_start.py index 24b1faf..a4cbc2d 100644 --- a/tests/integration/test_neo4j_compute_metrics_from_start.py +++ b/tests/integration/test_neo4j_compute_metrics_from_start.py @@ -1,5 +1,6 @@ import numpy as np -from discord_analyzer.analyzer.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.metrics.neo4j_analytics import Neo4JAnalytics +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -18,58 +19,64 @@ def test_neo4j_compute_metrics_from_start(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" - + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) - - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE 
(d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) + + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - analytics = Neo4JAnalytics() + analytics = Neo4JAnalytics(platform_id, graph_schema) - analytics.compute_metrics(guildId=guildId, from_start=True) + analytics.compute_metrics(from_start=True) accounts_result = neo4j_ops.gds.run_cypher( f""" - MATCH (a:DiscordAccount) -[r:INTERACTED_IN]-> (g:Guild {{guildId: '{guildId}'}}) + MATCH (a:{user_label}) -[r:{interacted_in}]-> (g:{platform_label} {{id: '{platform_id}'}}) RETURN - a.userId AS userId, + a.id AS userId, r.date AS date, r.localClusteringCoefficient AS localClusteringCoefficient, r.status AS status @@ -98,10 +105,10 @@ def test_neo4j_compute_metrics_from_start(): guild_results = neo4j_ops.gds.run_cypher( f""" - MATCH (g:Guild {{guildId: '{guildId}'}}) -[r:HAVE_METRICS]->(g) + MATCH (g:{platform_label} {{id: '{platform_id}'}}) -[r:HAVE_METRICS]->(g) RETURN r.date as date, - g.guildId as guildId, + g.id as platformId, r.decentralizationScore as decentralizationScore """ ) @@ -111,6 +118,6 @@ def test_neo4j_compute_metrics_from_start(): for _, row in guild_results.iterrows(): print(row) assert row["date"] in [yesterday, today] - assert row["guildId"] == guildId + assert row["platformId"] == platform_id assert row["decentralizationScore"] is not None assert bool(np.isnan(row["decentralizationScore"])) is False diff --git a/tests/integration/test_neo4j_projection_utils_computed_dates.py b/tests/integration/test_neo4j_projection_utils_computed_dates.py index b359e55..dcf6894 100644 --- a/tests/integration/test_neo4j_projection_utils_computed_dates.py +++ b/tests/integration/test_neo4j_projection_utils_computed_dates.py @@ -1,4 +1,5 @@ -from discord_analyzer.analysis.neo4j_utils.projection_utils import ProjectionUtils +from tc_analyzer_lib.algorithms.neo4j_analysis.utils import ProjectionUtils +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -13,65 +14,72 @@ def test_neo4j_projection_utils_get_computed_dates(): # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" + graph_schema = 
GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId = "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) - MERGE (a)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) - MERGE (a)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) - MERGE (b)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) - MERGE (b)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) - MERGE (c)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) - MERGE (c)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) - MERGE (d)-[:INTERACTED_IN {{date: {yesterday}}}]->(g) - MERGE (e)-[:INTERACTED_IN {{date: {today}, localClusteringCoefficient: 1}}]->(g) + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) + MERGE (a)-[:{interacted_in} {{date: {yesterday}}}]->(g) + MERGE (a)-[:{interacted_in} {{date: {today}, localClusteringCoefficient: 1}}]->(g) + MERGE (b)-[:{interacted_in} 
{{date: {yesterday}}}]->(g) + MERGE (b)-[:{interacted_in} {{date: {today}, localClusteringCoefficient: 1}}]->(g) + MERGE (c)-[:{interacted_in} {{date: {yesterday}}}]->(g) + MERGE (c)-[:{interacted_in} {{date: {today}, localClusteringCoefficient: 1}}]->(g) + MERGE (d)-[:{interacted_in} {{date: {yesterday}}}]->(g) + MERGE (e)-[:{interacted_in} {{date: {today}, localClusteringCoefficient: 1}}]->(g) - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - projection_utils = ProjectionUtils(guildId=guildId) + projection_utils = ProjectionUtils(platform_id, graph_schema) computed_dates = projection_utils.get_computed_dates( - """ - MATCH (:DiscordAccount)-[r:INTERACTED_IN]->(g:Guild {guildId: $guild_id}) + f""" + MATCH (:{user_label})-[r:{interacted_in}]->(g:{platform_label} {{id: $platform_id}}) WHERE r.localClusteringCoefficient is NOT NULL RETURN r.date as computed_dates """, - guild_id=guildId, + platform_id=platform_id, ) print(computed_dates) diff --git a/tests/integration/test_network_graph_creation.py b/tests/integration/test_network_graph_creation.py deleted file mode 100644 index 312404c..0000000 --- a/tests/integration/test_network_graph_creation.py +++ /dev/null @@ -1,178 +0,0 @@ -# test out local clustering coefficient with all nodes connected -from datetime import datetime, timedelta - -import networkx as nx -import numpy as np -from discord_analyzer.analysis.utils.activity import Activity -from tc_neo4j_lib.neo4j_ops import Neo4jOps - -from .utils.mock_graph import generate_mock_graph, store_mock_data_in_neo4j - - -def test_network_graph_create(): - community_id = "4321" - neo4j_ops = Neo4jOps.get_instance() - # deleting all data - neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") - - guildId = "1234" - acc_names = np.array(["1000", "1001", "1002"]) - graph_dict = {} - - # saving the desired outputs - desired_outputs = [] - - # Generating 1st graph - np.random.seed(123) - int_matrix = {} - int_matrix[Activity.Reply] = np.array( - [ - [0, 1, 2], - [0, 0, 3], - [0, 4, 0], - ] - ) - - int_matrix[Activity.Mention] = np.array( - [ - [0, 1, 2], - [0, 0, 3], - [0, 4, 0], - ] - ) - - int_matrix[Activity.Reaction] = np.array( - [ - [0, 1, 2], - [0, 0, 3], - [0, 4, 0], - ] - ) - - graph = generate_mock_graph(int_matrix, acc_names) - - node_att = {} - for i, node in enumerate(list(graph)): - node_att[node] = acc_names[i] - - nx.set_node_attributes(graph, node_att, "acc_name") - - graph_date = datetime.now() - graph_date_timestamp = graph_date.replace( - hour=0, minute=0, second=0, microsecond=0 - ).timestamp() - graph_dict[graph_date] = graph - - desired_outputs.extend( - [ - ["1000", 1, graph_date_timestamp, "1001"], - ["1000", 2, graph_date_timestamp, "1002"], - ["1001", 3, graph_date_timestamp, "1002"], - ["1002", 
4, graph_date_timestamp, "1001"], - ] - ) - - # Generating 2nd graph - int_matrix = {} - int_matrix[Activity.Reply] = np.array( - [ - [0, 0, 1], - [2, 0, 5], - [0, 0, 0], - ] - ) - - int_matrix[Activity.Mention] = np.array( - [ - [0, 0, 1], - [2, 0, 5], - [0, 0, 0], - ] - ) - - int_matrix[Activity.Reaction] = np.array( - [ - [0, 0, 1], - [2, 0, 5], - [0, 0, 0], - ] - ) - - graph = generate_mock_graph(int_matrix, acc_names) - - nx.set_node_attributes(graph, node_att, "acc_name") - - graph_date = datetime.now() + timedelta(days=-1) - graph_date_timestamp = graph_date.replace( - hour=0, minute=0, second=0, microsecond=0 - ).timestamp() - graph_dict[graph_date] = graph - - desired_outputs.extend( - [ - ["1000", 1, graph_date_timestamp, "1002"], - ["1001", 2, graph_date_timestamp, "1000"], - ["1001", 5, graph_date_timestamp, "1002"], - ] - ) - - # generating 3rd graph - int_matrix = {} - int_matrix[Activity.Reply] = np.array( - [ - [0, 0, 3], - [0, 0, 0], - [1, 0, 0], - ] - ) - int_matrix[Activity.Mention] = np.array( - [ - [0, 0, 3], - [0, 0, 0], - [1, 0, 0], - ] - ) - int_matrix[Activity.Reaction] = np.array( - [ - [0, 0, 3], - [0, 0, 0], - [1, 0, 0], - ] - ) - - graph = generate_mock_graph(int_matrix, acc_names) - nx.set_node_attributes(graph, node_att, "acc_name") - - graph_date = datetime.now() + timedelta(days=-8) - graph_date_timestamp = graph_date.replace( - hour=0, minute=0, second=0, microsecond=0 - ).timestamp() - graph_dict[graph_date] = graph - - desired_outputs.extend( - [ - ["1000", 3, graph_date_timestamp, "1002"], - ["1002", 1, graph_date_timestamp, "1000"], - ] - ) - - # DATABASE SAVING - - store_mock_data_in_neo4j( - graph_dict=graph_dict, guildId=guildId, community_id=community_id - ) - - results = neo4j_ops.gds.run_cypher( - f""" - MATCH (a:DiscordAccount) -[:IS_MEMBER] -> (g:Guild {{guildId: '{guildId}'}}) - MATCH (a)-[r:INTERACTED_WITH]-> (b:DiscordAccount) - RETURN - a.userId as fromUserId, - r.weight as weight, - r.date as date, - b.userId as toUserId - """ - ) - print(desired_outputs) - print(results) - assert desired_outputs in results.values diff --git a/tests/integration/test_node_stats.py b/tests/integration/test_node_stats.py index bcbf9cc..ae1b08a 100644 --- a/tests/integration/test_node_stats.py +++ b/tests/integration/test_node_stats.py @@ -1,5 +1,6 @@ # test out local clustering coefficient with all nodes connected -from discord_analyzer.analysis.neo4j_analysis.analyzer_node_stats import NodeStats +from tc_analyzer_lib.algorithms.neo4j_analysis.analyzer_node_stats import NodeStats +from tc_analyzer_lib.schemas import GraphSchema from tc_neo4j_lib.neo4j_ops import Neo4jOps @@ -15,61 +16,69 @@ def test_node_stats(): # deleting all data neo4j_ops.gds.run_cypher("MATCH (n) DETACH DELETE (n)") + graph_schema = GraphSchema(platform="discord") + platform_id = "5151515151515" + + user_label = graph_schema.user_label + platform_label = graph_schema.platform_label + interacted_with = graph_schema.interacted_with_rel + interacted_in = graph_schema.interacted_in_rel + is_member = graph_schema.member_relation + # timestamps today = 1689280200.0 yesterday = 1689193800.0 - guildId = "1234" # creating some nodes with data neo4j_ops.gds.run_cypher( f""" - CREATE (a:DiscordAccount) -[:IS_MEMBER]->(g:Guild {{guildId: '{guildId}'}}) - CREATE (b:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (c:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (d:DiscordAccount) -[:IS_MEMBER]->(g) - CREATE (e:DiscordAccount) -[:IS_MEMBER]->(g) - SET a.userId = "1000" - SET b.userId = "1001" - SET c.userId 
= "1002" - SET d.userId = "1003" - SET e.userId = "1004" - MERGE (a) -[r:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (a) -[r2:INTERACTED_WITH {{date: {today}, weight: 2}}]->(b) - MERGE (a) -[r3:INTERACTED_WITH {{date: {yesterday}, weight: 3}}]->(d) - MERGE (c) -[r4:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(b) - MERGE (c) -[r5:INTERACTED_WITH {{date: {today}, weight: 1}}]->(b) - MERGE (c) -[r6:INTERACTED_WITH {{date: {yesterday}, weight: 2}}]->(d) - MERGE (d) -[r7:INTERACTED_WITH {{date: {yesterday}, weight: 1}}]->(b) - MERGE (c) -[r8:INTERACTED_WITH {{date: {today}, weight: 2}}]->(a) - MERGE (d) -[r9:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (b) -[r10:INTERACTED_WITH {{date: {today}, weight: 2}}]->(d) - MERGE (d) -[r11:INTERACTED_WITH {{date: {today}, weight: 1}}]->(c) - MERGE (e) -[r12:INTERACTED_WITH {{date: {today}, weight: 3}}]->(b) - - SET r.guildId = '{guildId}' - SET r2.guildId = '{guildId}' - SET r3.guildId = '{guildId}' - SET r4.guildId = '{guildId}' - SET r5.guildId = '{guildId}' - SET r6.guildId = '{guildId}' - SET r7.guildId = '{guildId}' - SET r8.guildId = '{guildId}' - SET r9.guildId = '{guildId}' - SET r10.guildId = '{guildId}' - SET r11.guildId = '{guildId}' - SET r12.guildId = '{guildId}' + CREATE (a:{user_label}) -[:{is_member}]->(g:{platform_label} {{id: '{platform_id}'}}) + CREATE (b:{user_label}) -[:{is_member}]->(g) + CREATE (c:{user_label}) -[:{is_member}]->(g) + CREATE (d:{user_label}) -[:{is_member}]->(g) + CREATE (e:{user_label}) -[:{is_member}]->(g) + SET a.id = "1000" + SET b.id = "1001" + SET c.id = "1002" + SET d.id = "1003" + SET e.id = "1004" + MERGE (a) -[r:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (a) -[r2:{interacted_with} {{date: {today}, weight: 2}}]->(b) + MERGE (a) -[r3:{interacted_with} {{date: {yesterday}, weight: 3}}]->(d) + MERGE (c) -[r4:{interacted_with} {{date: {yesterday}, weight: 2}}]->(b) + MERGE (c) -[r5:{interacted_with} {{date: {today}, weight: 1}}]->(b) + MERGE (c) -[r6:{interacted_with} {{date: {yesterday}, weight: 2}}]->(d) + MERGE (d) -[r7:{interacted_with} {{date: {yesterday}, weight: 1}}]->(b) + MERGE (c) -[r8:{interacted_with} {{date: {today}, weight: 2}}]->(a) + MERGE (d) -[r9:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (b) -[r10:{interacted_with} {{date: {today}, weight: 2}}]->(d) + MERGE (d) -[r11:{interacted_with} {{date: {today}, weight: 1}}]->(c) + MERGE (e) -[r12:{interacted_with} {{date: {today}, weight: 3}}]->(b) + + SET r.platformId = '{platform_id}' + SET r2.platformId = '{platform_id}' + SET r3.platformId = '{platform_id}' + SET r4.platformId = '{platform_id}' + SET r5.platformId = '{platform_id}' + SET r6.platformId = '{platform_id}' + SET r7.platformId = '{platform_id}' + SET r8.platformId = '{platform_id}' + SET r9.platformId = '{platform_id}' + SET r10.platformId = '{platform_id}' + SET r11.platformId = '{platform_id}' + SET r12.platformId = '{platform_id}' """ ) - node_stats = NodeStats(threshold=2) - node_stats.compute_stats(guildId="1234", from_start=True) + node_stats = NodeStats(platform_id, graph_schema, threshold=2) + node_stats.compute_stats(from_start=True) # getting the results results = neo4j_ops.gds.run_cypher( f""" - MATCH (a:DiscordAccount) - -[r:INTERACTED_IN] -> (g:Guild {{guildId: '{guildId}'}}) - RETURN a.userId as userId, r.date as date, r.status as status + MATCH (a:{user_label}) + -[r:{interacted_in}] -> (g:{platform_label} {{id: '{platform_id}'}}) + RETURN a.id as userId, r.date as date, r.status as status """ 
) diff --git a/tests/integration/test_platforms_base_config.py b/tests/integration/test_platforms_base_config.py new file mode 100644 index 0000000..ec39cab --- /dev/null +++ b/tests/integration/test_platforms_base_config.py @@ -0,0 +1,124 @@ +import unittest + +from tc_analyzer_lib.schemas import ( + ActivityDirection, + ActivityType, + HourlyAnalytics, + RawAnalytics, +) +from tc_analyzer_lib.schemas.platform_configs.config_base import PlatformConfigBase + + +class TestPlatformBaseConfig(unittest.TestCase): + def test_config_to_dict(self): + analytics = HourlyAnalytics( + name="thr_messages", + type=ActivityType.ACTION, + member_activities_used=False, + direction=ActivityDirection.RECEIVER, + rawmemberactivities_condition={"thread_id": {"$ne": None}}, + activity_name="actions", + ) + expected_dict = { + "name": "thr_messages", + "type": "actions", + "member_activities_used": False, + "direction": "receiver", + "rawmemberactivities_condition": {"thread_id": {"$ne": None}}, + "activity_name": "actions", + } + self.assertEqual(analytics.to_dict(), expected_dict) + + def test_analytics_from_dict(self): + data = { + "name": "thr_messages", + "type": "actions", + "member_activities_used": False, + "direction": "emitter", + "rawmemberactivities_condition": {"thread_id": {"$ne": None}}, + } + analytics = HourlyAnalytics.from_dict(data) + self.assertEqual(analytics.name, "thr_messages") + self.assertEqual(analytics.type, ActivityType.ACTION) + self.assertFalse(analytics.member_activities_used) + self.assertEqual( + analytics.rawmemberactivities_condition, {"thread_id": {"$ne": None}} + ) + + def test_analytics_data_to_dict(self): + hourly_analytics = [ + HourlyAnalytics( + name="thr_messages", + type=ActivityType.ACTION, + member_activities_used=False, + direction=ActivityDirection.EMITTER, + rawmemberactivities_condition={"thread_id": {"$ne": None}}, + ) + ] + raw_analytics = [ + RawAnalytics( + name="replied_per_acc", + type=ActivityType.INTERACTION, + member_activities_used=True, + direction=ActivityDirection.RECEIVER, + ) + ] + analytics_data = PlatformConfigBase( + platform="discord", + resource_identifier="channel_id", + hourly_analytics=hourly_analytics, + raw_analytics=raw_analytics, + ) + expected_dict = { + "platform": "discord", + "resource_identifier": "channel_id", + "hourly_analytics": [analytic.to_dict() for analytic in hourly_analytics], + "raw_analytics": [analytic.to_dict() for analytic in raw_analytics], + } + self.assertEqual(analytics_data.to_dict(), expected_dict) + + def test_analytics_data_from_dict(self): + data = { + "platform": "discord", + "resource_identifier": "chat_id", + "hourly_analytics": [ + { + "name": "thr_messages", + "type": "actions", + "member_activities_used": False, + "rawmemberactivities_condition": {"thread_id": {"$ne": None}}, + "direction": "emitter", + } + ], + "raw_analytics": [ + { + "name": "replied_per_acc", + "type": "interactions", + "member_activities_used": True, + "direction": "receiver", + } + ], + } + analyzer_config = PlatformConfigBase.from_dict(data) + self.assertEqual(analyzer_config.platform, "discord") + self.assertEqual(analyzer_config.resource_identifier, "chat_id") + self.assertEqual(len(analyzer_config.hourly_analytics), 1) + self.assertEqual(analyzer_config.hourly_analytics[0].name, "thr_messages") + self.assertEqual(analyzer_config.hourly_analytics[0].type, ActivityType.ACTION) + self.assertEqual( + analyzer_config.hourly_analytics[0].direction, ActivityDirection.EMITTER + ) + 
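+        # the string forms ("actions"/"interactions", "emitter"/"receiver")
+        # round-trip into the ActivityType and ActivityDirection enum members
+        # asserted throughout this test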
self.assertFalse(analyzer_config.hourly_analytics[0].member_activities_used) + self.assertEqual( + analyzer_config.hourly_analytics[0].rawmemberactivities_condition, + {"thread_id": {"$ne": None}}, + ) + self.assertEqual(len(analyzer_config.raw_analytics), 1) + self.assertEqual(analyzer_config.raw_analytics[0].name, "replied_per_acc") + self.assertEqual( + analyzer_config.raw_analytics[0].type, ActivityType.INTERACTION + ) + self.assertEqual( + analyzer_config.raw_analytics[0].direction, ActivityDirection.RECEIVER + ) + self.assertTrue(analyzer_config.raw_analytics[0].member_activities_used) diff --git a/tests/integration/test_publish_on_success.py b/tests/integration/test_publish_on_success_recompute_false.py similarity index 85% rename from tests/integration/test_publish_on_success.py rename to tests/integration/test_publish_on_success_recompute_false.py index f788527..007b9f6 100644 --- a/tests/integration/test_publish_on_success.py +++ b/tests/integration/test_publish_on_success_recompute_false.py @@ -1,21 +1,20 @@ import os from datetime import datetime, timedelta -from automation.utils.interfaces import ( +from bson.objectid import ObjectId +from dotenv import load_dotenv +from tc_analyzer_lib.automation.utils.interfaces import ( Automation, AutomationAction, AutomationReport, AutomationTrigger, ) -from bson.objectid import ObjectId -from discord_utils import publish_on_success -from dotenv import load_dotenv -from utils.credentials import get_mongo_credentials +from tc_analyzer_lib.publish_on_success import publish_on_success from .utils.analyzer_setup import launch_db_access -def test_publish_on_success_check_notification_choreographies(): +def test_publish_on_success_recompute_false_check_notification_choreographies(): """ test the publish on success functions we want to check the database if the notify choreographies are created @@ -33,9 +32,9 @@ def test_publish_on_success_check_notification_choreographies(): {"_id": ObjectId(platform_id)} ) - db_access.db_mongo_client[guild_id].drop_collection("memberactivities") + db_access.db_mongo_client.drop_database(platform_id) + db_access.db_mongo_client.drop_database(guild_id) db_access.db_mongo_client["Saga"].drop_collection("sagas") - db_access.db_mongo_client[guild_id].drop_collection("guildmembers") db_access.db_mongo_client[at_db].drop_collection(at_collection) act_param = { @@ -66,7 +65,7 @@ def test_publish_on_success_check_notification_choreographies(): "id": guild_id, "icon": "111111111111111111111111", "name": "A guild", - "selectedChannels": ["4455178"], + "resources": ["4455178"], "window": window, "action": act_param, "period": datetime.now() - timedelta(days=10), @@ -80,19 +79,6 @@ def test_publish_on_success_check_notification_choreographies(): } ) - # Adding sample memberactivities - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") - ) - - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") - ) - db_access.db_mongo_client["Saga"]["sagas"].insert_one( { "choreography": { @@ -120,7 +106,7 @@ def test_publish_on_success_check_notification_choreographies(): }, "status": "IN_PROGRESS", "data": { - "platformId": ObjectId(platform_id), + "platformId": platform_id, "created": False, "discordId": expected_owner_id, "message": "data is ready", @@ -252,19 +238,15 @@ def test_publish_on_success_check_notification_choreographies(): 
db_access.db_mongo_client[at_db][at_collection].insert_one(automation.to_dict()) - date_yesterday = ( - (datetime.now() - timedelta(days=1)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - date_two_past_days = ( - (datetime.now() - timedelta(days=2)) - .replace(hour=0, minute=0, second=0) - .strftime("%Y-%m-%dT%H:%M:%S") + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 ) - db_access.db_mongo_client[guild_id]["memberactivities"].insert_many( + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( [ { "date": date_yesterday, @@ -315,21 +297,7 @@ def test_publish_on_success_check_notification_choreographies(): ] ) - # preparing the data for publish_on_success function - mongo_creds = get_mongo_credentials() - user = mongo_creds["user"] - password = mongo_creds["password"] - host = mongo_creds["host"] - port = mongo_creds["port"] - connection_uri = f"mongodb://{user}:{password}@{host}:{port}" - mongo_creds = { - "connection_str": connection_uri, - "db_name": "Saga", - "collection_name": "sagas", - } - - sample_args_data = saga_id - publish_on_success(None, None, sample_args_data) + publish_on_success(platform_id, recompute=False) notification_count = db_access.db_mongo_client["Saga"]["sagas"].count_documents( {"choreography.name": "DISCORD_NOTIFY_USERS"} diff --git a/tests/integration/test_publish_on_success_recompute_true.py b/tests/integration/test_publish_on_success_recompute_true.py new file mode 100644 index 0000000..79ef6e3 --- /dev/null +++ b/tests/integration/test_publish_on_success_recompute_true.py @@ -0,0 +1,350 @@ +import os +from datetime import datetime, timedelta + +from bson.objectid import ObjectId +from dotenv import load_dotenv +from tc_analyzer_lib.automation.utils.interfaces import ( + Automation, + AutomationAction, + AutomationReport, + AutomationTrigger, +) +from tc_analyzer_lib.publish_on_success import publish_on_success + +from .utils.analyzer_setup import launch_db_access + + +def test_publish_on_success_recompute_true_check_notification_choreographies(): + """ + test the publish on success functions + we want to check the database if the notify choreographies are created + """ + load_dotenv() + platform_id = "515151515151515151515151" + guild_id = "1234" + saga_id = "000000011111113333377777ie0w" + expected_owner_id = "334461287892" + db_access = launch_db_access(guild_id) + at_db = os.getenv("AUTOMATION_DB_NAME") + at_collection = os.getenv("AUTOMATION_DB_COLLECTION") + + db_access.db_mongo_client["Core"].drop_collection("platforms") + db_access.db_mongo_client["Core"].drop_collection("users") + db_access.db_mongo_client.drop_database(platform_id) + db_access.db_mongo_client.drop_database(guild_id) + db_access.db_mongo_client["Saga"].drop_collection("sagas") + db_access.db_mongo_client[at_db].drop_collection(at_collection) + + act_param = { + "INT_THR": 1, + "UW_DEG_THR": 1, + "PAUSED_T_THR": 1, + "CON_T_THR": 4, + "CON_O_THR": 3, + "EDGE_STR_THR": 5, + "UW_THR_DEG_THR": 5, + "VITAL_T_THR": 4, + "VITAL_O_THR": 3, + "STILL_T_THR": 2, + "STILL_O_THR": 2, + "DROP_H_THR": 2, + "DROP_I_THR": 1, + } + window = { + "period_size": 7, + "step_size": 1, + } + community_id = "aabbccddeeff001122334455" + owner_discord_id = "123487878912" + + db_access.db_mongo_client["Core"]["platforms"].insert_one( + { + "_id": ObjectId(platform_id), + "name": "discord", + 
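+            # minimal Core.platforms document for this test; `period` in the
+            # metadata below presumably marks how far back analytics run
+            # (10 days here)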
"metadata": { + "id": guild_id, + "icon": "111111111111111111111111", + "name": "A guild", + "resources": ["4455178"], + "window": window, + "action": act_param, + "period": datetime.now() - timedelta(days=10), + }, + "community": ObjectId(community_id), + "disconnectedAt": None, + "connectedAt": (datetime.now() - timedelta(days=10)), + "isInProgress": True, + "createdAt": datetime(2023, 11, 1), + "updatedAt": datetime(2023, 11, 1), + } + ) + + db_access.db_mongo_client["Core"]["users"].insert_one( + { + "_id": ObjectId(platform_id), + "discordId": owner_discord_id, + "communities": [ObjectId(community_id)], + "createdAt": datetime(2023, 12, 1), + "updatedAt": datetime(2023, 12, 1), + "tcaAt": datetime(2023, 12, 2), + } + ) + + db_access.db_mongo_client["Saga"]["sagas"].insert_one( + { + "choreography": { + "name": "DISCORD_UPDATE_CHANNELS", + "transactions": [ + { + "queue": "DISCORD_BOT", + "event": "FETCH", + "order": 1, + "status": "SUCCESS", + "start": datetime.now(), + "end": datetime.now(), + "runtime": 1, + }, + { + "queue": "DISCORD_ANALYZER", + "event": "RUN", + "order": 2, + "status": "SUCCESS", + "start": datetime.now(), + "end": datetime.now(), + "runtime": 1, + }, + ], + }, + "status": "IN_PROGRESS", + "data": { + "platformId": platform_id, + "created": False, + "discordId": expected_owner_id, + "message": "data is ready", + "useFallback": True, + }, + "sagaId": saga_id, + "createdAt": datetime.now(), + "updatedAt": datetime.now(), + } + ) + + db_access.db_mongo_client[guild_id]["guildmembers"].insert_many( + [ + { + "discordId": "1111", + "username": "user1", + "roles": [], + "joinedAt": datetime.now() - timedelta(days=10), + "avatar": None, + "isBot": False, + "discriminator": "0", + "permissions": "6677", + "deletedAt": None, + "globalName": "User1GlobalName", + "nickname": "User1NickName", # this will be used for the message + }, + { + "discordId": "1112", + "username": "user2", + "roles": [], + "joinedAt": datetime.now() - timedelta(days=10), + "avatar": None, + "isBot": False, + "discriminator": "0", + "permissions": "6677", + "deletedAt": None, + "globalName": "User2GlobalName", # this will be used for the message + "nickname": None, + }, + { + "discordId": "1113", + "username": "user3", # this will be used for the message + "roles": [], + "joinedAt": datetime.now() - timedelta(days=10), + "avatar": None, + "isBot": False, + "discriminator": "0", + "permissions": "6677", + "deletedAt": None, + "globalName": None, + "nickname": None, + }, + { + "discordId": "1116", + "username": "user6", + "roles": [], + "joinedAt": datetime.now() - timedelta(days=10), + "avatar": None, + "isBot": False, + "discriminator": "0", + "permissions": "6677", + "deletedAt": None, + "globalName": "User6GlobalName", + "nickname": "User6NickName", + }, + { + "discordId": "1119", + "username": "user9", + "roles": [], + "joinedAt": datetime.now() - timedelta(days=10), + "avatar": None, + "isBot": False, + "discriminator": "0", + "permissions": "6677", + "deletedAt": None, + "globalName": "User9GlobalName", + "nickname": None, + }, + { + "discordId": "999", + "username": "community_manager", + "roles": [], + "joinedAt": datetime.now() - timedelta(days=10), + "avatar": None, + "isBot": False, + "discriminator": "0", + "permissions": "6677", + "deletedAt": None, + "globalName": "User9GlobalName", + "nickname": None, + }, + ] + ) + + triggers = [ + AutomationTrigger(options={"category": "all_new_disengaged"}, enabled=True), + AutomationTrigger(options={"category": "all_new_active"}, enabled=False), + ] 
+ actions = [ + AutomationAction( + template="hey {{ngu}}! please get back to us!", + options={}, + enabled=True, + ), + AutomationAction( + template="hey {{ngu}}! please get back to us2!", + options={}, + enabled=False, + ), + ] + + report = AutomationReport( + recipientIds=["999"], + template="hey body! This users were messaged:\n{{#each usernames}}{{this}}{{/each}}", + options={}, + enabled=True, + ) + today_time = datetime.now() + + automation = Automation( + guild_id, + triggers, + actions, + report, + enabled=True, + createdAt=today_time, + updatedAt=today_time, + ) + + db_access.db_mongo_client[at_db][at_collection].insert_one(automation.to_dict()) + + date_yesterday = (datetime.now() - timedelta(days=1)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + date_two_past_days = (datetime.now() - timedelta(days=2)).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + + db_access.db_mongo_client[platform_id]["memberactivities"].insert_many( + [ + { + "date": date_yesterday, + "all_joined": [], + "all_joined_day": [], + "all_consistent": [], + "all_vital": [], + "all_active": [], + "all_connected": [], + "all_paused": [], + "all_new_disengaged": ["1111", "1112", "1113"], + "all_disengaged": [], + "all_unpaused": [], + "all_returned": [], + "all_new_active": [], + "all_still_active": [], + "all_dropped": [], + "all_disengaged_were_newly_active": [], + "all_disengaged_were_consistently_active": [], + "all_disengaged_were_vital": [], + "all_lurker": [], + "all_about_to_disengage": [], + "all_disengaged_in_past": [], + }, + { + "date": date_two_past_days, + "all_joined": [], + "all_joined_day": [], + "all_consistent": [], + "all_vital": [], + "all_active": [], + "all_connected": [], + "all_paused": [], + "all_new_disengaged": ["1116", "1119"], + "all_disengaged": [], + "all_unpaused": [], + "all_returned": [], + "all_new_active": [], + "all_still_active": [], + "all_dropped": [], + "all_disengaged_were_newly_active": [], + "all_disengaged_were_consistently_active": [], + "all_disengaged_were_vital": [], + "all_lurker": [], + "all_about_to_disengage": [], + "all_disengaged_in_past": [], + }, + ] + ) + + publish_on_success(platform_id, recompute=True) + + notification_count = db_access.db_mongo_client["Saga"]["sagas"].count_documents( + {"choreography.name": "DISCORD_NOTIFY_USERS"} + ) + + assert notification_count == 5 + + user1_doc = db_access.db_mongo_client["Saga"]["sagas"].find_one( + {"data.discordId": "1111"} + ) + assert user1_doc["data"]["message"] == ("hey User1NickName! please get back to us!") + + user2_doc = db_access.db_mongo_client["Saga"]["sagas"].find_one( + {"data.discordId": "1112"} + ) + assert user2_doc["data"]["message"] == ( + "hey User2GlobalName! please get back to us!" + ) + + user3_doc = db_access.db_mongo_client["Saga"]["sagas"].find_one( + {"data.discordId": "1113"} + ) + assert user3_doc["data"]["message"] == ("hey user3! please get back to us!") + + user_cm_doc = db_access.db_mongo_client["Saga"]["sagas"].find_one( + {"data.discordId": "999"} + ) + expected_msg = "hey body! This users were messaged:\n" + expected_msg += "- User1NickName\n- User2GlobalName\n- user3\n" + assert user_cm_doc["data"]["message"] == expected_msg + + job_finished_saga = db_access.db_mongo_client["Saga"]["sagas"].find_one( + {"data.discordId": owner_discord_id} + ) + assert job_finished_saga["data"]["message"] == ( + "Your data import into TogetherCrew is complete! " + "See your insights on your dashboard https://app.togethercrew.com/." 
+ " If you have questions send a DM to katerinabc (Discord) or k_bc0 (Telegram)." + ) diff --git a/tests/integration/test_rawinfo_webhook_fetching.py b/tests/integration/test_rawinfo_webhook_fetching.py index d74e4ae..96e8575 100644 --- a/tests/integration/test_rawinfo_webhook_fetching.py +++ b/tests/integration/test_rawinfo_webhook_fetching.py @@ -1,7 +1,7 @@ from datetime import datetime, timedelta -from discord_analyzer.models.RawInfoModel import RawInfoModel -from utils.mongo import MongoSingleton +from tc_analyzer_lib.models.RawInfoModel import RawInfoModel +from tc_analyzer_lib.utils.mongo import MongoSingleton def test_rawinfo_get_day_entry_empty_data(): @@ -13,7 +13,7 @@ def test_rawinfo_get_day_entry_empty_data(): mongo_singleton = MongoSingleton.get_instance() client = mongo_singleton.get_client() - client[guildId].drop_collection("rawinfos") + client[guildId].drop_collection("rawmemberactivities") rawinfo_model = RawInfoModel(client[guildId]) @@ -32,7 +32,7 @@ def test_rawinfo_get_day_entry_data_avaialble(): mongo_singleton = MongoSingleton.get_instance() client = mongo_singleton.get_client() - client[guildId].drop_collection("rawinfos") + client[guildId].drop_collection("rawmemberactivities") specific_midday = datetime(2023, 3, 3, 12) @@ -104,7 +104,7 @@ def test_rawinfo_get_day_entry_data_avaialble(): }, ] - client[guildId]["rawinfos"].insert_many(rawinfo_samples) + client[guildId]["rawmemberactivities"].insert_many(rawinfo_samples) rawinfo_model = RawInfoModel(client[guildId]) diff --git a/tests/integration/test_service_connection.py b/tests/integration/test_service_connection.py index 1d4c4e7..ff828d2 100644 --- a/tests/integration/test_service_connection.py +++ b/tests/integration/test_service_connection.py @@ -1,5 +1,5 @@ +from tc_analyzer_lib.utils.credentials import get_rabbit_mq_credentials from tc_messageBroker.message_broker import RabbitMQ -from utils.credentials import get_rabbit_mq_credentials def test_rabbit_mq_connect(): diff --git a/tests/integration/utils/analyzer_setup.py b/tests/integration/utils/analyzer_setup.py index c634549..3c4c87f 100644 --- a/tests/integration/utils/analyzer_setup.py +++ b/tests/integration/utils/analyzer_setup.py @@ -1,43 +1,9 @@ -import os - -from discord_analyzer.DB_operations.mongodb_access import DB_access -from discord_analyzer.rn_analyzer import RnDaoAnalyzer from dotenv import load_dotenv +from tc_analyzer_lib.DB_operations.mongodb_access import DB_access -def setup_analyzer( - guild_id: str, -) -> RnDaoAnalyzer: +def launch_db_access(platform_id: str): load_dotenv() - - analyzer = RnDaoAnalyzer(guild_id) - - user = os.getenv("MONGODB_USER", "") - password = os.getenv("MONGODB_PASS", "") - host = os.getenv("MONGODB_HOST", "") - port = os.getenv("MONGODB_PORT", "") - - analyzer.set_mongo_database_info( - mongo_db_host=host, - mongo_db_password=password, - mongo_db_user=user, - mongo_db_port=port, - ) - - analyzer.database_connect() - - return analyzer - - -def launch_db_access(guildId: str): - load_dotenv() - user = os.getenv("MONGODB_USER") - password = os.getenv("MONGODB_PASS") - host = os.getenv("MONGODB_HOST") - port = os.getenv("MONGODB_PORT") - - connection_str = f"mongodb://{user}:{password}@{host}:{port}" - - db_access = DB_access(guildId, connection_str) + db_access = DB_access(platform_id) print("CONNECTED to MongoDB!") return db_access diff --git a/tests/integration/utils/mock_graph.py b/tests/integration/utils/mock_graph.py index 27c531b..14eee74 100644 --- a/tests/integration/utils/mock_graph.py +++ 
b/tests/integration/utils/mock_graph.py @@ -1,7 +1,7 @@ -import os +# import os -from discord_analyzer import RnDaoAnalyzer -from dotenv import load_dotenv +# from dotenv import load_dotenv +# from tc_analyzer_lib.tc_analyzer import TCAnalyzer from tc_core_analyzer_lib.assess_engagement import EngagementAssessment from tc_core_analyzer_lib.utils.activity import DiscordActivity @@ -64,36 +64,25 @@ def generate_mock_graph(int_matrix, acc_names): return graph -def store_mock_data_in_neo4j(graph_dict, guildId, community_id): - # CREDS - load_dotenv() - user = os.getenv("MONGODB_USER") - password = os.getenv("MONGODB_PASS") - host = os.getenv("MONGODB_HOST") - port = os.getenv("MONGODB_PORT") +# def store_mock_data_in_neo4j(graph_dict, guildId, community_id): +# # CREDS +# load_dotenv() - analyzer = RnDaoAnalyzer(guildId) +# analyzer = TCAnalyzer(guildId) +# analyzer.database_connect() - analyzer.set_mongo_database_info( - mongo_db_host=host, - mongo_db_password=password, - mongo_db_user=user, - mongo_db_port=port, - ) - analyzer.database_connect() - - guilds_data = {} +# guilds_data = {} - guilds_data["heatmaps"] = None - guilds_data["memberactivities"] = ( - None, - graph_dict, - ) +# guilds_data["heatmaps"] = None +# guilds_data["memberactivities"] = ( +# None, +# graph_dict, +# ) - analyzer.DB_connections.store_analytics_data( - analytics_data=guilds_data, - guild_id=guildId, - community_id=community_id, - remove_heatmaps=False, - remove_memberactivities=False, - ) +# analyzer.DB_connections.store_analytics_data( +# analytics_data=guilds_data, +# guild_id=guildId, +# community_id=community_id, +# remove_heatmaps=False, +# remove_memberactivities=False, +# ) diff --git a/tests/integration/utils/mock_heatmaps.py b/tests/integration/utils/mock_heatmaps.py index bc421f1..1d028d5 100644 --- a/tests/integration/utils/mock_heatmaps.py +++ b/tests/integration/utils/mock_heatmaps.py @@ -14,8 +14,8 @@ def create_empty_heatmaps_data( for i in range(count): date = start_date + timedelta(days=i) document = { - "date": date.strftime("%Y-%m-%d"), - "channelId": "1020707129214111827", + "date": date, + "channel_id": "1020707129214111827", "thr_messages": list(np.zeros(24)), "lone_messages": list(np.zeros(24)), "replier": list(np.zeros(24)), @@ -27,7 +27,7 @@ def create_empty_heatmaps_data( "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], - "account_name": "973993299281076285", + "user": "973993299281076285", } data.append(document) diff --git a/tests/integration/utils/mock_memberactivities.py b/tests/integration/utils/mock_memberactivities.py index a1fa351..20fc184 100644 --- a/tests/integration/utils/mock_memberactivities.py +++ b/tests/integration/utils/mock_memberactivities.py @@ -14,7 +14,7 @@ def create_empty_memberactivities_data( date = start_date + timedelta(days=i) date = date.replace(hour=0, minute=0, second=0, microsecond=0) document = { - "date": date.isoformat(), + "date": date, "all_joined": [], "all_joined_day": [], "all_consistent": [], diff --git a/tests/integration/utils/remove_and_setup_guild.py b/tests/integration/utils/setup_platform.py similarity index 69% rename from tests/integration/utils/remove_and_setup_guild.py rename to tests/integration/utils/setup_platform.py index 20105cd..2bf06b4 100644 --- a/tests/integration/utils/remove_and_setup_guild.py +++ b/tests/integration/utils/setup_platform.py @@ -3,32 +3,41 @@ import numpy as np from bson.objectid import ObjectId -from discord_analyzer.DB_operations.mongodb_access import DB_access +from 
tc_analyzer_lib.DB_operations.mongodb_access import DB_access
+from tc_analyzer_lib.tc_analyzer import TCAnalyzer


-def setup_db_guild(
+def setup_platform(
     db_access: DB_access,
     platform_id: str,
-    guildId: str = "1234",
     discordId_list: list[str] = ["973993299281076285"],
     discordId_isbot: list[bool] = [False],
     dates: Optional[list[datetime]] = None,
     days_ago_period: int = 30,
     **kwargs,
-):
+) -> TCAnalyzer:
     """
     Remove the guild from Core databse and then insert it there
     also drop the guildId database and re-create
-    it then create the guildmembers collection in it
+    it then create the rawmembers collection in it

     `discordId_isbot` is representative if each user is bot or not
     `community_id` can be passed in kwargs. default is `aabbccddeeff001122334455`
     """
     community_id = kwargs.get("community_id", "aabbccddeeff001122334455")
+    resources = kwargs.get("resources", ["1020707129214111827"])
     db_access.db_mongo_client["Core"]["platforms"].delete_one(
         {"_id": ObjectId(platform_id)}
     )
-    db_access.db_mongo_client.drop_database(guildId)
+    db_access.db_mongo_client.drop_database(platform_id)
+
+    period = (datetime.now() - timedelta(days=days_ago_period)).replace(
+        hour=0, minute=0, second=0, microsecond=0
+    )
+    window = kwargs.get(
+        "window",
+        {"period_size": 7, "step_size": 1},
+    )

     action = kwargs.get(
         "action",
@@ -49,6 +58,7 @@ def setup_db_guild(
         },
     )

+    guildId = "1234"
     db_access.db_mongo_client["Core"]["platforms"].insert_one(
         {
             "_id": ObjectId(platform_id),
@@ -57,10 +67,10 @@
                 "id": guildId,
                 "icon": "111111111111111111111111",
                 "name": "A guild",
-                "selectedChannels": ["1020707129214111827"],
-                "window": {"period_size": 7, "step_size": 1},
+                "resources": resources,
+                "window": window,
                 "action": action,
-                "period": datetime.now() - timedelta(days=days_ago_period),
+                "period": period,
             },
             "community": ObjectId(community_id),
             "disconnectedAt": None,
@@ -71,6 +81,14 @@
         }
     )

+    analyzer = TCAnalyzer(
+        platform_id,
+        resources=resources,
+        period=period,
+        action=action,
+        window=window,
+    )
+
     if dates is None:
         dates_using = np.repeat(
             datetime.now() - timedelta(days=10), len(discordId_list)
@@ -89,14 +107,14 @@
     user_data = zip(discordId_list, discordId_isbot)

     for idx, (discordId, isbot) in enumerate(user_data):
-        db_access.db_mongo_client[guildId]["guildmembers"].insert_one(
+        db_access.db_mongo_client[platform_id]["rawmembers"].insert_one(
             {
-                "discordId": discordId,
-                "username": f"sample_user_{idx}",
-                "roles": ["1012430565959553145"],
-                "joinedAt": dates_using[idx],
-                "avatar": "3ddd6e429f75d6a711d0a58ba3060694",
-                "isBot": isbot,
-                "discriminator": "0",
+                "id": discordId,
+                "joined_at": dates_using[idx],
+                "left_at": None,
+                "is_bot": isbot,
+                "options": {},
             }
         )
+
+    return analyzer
diff --git a/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py b/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py
new file mode 100644
index 0000000..5de93a5
--- /dev/null
+++ b/tests/unit/test_analyzer_heatmaps_compute_iteration_counts.py
@@ -0,0 +1,36 @@
+from datetime import datetime
+from unittest import TestCase
+
+from tc_analyzer_lib.metrics.heatmaps import Heatmaps
+from tc_analyzer_lib.schemas.platform_configs import DiscordAnalyzerConfig
+
+
+class TestAnalyzerHeatmapsIterationCount(TestCase):
+    def setUp(self) -> None:
+        platform_id = "1234567890"
+        period = datetime(2024, 1, 1)
+        resources = ["123", "124", "125"]
+        # using one of the configs we currently have
+        # it could be any other platform's config
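+        # unit-level check: _compute_iteration_counts is presumably pure
+        # arithmetic (days since the analytics date * resources * authors),
+        # so no database fixtures are needed here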
+        discord_analyzer_config = DiscordAnalyzerConfig()
+
+        self.heatmaps = Heatmaps(
+            platform_id=platform_id,
+            period=period,
+            resources=resources,
+            analyzer_config=discord_analyzer_config,
+        )
+
+    def test_compute_iteration_counts(self):
+        analytics_date = datetime(2024, 1, 1)
+        now = datetime.now()
+
+        days = (now - analytics_date).days
+
+        iteration_count = self.heatmaps._compute_iteration_counts(
+            analytics_date=analytics_date,
+            resources_count=5,
+            authors_count=5,
+        )
+
+        self.assertEqual(iteration_count, days * 5 * 5)  # days * resources * authors
diff --git a/tests/unit/test_automation_class.py b/tests/unit/test_automation_class.py
index b7e02d4..31aaf45 100644
--- a/tests/unit/test_automation_class.py
+++ b/tests/unit/test_automation_class.py
@@ -1,7 +1,7 @@
 import unittest
 from datetime import datetime

-from automation.utils.interfaces import (
+from tc_analyzer_lib.automation.utils.interfaces import (
     Automation,
     AutomationAction,
     AutomationReport,
diff --git a/tests/unit/test_automation_get_handler_type.py b/tests/unit/test_automation_get_handler_type.py
index ccbd8bd..434583c 100644
--- a/tests/unit/test_automation_get_handler_type.py
+++ b/tests/unit/test_automation_get_handler_type.py
@@ -1,6 +1,6 @@
 import unittest

-from automation.automation_workflow import AutomationWorkflow
+from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow


 class TestGetHandlerType(unittest.TestCase):
diff --git a/tests/unit/test_automation_trigger_action_report.py b/tests/unit/test_automation_trigger_action_report.py
index 4f2ec9f..c8b8802 100644
--- a/tests/unit/test_automation_trigger_action_report.py
+++ b/tests/unit/test_automation_trigger_action_report.py
@@ -1,6 +1,6 @@
 import unittest

-from automation.utils.interfaces import (
+from tc_analyzer_lib.automation.utils.interfaces import (
     AutomationAction,
     AutomationReport,
     AutomationTrigger,
diff --git a/tests/unit/test_compile_message.py b/tests/unit/test_compile_message.py
index 57dae41..d7a3239 100644
--- a/tests/unit/test_compile_message.py
+++ b/tests/unit/test_compile_message.py
@@ -1,6 +1,6 @@
 import unittest

-from automation.automation_workflow import AutomationWorkflow
+from tc_analyzer_lib.automation.automation_workflow import AutomationWorkflow


 class CompileTemplateMessage(unittest.TestCase):
diff --git a/tests/unit/test_converting_to_dict.py b/tests/unit/test_converting_to_dict.py
index 4fd9a68..505dc17 100644
--- a/tests/unit/test_converting_to_dict.py
+++ b/tests/unit/test_converting_to_dict.py
@@ -1,4 +1,4 @@
-from discord_analyzer.analysis.utils.member_activity_utils import convert_to_dict
+from tc_analyzer_lib.algorithms.utils.member_activity_utils import convert_to_dict


 def test_empty():
diff --git a/tests/unit/test_creds_loading.py b/tests/unit/test_creds_loading.py
index 2d35349..3d8f27c 100644
--- a/tests/unit/test_creds_loading.py
+++ b/tests/unit/test_creds_loading.py
@@ -1,4 +1,4 @@
-from utils.credentials import (
+from tc_analyzer_lib.utils.credentials import (
     get_mongo_credentials,
     get_rabbit_mq_credentials,
     get_redis_credentials,
diff --git a/tests/unit/test_engagement_notifier_subtract_users.py b/tests/unit/test_engagement_notifier_subtract_users.py
index 8140903..613c8e8 100644
--- a/tests/unit/test_engagement_notifier_subtract_users.py
+++ b/tests/unit/test_engagement_notifier_subtract_users.py
@@ -1,4 +1,4 @@
-from automation.utils.automation_base import AutomationBase
+from tc_analyzer_lib.automation.utils.automation_base import AutomationBase


 def test_subtract_users_empty_data():
diff --git
a/tests/unit/test_filter_channel_name_id.py b/tests/unit/test_filter_channel_name_id.py deleted file mode 100644 index 27dbb48..0000000 --- a/tests/unit/test_filter_channel_name_id.py +++ /dev/null @@ -1,107 +0,0 @@ -from discord_analyzer.analysis.analytics_interactions_script import ( - filter_channel_name_id, -) - - -def test_filter_channel_name_empty_input(): - sample_input = [] - - output = filter_channel_name_id(sample_input) - - assert output == {} - - -def test_filter_channel_name_one_synthesized_input(): - sample_input = [ - { - "channelId": "123", - "channelName": "welcome-and-rules", - } - ] - - output = filter_channel_name_id(sample_input) - - assert output == {"123": "welcome-and-rules"} - - -def test_filter_channel_name_multiple_synthesized_input(): - sample_input = [ - { - "channelId": "123", - "channelName": "welcome-and-rules", - }, - { - "channelId": "1234", - "channelName": "welcome-and-rules2", - }, - { - "channelId": "12345", - "channelName": "welcome-and-rules3", - }, - ] - - output = filter_channel_name_id(sample_input) - - assert output == { - "123": "welcome-and-rules", - "1234": "welcome-and-rules2", - "12345": "welcome-and-rules3", - } - - -def test_filter_channel_name_one_real_input(): - sample_input = [ - { - "_id": {"$oid": "6436d6ab47ce0ae8b83f25fc"}, - "channelId": "993163081939165236", - "__v": 0, - "channelName": "welcome-and-rules", - "last_update": {"$date": "2023-05-10T01:00:05.379Z"}, - } - ] - - output = filter_channel_name_id(sample_input) - - assert output == {"993163081939165236": "welcome-and-rules"} - - -def test_filter_channel_name_multiple_real_input(): - sample_input = [ - { - "_id": {"$oid": "6436d6ab47ce0ae8b83f25fc"}, - "channelId": "993163081939165236", - "__v": 0, - "channelName": "welcome-and-rules", - "last_update": {"$date": "2023-05-10T01:00:05.379Z"}, - }, - { - "_id": {"$oid": "6436d6ab47ce0ae8b83f2600"}, - "channelId": "993163081939165237", - "__v": 0, - "channelName": "announcements", - "last_update": {"$date": "2023-05-10T01:00:05.382Z"}, - }, - { - "_id": {"$oid": "6436d6ab47ce0ae8b83f260a"}, - "channelId": "993163081939165238", - "__v": 0, - "channelName": "resources", - "last_update": {"$date": "2023-05-10T01:00:05.385Z"}, - }, - { - "_id": {"$oid": "6436d6ab47ce0ae8b83f2613"}, - "channelId": "993163081939165240", - "__v": 0, - "channelName": "general", - "last_update": {"$date": "2023-05-10T01:00:05.407Z"}, - }, - ] - - output = filter_channel_name_id(sample_input) - - assert output == { - "993163081939165236": "welcome-and-rules", - "993163081939165237": "announcements", - "993163081939165238": "resources", - "993163081939165240": "general", - } diff --git a/tests/unit/test_filter_channel_thread.py b/tests/unit/test_filter_channel_thread.py deleted file mode 100644 index 20993c9..0000000 --- a/tests/unit/test_filter_channel_thread.py +++ /dev/null @@ -1,160 +0,0 @@ -from discord_analyzer.analysis.analytics_interactions_script import ( - filter_channel_thread, -) - - -def test_filter_channel_thread_single_empty_input(): - sample_input = [] - - output = filter_channel_thread(sample_input) - - assert output == {} - - -def test_filter_channel_thread_multiple_empty_inputs(): - sample_input = [] - - output = filter_channel_thread( - sample_input, - ) - - assert output == {} - - -def test_filter_channel_thread_single_channel_single_message(): - sample_input = [ - { - "author": "ahmadyazdanii#7517", - "content": "test", - "createdDate": "2023-04-19 07:05:17", - "channelId": "993163081939165240", - "channelName": "off-topic", - 
"threadId": None, - "threadName": None, - } - ] - - output = filter_channel_thread( - sample_input, - ) - - sample_output = {"off-topic": {None: {"1:ahmadyazdanii#7517": "test"}}} - - assert output == sample_output - - -# flake8: noqa -def test_filter_channel_thread_multiple_channel_multiple_message_single_user_all_channels(): - sample_input = [ - { - "author": "ahmadyazdanii#7517", - "content": "test", - "createdDate": "2023-04-19 07:05:17", - "channelId": "993163081939165240", - "channelName": "off-topic", - "threadId": None, - "threadName": None, - }, - { - "author": "ahmadyazdanii#7517", - "content": "hi", - "createdDate": "2023-04-19 07:05:18", - "channelId": "993163081939165240", - "channelName": "off-topic", - "threadId": "1098202658390691930", - "threadName": "test", - }, - { - "author": "ahmadyazdanii#7517", - "content": "test2", - "createdDate": "2023-04-19 07:14:57", - "channelId": "993163081939165240", - "channelName": "off-topic", - "threadId": "1098202658390691930", - "threadName": "test", - }, - ] - - output = filter_channel_thread( - sample_input, - ) - - sample_output = { - "off-topic": { - None: {"1:ahmadyazdanii#7517": "test"}, - "test": { - "1:ahmadyazdanii#7517": "hi", - "2:ahmadyazdanii#7517": "test2", - }, - } - } - - assert output == sample_output - - -def test_filter_channel_thread_single_channel_multiple_message_multiple_user_all_channels(): # flake8: noqa - sample_input = [ - { - "author": "ahmadyazdanii#7517", - "content": "test", - "createdDate": "2023-03-10 07:05:17", - "channelId": "993163081939165240", - "channelName": "off-topic", - "threadId": None, - "threadName": None, - }, - { - "author": "Ene", - "content": "Hello", - "createdDate": "2023-03-11 07:05:17", - "channelId": "993163081939165240", - "channelName": "off-topic", - "threadId": "1098202658390691930", - "threadName": "test-thread", - }, - { - "author": "Amin", - "content": "Hi", - "createdDate": "2023-03-12 07:05:18", - "channelId": "993163081939165240", - "channelName": "off-topic", - "threadId": "1098202658390691930", - "threadName": "test-thread", - }, - { - "author": "Behzad", - "content": "Ola!", - "createdDate": "2023-04-07 07:14:57", - "channelId": "993163081939165240", - "channelName": "off-topic", - "threadId": "1098202658390691930", - "threadName": "test-thread", - }, - { - "author": "Nima", - "content": "Salam!", - "createdDate": "2023-04-12 07:14:57", - "channelId": "993163081939165222", - "channelName": "off-topic-2", - "threadId": "1098202658390691931", - "threadName": "test-thread2", - }, - ] - - output = filter_channel_thread( - sample_input, - ) - - sample_output = { - "off-topic": { - None: {"1:ahmadyazdanii#7517": "test"}, - "test-thread": { - "1:Ene": "Hello", - "2:Amin": "Hi", - "3:Behzad": "Ola!", - }, - }, - "off-topic-2": {"test-thread2": {"1:Nima": "Salam!"}}, - } - - assert output == sample_output diff --git a/tests/unit/test_generate_interaction_mtx.py b/tests/unit/test_generate_interaction_mtx.py index bb663d9..fe545a2 100644 --- a/tests/unit/test_generate_interaction_mtx.py +++ b/tests/unit/test_generate_interaction_mtx.py @@ -1,5 +1,5 @@ -from discord_analyzer.analysis.utils.activity import Activity -from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( +from tc_analyzer_lib.algorithms.utils.activity import Activity +from tc_analyzer_lib.algorithms.utils.compute_interaction_mtx_utils import ( generate_interaction_matrix, ) @@ -9,32 +9,32 @@ def test_empty_inputs(): int_mtx = generate_interaction_matrix( per_acc_interactions, acc_names=[], - 
activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + activities=["reacted_per_acc", "mentioner_per_acc", "replied_per_acc"], ) assert int_mtx.shape == (0, 0) def test_single_account(): per_acc_interactions = { - "968122690118512720": [ + "user0": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "user": "user0", + "reacted_per_acc": [{"account": "user0", "count": 1}], + "mentioner_per_acc": [{"account": "user0", "count": 1}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "user": "user0", + "reacted_per_acc": [{"account": "user0", "count": 1}], + "mentioner_per_acc": [{"account": "user0", "count": 1}], "replied_per_acc": [], }, ] } int_mtx = generate_interaction_matrix( per_acc_interactions, - acc_names=["968122690118512720"], - activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + acc_names=["user0"], + activities=["reacted_per_acc", "mentioner_per_acc", "replied_per_acc"], ) # converting `numpy.bool_` to python `bool` @@ -43,19 +43,19 @@ def test_single_account(): def test_two_accounts(): - acc_names = ["968122690118512720", "968122690118512799"] + acc_names = ["user0", "user1"] per_acc_interactions = { - "968122690118512720": [ + "user0": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512799", "count": 1}]], + "user": "user0", + "reacted_per_acc": [{"account": "user1", "count": 1}], + "mentioner_per_acc": [{"account": "user1", "count": 1}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "user": "user0", + "reacted_per_acc": [{"account": "user0", "count": 2}], + "mentioner_per_acc": [{"account": "user0", "count": 1}], "replied_per_acc": [], }, ] @@ -64,7 +64,7 @@ def test_two_accounts(): int_mtx = generate_interaction_matrix( per_acc_interactions, acc_names=acc_names, - activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + activities=["reacted_per_acc", "mentioner_per_acc", "replied_per_acc"], ) # converting `numpy.bool_` to python `bool` @@ -83,39 +83,39 @@ def test_multiple_interactions(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], - "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "795295822534148096", "count": 9}], + "mentioner_per_acc": [{"account": "795295822534148096", "count": 2}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [ - [{"account": "7952958225341480444", "count": 5}], - [{"account": "7952958225341480433", "count": 2}], + {"account": "7952958225341480444", "count": 5}, + {"account": "7952958225341480433", "count": 2}, ], "replied_per_acc": [], }, ], "968122690118512721": [ { - "account_name": 
"968122690118512721", - "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], - "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "user": "968122690118512721", + "reacted_per_acc": [{"account": "795295822534148096", "count": 3}], + "mentioner_per_acc": [{"account": "795295822534148096", "count": 4}], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [], "mentioner_per_acc": [], - "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + "replied_per_acc": [{"account": "7952958225341480444", "count": 8}], }, ], } @@ -123,7 +123,7 @@ def test_multiple_interactions(): int_mtx = generate_interaction_matrix( per_acc_interactions, acc_names=acc_names, - activities=[Activity.Mention, Activity.Reply, Activity.Reaction], + activities=["reacted_per_acc", "mentioner_per_acc", "replied_per_acc"], ) assert int_mtx.shape == (5, 5) diff --git a/tests/unit/test_generate_interaction_mtx_mention.py b/tests/unit/test_generate_interaction_mtx_mention.py index fd18db1..dbcec85 100644 --- a/tests/unit/test_generate_interaction_mtx_mention.py +++ b/tests/unit/test_generate_interaction_mtx_mention.py @@ -1,5 +1,5 @@ -from discord_analyzer.analysis.utils.activity import Activity -from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( +from tc_analyzer_lib.algorithms.utils.activity import Activity +from tc_analyzer_lib.algorithms.utils.compute_interaction_mtx_utils import ( generate_interaction_matrix, ) @@ -8,15 +8,15 @@ def test_single_account_mention(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 1}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 1}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 1}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 1}], "replied_per_acc": [], }, ] @@ -24,7 +24,7 @@ def test_single_account_mention(): int_mtx = generate_interaction_matrix( per_acc_interactions, acc_names=["968122690118512720"], - activities=[Activity.Mention], + activities=["mentioner_per_acc"], ) # converting `numpy.bool_` to python `bool` @@ -37,22 +37,22 @@ def test_two_accounts_mention(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512799", "count": 3}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512799", "count": 1}], + "mentioner_per_acc": [{"account": "968122690118512799", "count": 3}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 2}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 1}], 
"replied_per_acc": [], }, ] } int_mtx = generate_interaction_matrix( - per_acc_interactions, acc_names=acc_names, activities=[Activity.Mention] + per_acc_interactions, acc_names=acc_names, activities=["mentioner_per_acc"] ) # converting `numpy.bool_` to python `bool` is_match = bool((int_mtx == [[1, 3], [0, 0]]).all()) @@ -70,48 +70,48 @@ def test_multiple_interactions_mention(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], - "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "795295822534148096", "count": 9}], + "mentioner_per_acc": [{"account": "795295822534148096", "count": 2}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [ - [{"account": "7952958225341480444", "count": 7}], - [{"account": "7952958225341480433", "count": 1}], + {"account": "7952958225341480444", "count": 7}, + {"account": "7952958225341480433", "count": 1}, ], "mentioner_per_acc": [ - [{"account": "7952958225341480444", "count": 5}], - [{"account": "7952958225341480433", "count": 2}], + {"account": "7952958225341480444", "count": 5}, + {"account": "7952958225341480433", "count": 2}, ], "replied_per_acc": [], }, ], "968122690118512721": [ { - "account_name": "968122690118512721", - "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], - "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "user": "968122690118512721", + "reacted_per_acc": [{"account": "795295822534148096", "count": 3}], + "mentioner_per_acc": [{"account": "795295822534148096", "count": 4}], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [], "mentioner_per_acc": [], - "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + "replied_per_acc": [{"account": "7952958225341480444", "count": 8}], }, ], } int_mtx = generate_interaction_matrix( - per_acc_interactions, acc_names=acc_names, activities=[Activity.Mention] + per_acc_interactions, acc_names=acc_names, activities=["mentioner_per_acc"] ) assert int_mtx.shape == (5, 5) is_match = ( diff --git a/tests/unit/test_generate_interaction_mtx_reaction.py b/tests/unit/test_generate_interaction_mtx_reaction.py index 49b6c36..1183545 100644 --- a/tests/unit/test_generate_interaction_mtx_reaction.py +++ b/tests/unit/test_generate_interaction_mtx_reaction.py @@ -1,5 +1,5 @@ -from discord_analyzer.analysis.utils.activity import Activity -from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( +from tc_analyzer_lib.algorithms.utils.activity import Activity +from tc_analyzer_lib.algorithms.utils.compute_interaction_mtx_utils import ( generate_interaction_matrix, ) @@ -8,23 +8,23 @@ def test_single_account_reaction(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 1}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 2}], "replied_per_acc": [], }, { - 
"account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 7}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 4}]], - "replied_per_acc": [[{"account": "968122690118512720", "count": 3}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 7}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 4}], + "replied_per_acc": [{"account": "968122690118512720", "count": 3}], }, ] } int_mtx = generate_interaction_matrix( per_acc_interactions, acc_names=["968122690118512720"], - activities=[Activity.Reaction], + activities=["reacted_per_acc"], ) # converting `numpy.bool_` to python `bool` @@ -37,22 +37,22 @@ def test_two_accounts_reaction(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512799", "count": 3}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512799", "count": 1}], + "mentioner_per_acc": [{"account": "968122690118512799", "count": 3}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 2}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 1}], "replied_per_acc": [], }, ] } int_mtx = generate_interaction_matrix( - per_acc_interactions, acc_names=acc_names, activities=[Activity.Reaction] + per_acc_interactions, acc_names=acc_names, activities=["reacted_per_acc"] ) # converting `numpy.bool_` to python `bool` is_match = bool((int_mtx == [[2, 1], [0, 0]]).all()) @@ -70,48 +70,48 @@ def test_multiple_interactions_reaction(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], - "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "795295822534148096", "count": 9}], + "mentioner_per_acc": [{"account": "795295822534148096", "count": 2}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [ - [{"account": "7952958225341480444", "count": 7}], - [{"account": "7952958225341480433", "count": 1}], + {"account": "7952958225341480444", "count": 7}, + {"account": "7952958225341480433", "count": 1}, ], "mentioner_per_acc": [ - [{"account": "7952958225341480444", "count": 5}], - [{"account": "7952958225341480433", "count": 2}], + {"account": "7952958225341480444", "count": 5}, + {"account": "7952958225341480433", "count": 2}, ], "replied_per_acc": [], }, ], "968122690118512721": [ { - "account_name": "968122690118512721", - "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], - "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], + "user": "968122690118512721", + "reacted_per_acc": [{"account": "795295822534148096", "count": 3}], + "mentioner_per_acc": [{"account": "795295822534148096", "count": 4}], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + 
"user": "968122690118512721", "reacted_per_acc": [], "mentioner_per_acc": [], - "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + "replied_per_acc": [{"account": "7952958225341480444", "count": 8}], }, ], } int_mtx = generate_interaction_matrix( - per_acc_interactions, acc_names=acc_names, activities=[Activity.Reaction] + per_acc_interactions, acc_names=acc_names, activities=["reacted_per_acc"] ) print(int_mtx) assert int_mtx.shape == (5, 5) diff --git a/tests/unit/test_generate_interaction_mtx_reply.py b/tests/unit/test_generate_interaction_mtx_reply.py index e82ff8e..71985ea 100644 --- a/tests/unit/test_generate_interaction_mtx_reply.py +++ b/tests/unit/test_generate_interaction_mtx_reply.py @@ -1,5 +1,5 @@ -from discord_analyzer.analysis.utils.activity import Activity -from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( +from tc_analyzer_lib.algorithms.utils.activity import Activity +from tc_analyzer_lib.algorithms.utils.compute_interaction_mtx_utils import ( generate_interaction_matrix, ) @@ -8,23 +8,23 @@ def test_single_account_reply(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 2}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 1}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 2}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 7}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 4}]], - "replied_per_acc": [[{"account": "968122690118512720", "count": 3}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 7}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 4}], + "replied_per_acc": [{"account": "968122690118512720", "count": 3}], }, ] } int_mtx = generate_interaction_matrix( per_acc_interactions, acc_names=["968122690118512720"], - activities=[Activity.Reply], + activities=["replied_per_acc"], ) # converting `numpy.bool_` to python `bool` @@ -37,22 +37,22 @@ def test_two_accounts_reply(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512799", "count": 1}]], - "mentioner_per_acc": [[{"account": "968122690118512799", "count": 3}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512799", "count": 1}], + "mentioner_per_acc": [{"account": "968122690118512799", "count": 3}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "968122690118512720", "count": 2}]], - "mentioner_per_acc": [[{"account": "968122690118512720", "count": 1}]], - "replied_per_acc": [[{"account": "968122690118512799", "count": 7}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "968122690118512720", "count": 2}], + "mentioner_per_acc": [{"account": "968122690118512720", "count": 1}], + "replied_per_acc": [{"account": "968122690118512799", "count": 7}], }, ] } int_mtx = generate_interaction_matrix( - per_acc_interactions, acc_names=acc_names, activities=[Activity.Reply] + per_acc_interactions, acc_names=acc_names, activities=["replied_per_acc"] ) # converting `numpy.bool_` to python `bool` is_match = bool((int_mtx == [[0, 7], [0, 0]]).all()) @@ 
-70,51 +70,51 @@ def test_multiple_interactions_reply(): per_acc_interactions = { "968122690118512720": [ { - "account_name": "968122690118512720", - "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], - "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], + "user": "968122690118512720", + "reacted_per_acc": [{"account": "795295822534148096", "count": 9}], + "mentioner_per_acc": [{"account": "795295822534148096", "count": 2}], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [], - "replied_per_acc": [[{"account": "7952958225341480444", "count": 7}]], + "replied_per_acc": [{"account": "7952958225341480444", "count": 7}], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [ - [{"account": "7952958225341480444", "count": 7}], - [{"account": "7952958225341480433", "count": 1}], + {"account": "7952958225341480444", "count": 7}, + {"account": "7952958225341480433", "count": 1}, ], "mentioner_per_acc": [ - [{"account": "7952958225341480444", "count": 5}], - [{"account": "7952958225341480433", "count": 2}], + {"account": "7952958225341480444", "count": 5}, + {"account": "7952958225341480433", "count": 2}, ], "replied_per_acc": [ - [{"account": "7952958225341480444", "count": 1}], - [{"account": "7952958225341480433", "count": 1}], + {"account": "7952958225341480444", "count": 1}, + {"account": "7952958225341480433", "count": 1}, ], }, ], "968122690118512721": [ { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [], "mentioner_per_acc": [], - "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], + "replied_per_acc": [{"account": "7952958225341480444", "count": 8}], }, ], } int_mtx = generate_interaction_matrix( - per_acc_interactions, acc_names=acc_names, activities=[Activity.Reply] + per_acc_interactions, acc_names=acc_names, activities=["replied_per_acc"] ) print(int_mtx) assert int_mtx.shape == (5, 5) diff --git a/tests/unit/test_get_timestamp.py b/tests/unit/test_get_timestamp.py index 37c36c7..adc61f2 100644 --- a/tests/unit/test_get_timestamp.py +++ b/tests/unit/test_get_timestamp.py @@ -1,15 +1,23 @@ import unittest from datetime import datetime, timezone -from discord_analyzer.DB_operations.network_graph import get_timestamp +from tc_analyzer_lib.DB_operations.network_graph import NetworkGraph +from tc_analyzer_lib.schemas import GraphSchema class TestGetTimestamp(unittest.TestCase): + def setUp(self) -> None: + platform_id = "51515151515151515151" + graph_schema = GraphSchema( + platform="discord", + ) + self.network_graph = NetworkGraph(graph_schema, platform_id) + def test_current_time(self): """ Test when no time is provided, it should return the current timestamp. """ - result = get_timestamp() + result = self.network_graph.get_timestamp() current_time = ( datetime.now(timezone.utc) .replace(hour=0, minute=0, second=0, microsecond=0) @@ -23,7 +31,7 @@ def test_specific_time(self): Test when a specific time is provided, it should return the correct timestamp. 
""" specific_time = datetime(2023, 1, 1, 12, 30, 0, tzinfo=timezone.utc) - result = get_timestamp(specific_time) + result = self.network_graph.get_timestamp(specific_time) expected_timestamp = ( specific_time.replace(hour=0, minute=0, second=0, microsecond=0).timestamp() * 1000 @@ -34,8 +42,8 @@ def test_none_input(self): """ Test when `None` is provided as input, it should behave the same as not providing any time. """ - result_with_none = get_timestamp(None) - result_without_none = get_timestamp() + result_with_none = self.network_graph.get_timestamp(None) + result_without_none = self.network_graph.get_timestamp() self.assertAlmostEqual(result_with_none, result_without_none, delta=1000) @@ -44,7 +52,7 @@ def test_past_time(self): Test when a past time is provided, it should return the correct timestamp. """ past_time = datetime(2022, 1, 1, 12, 0, 0, tzinfo=timezone.utc) - result = get_timestamp(past_time) + result = self.network_graph.get_timestamp(past_time) expected_timestamp = ( past_time.replace(hour=0, minute=0, second=0, microsecond=0).timestamp() * 1000 @@ -58,7 +66,7 @@ def test_microseconds(self): time_with_microseconds = datetime( 2023, 1, 1, 12, 30, 0, 500000, tzinfo=timezone.utc ) - result = get_timestamp(time_with_microseconds) + result = self.network_graph.get_timestamp(time_with_microseconds) expected_timestamp = ( time_with_microseconds.replace( hour=0, minute=0, second=0, microsecond=0 diff --git a/tests/unit/test_graph_schema.py b/tests/unit/test_graph_schema.py new file mode 100644 index 0000000..851a307 --- /dev/null +++ b/tests/unit/test_graph_schema.py @@ -0,0 +1,45 @@ +from unittest import TestCase + +from tc_analyzer_lib.schemas import GraphSchema + + +class TestGraphSchema(TestCase): + def setUp(self): + self.platform_id = "51515515151515" + + def test_just_platform_name_given(self): + graph = GraphSchema( + platform="discord", + ) + + self.assertEqual(graph.platform_label, "DiscordPlatform") + self.assertEqual(graph.user_label, "DiscordMember") + self.assertEqual(graph.interacted_in_rel, "INTERACTED_IN") + self.assertEqual(graph.interacted_with_rel, "INTERACTED_WITH") + self.assertEqual(graph.member_relation, "IS_MEMBER") + + def test_platform_name_contain_space(self): + with self.assertRaises(ValueError): + _ = GraphSchema( + platform="my discord", + ) + + def test_platform_name_contain_underline(self): + with self.assertRaises(ValueError): + _ = GraphSchema( + platform="my_discord", + ) + + def test_given_all_inputs(self): + graph = GraphSchema( + platform="telegram", + interacted_in_rel="INTERACTED_IN_1", + interacted_with_rel="INTERACTED_WITH_2", + member_relation="IS_MEMBER_3", + ) + + self.assertEqual(graph.platform_label, "TelegramPlatform") + self.assertEqual(graph.user_label, "TelegramMember") + self.assertEqual(graph.interacted_in_rel, "INTERACTED_IN_1") + self.assertEqual(graph.interacted_with_rel, "INTERACTED_WITH_2") + self.assertEqual(graph.member_relation, "IS_MEMBER_3") diff --git a/tests/unit/test_heatmaps_analytics_base_process_vectors.py b/tests/unit/test_heatmaps_analytics_base_process_vectors.py new file mode 100644 index 0000000..aee8195 --- /dev/null +++ b/tests/unit/test_heatmaps_analytics_base_process_vectors.py @@ -0,0 +1,118 @@ +from unittest import TestCase + +from tc_analyzer_lib.metrics.heatmaps.analytics_hourly import AnalyticsHourly + + +class TestRawMemberActivitiesProcessVectors(TestCase): + def setUp(self) -> None: + self.platform_id = "3456789" + self.analytics_hourly = AnalyticsHourly(self.platform_id) + + def 
test_no_input(self): + input_data = [] + hourly_analytics = self.analytics_hourly._process_vectors(input_data) + self.assertIsInstance(hourly_analytics, list) + + # zeros vector with length 24 + expected_analytics = [ + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) + + def test_single_input(self): + # hour 0 of the day had an activity of 2 + input_data = [{"_id": 0, "count": 2}] + hourly_analytics = self.analytics_hourly._process_vectors(input_data) + self.assertIsInstance(hourly_analytics, list) + + expected_analytics = [ + 2, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + ] + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) + + def test_multiple_input(self): + # hour 0 of the day had an activity of 2 + input_data = [ + {"_id": 0, "count": 2}, + {"_id": 3, "count": 4}, + {"_id": 19, "count": 7}, + ] + hourly_analytics = self.analytics_hourly._process_vectors(input_data) + self.assertIsInstance(hourly_analytics, list) + + expected_analytics = [ + 2, + 0, + 0, + 4, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 0, + 0, + 0, + ] + self.assertEqual(len(hourly_analytics), 24) + self.assertEqual(hourly_analytics, expected_analytics) diff --git a/tests/unit/test_member_activity_utils.py b/tests/unit/test_member_activity_utils.py index 3ce2fe4..2bd9a59 100644 --- a/tests/unit/test_member_activity_utils.py +++ b/tests/unit/test_member_activity_utils.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta -from discord_analyzer.analysis.utils.member_activity_history_utils import ( +from tc_analyzer_lib.algorithms.utils.member_activity_history_utils import ( MemberActivityPastUtils, ) @@ -13,9 +13,9 @@ def test_zero_joined(): all_joined_day = {} joined_acc = [ - {"joinedAt": (start_dt + timedelta(days=5)), "discordId": "000000000"}, - {"joinedAt": (start_dt + timedelta(days=6)), "discordId": "000000001"}, - {"joinedAt": (start_dt + timedelta(days=8)), "discordId": "000000002"}, + {"joined_at": (start_dt + timedelta(days=5)), "id": "000000000"}, + {"joined_at": (start_dt + timedelta(days=6)), "id": "000000001"}, + {"joined_at": (start_dt + timedelta(days=8)), "id": "000000002"}, ] member_activitiy_utils = MemberActivityPastUtils(db_access=db_access) @@ -56,9 +56,9 @@ def test_single_joined(): } joined_acc = [ - {"joinedAt": (start_dt + timedelta(days=0)), "discordId": "000000002"}, - {"joinedAt": (start_dt + timedelta(days=1)), "discordId": "000000003"}, - {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000004"}, + {"joined_at": (start_dt + timedelta(days=0)), "id": "000000002"}, + {"joined_at": (start_dt + timedelta(days=1)), "id": "000000003"}, + {"joined_at": (start_dt + timedelta(days=2)), "id": "000000004"}, ] member_activitiy_utils = MemberActivityPastUtils(db_access=db_access) @@ -96,11 +96,11 @@ def test_multiple_joined(): } joined_acc = [ - {"joinedAt": (start_dt + timedelta(days=0)), "discordId": "000000004"}, - {"joinedAt": (start_dt + timedelta(days=0)), "discordId": "000000005"}, - {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000006"}, - {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000007"}, - {"joinedAt": (start_dt + timedelta(days=2)), "discordId": "000000008"}, + {"joined_at": (start_dt + 
timedelta(days=0)), "id": "000000004"}, + {"joined_at": (start_dt + timedelta(days=0)), "id": "000000005"}, + {"joined_at": (start_dt + timedelta(days=2)), "id": "000000006"}, + {"joined_at": (start_dt + timedelta(days=2)), "id": "000000007"}, + {"joined_at": (start_dt + timedelta(days=2)), "id": "000000008"}, ] member_activitiy_utils = MemberActivityPastUtils(db_access=db_access) diff --git a/tests/unit/test_mongo_singleton.py b/tests/unit/test_mongo_singleton.py index 9734592..33eed71 100644 --- a/tests/unit/test_mongo_singleton.py +++ b/tests/unit/test_mongo_singleton.py @@ -1,7 +1,7 @@ import unittest from pymongo import MongoClient -from utils.mongo import MongoSingleton +from tc_analyzer_lib.utils.mongo import MongoSingleton class TestMongoSingleton(unittest.TestCase): diff --git a/tests/unit/test_parse_raction.py b/tests/unit/test_parse_raction.py deleted file mode 100644 index 8f65ec4..0000000 --- a/tests/unit/test_parse_raction.py +++ /dev/null @@ -1,66 +0,0 @@ -from discord_analyzer.analysis.activity_hourly import parse_reaction - - -def test_parse_raction_no_input(): - sample_input = [] - output = parse_reaction(sample_input) - - assert output == [] - - -def test_parse_reaction_partial_single_input(): - sample_input = ["user1,"] - - output = parse_reaction(sample_input) - - assert output == [["user1", ""]] - - -def test_parese_reaction_multiple_input_with_empty_reactions(): - sample_input = ["item1,item2|item3,,item4|item5,item6,item7|,"] - - output = parse_reaction(sample_input) - - assert output == [ - ["item1", "item2|item3", "", "item4|item5", "item6", "item7|", ""] - ] - - -def test_parese_reaction_multiple_input_with_space_reactions(): - sample_input = ["item1,item2|item3, ,item4|item5,item6,item7|, "] - - output = parse_reaction(sample_input) - - assert output == [ - ["item1", "item2|item3", " ", "item4|item5", "item6", "item7|", " "] - ] - - -def test_parse_raction_single_input(): - sample_input = ["emoji1"] - - output = parse_reaction(sample_input) - - assert len(output) == 1 - assert len(output[0]) == 1 - assert output == [["emoji1"]] - - -def test_parse_raction_multiple_input_with_singleComma(): - sample_input = ["mehrdad_mms#8600,😁", "mehrdad_mms#8600,🙌", "mehrdad_mms#8600,🤌"] - output = parse_reaction(sample_input) - - assert len(output) == 3 - assert output[0] == ["mehrdad_mms#8600", "😁"] - assert output[1] == ["mehrdad_mms#8600", "🙌"] - assert output[2] == ["mehrdad_mms#8600", "🤌"] - - -def test_parse_raction_multiple_input_with_multipleComma(): - sample_input = ["sepehr#3795,thegadget.eth#3374,👍", "sepehr#3795,❤️"] - - output = parse_reaction(sample_input) - - assert len(output) == 2 - assert output[0] == ["sepehr#3795", "thegadget.eth#3374", "👍"] - assert output[1] == ["sepehr#3795", "❤️"] diff --git a/tests/unit/test_per_account_interaction.py b/tests/unit/test_per_account_interaction.py index b44b581..cb8a722 100644 --- a/tests/unit/test_per_account_interaction.py +++ b/tests/unit/test_per_account_interaction.py @@ -1,4 +1,4 @@ -from discord_analyzer.analysis.analytics_interactions_script import ( +from tc_analyzer_lib.algorithms.analytics_interactions_script import ( per_account_interactions, ) @@ -6,7 +6,14 @@ def test_per_account_interaction_no_inputs(): sample_input = [] - results = per_account_interactions(sample_input) + results = per_account_interactions( + sample_input, + dict_keys=[ + "mentioner_accounts", + "reacter_accounts", + "replier_accounts", + ], + ) assert results["mentioner_accounts"] == {} assert results["reacter_accounts"] == {} @@ -17,29 
+24,36 @@ def test_per_account_interaction_no_inputs(): def test_per_account_interaction_empty_inputs(): sample_input = [ { - "account_name": "acc1", - "channelId": "1234", + "user": "acc1", + "channel_id": "1234", "mentioner_accounts": [], "reacter_accounts": [], "replier_accounts": [], }, { - "account_name": "acc2", - "channelId": "321", + "user": "acc2", + "channel_id": "321", "mentioner_accounts": [], "reacter_accounts": [], "replier_accounts": [], }, { - "account_name": "acc2", - "channelId": "555", + "user": "acc2", + "channel_id": "555", "mentioner_accounts": [], "reacter_accounts": [], "replier_accounts": [], }, ] - results = per_account_interactions(sample_input) + results = per_account_interactions( + sample_input, + dict_keys=[ + "mentioner_accounts", + "reacter_accounts", + "replier_accounts", + ], + ) assert results["mentioner_accounts"] == {} assert results["reacter_accounts"] == {} @@ -50,39 +64,39 @@ def test_per_account_interaction_empty_inputs(): def test_per_account_interaction_accounts(): sample_input = [ { - "account_name": "acc1", - "channelId": "1234", - "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], - "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 1}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "1234", + "mentioner_accounts": [{"account": "Ene SS Rawa#0855", "count": 1}], + "reacter_accounts": [{"account": "ahmadyazdanii#7517", "count": 1}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "1234", - "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], - "reacter_accounts": [[{"account": "Mehrdad", "count": 1}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "1234", + "mentioner_accounts": [{"account": "Ene SS Rawa#0855", "count": 1}], + "reacter_accounts": [{"account": "Mehrdad", "count": 1}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "1234", - "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 10}]], - "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "1234", + "mentioner_accounts": [{"account": "Ene SS Rawa#0855", "count": 10}], + "reacter_accounts": [{"account": "ahmadyazdanii#7517", "count": 2}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "546", - "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], - "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "546", + "mentioner_accounts": [{"account": "mramin22#1669", "count": 10}], + "reacter_accounts": [{"account": "ahmadyazdanii#7517", "count": 2}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "000", - "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], - "reacter_accounts": [[{"account": "Behzad", "count": 6}]], - "replier_accounts": [[{"account": "Behzad", "count": 7}]], + "user": "acc1", + "channel_id": "000", + "mentioner_accounts": [{"account": "mramin22#1669", "count": 10}], + "reacter_accounts": [{"account": "Behzad", "count": 6}], + 
"replier_accounts": [{"account": "Behzad", "count": 7}], }, ] @@ -91,15 +105,24 @@ def test_per_account_interaction_accounts(): reacter_accounts_names = ["ahmadyazdanii#7517", "Mehrdad", "Behzad"] replier_accounts_names = ["Behzad", "ahmadyazdanii#7517"] - results = per_account_interactions(sample_input) + results = per_account_interactions( + sample_input, + dict_keys=[ + "mentioner_accounts", + "reacter_accounts", + "replier_accounts", + ], + ) # the whole results assersion - assert list(results.keys()) == [ - "replier_accounts", - "reacter_accounts", - "mentioner_accounts", - "all_interaction_accounts", - ] + assert set(results.keys()) == set( + [ + "replier_accounts", + "reacter_accounts", + "mentioner_accounts", + "all_interaction_accounts", + ] + ) # mentioner_accounts assersions action_type = "mentioner_accounts" @@ -124,39 +147,39 @@ def test_per_account_interaction_accounts(): def test_per_account_interaction_numbers(): sample_input = [ { - "account_name": "acc1", - "channelId": "1234", - "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], - "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 1}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "1234", + "mentioner_accounts": [{"account": "Ene SS Rawa#0855", "count": 1}], + "reacter_accounts": [{"account": "ahmadyazdanii#7517", "count": 1}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "1234", - "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 1}]], - "reacter_accounts": [[{"account": "Mehrdad", "count": 1}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "1234", + "mentioner_accounts": [{"account": "Ene SS Rawa#0855", "count": 1}], + "reacter_accounts": [{"account": "Mehrdad", "count": 1}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "1234", - "mentioner_accounts": [[{"account": "Ene SS Rawa#0855", "count": 10}]], - "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "1234", + "mentioner_accounts": [{"account": "Ene SS Rawa#0855", "count": 10}], + "reacter_accounts": [{"account": "ahmadyazdanii#7517", "count": 2}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "546", - "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], - "reacter_accounts": [[{"account": "ahmadyazdanii#7517", "count": 2}]], - "replier_accounts": [[{"account": "ahmadyazdanii#7517", "count": 5}]], + "user": "acc1", + "channel_id": "546", + "mentioner_accounts": [{"account": "mramin22#1669", "count": 10}], + "reacter_accounts": [{"account": "ahmadyazdanii#7517", "count": 2}], + "replier_accounts": [{"account": "ahmadyazdanii#7517", "count": 5}], }, { - "account_name": "acc1", - "channelId": "000", - "mentioner_accounts": [[{"account": "mramin22#1669", "count": 10}]], - "reacter_accounts": [[{"account": "Behzad", "count": 6}]], - "replier_accounts": [[{"account": "Behzad", "count": 7}]], + "user": "acc1", + "channel_id": "000", + "mentioner_accounts": [{"account": "mramin22#1669", "count": 10}], + "reacter_accounts": [{"account": "Behzad", "count": 6}], + "replier_accounts": [{"account": "Behzad", "count": 7}], }, ] @@ -168,7 +191,14 @@ def 
test_per_account_interaction_numbers(): "Behzad": 13, } - results = per_account_interactions(sample_input) + results = per_account_interactions( + sample_input, + dict_keys=[ + "mentioner_accounts", + "reacter_accounts", + "replier_accounts", + ], + ) # 5 users we had assert len(results["all_interaction_accounts"].values()) == 5 @@ -181,7 +211,3 @@ def test_per_account_interaction_numbers(): acc_interaction_count = account_res[i]["count"] assert acc_name in account_sum_interaction.keys() assert account_sum_interaction[acc_name] == acc_interaction_count - - -if __name__ == "__main__": - test_per_account_interaction_accounts() diff --git a/tests/unit/test_prepare_results_per_acc.py b/tests/unit/test_prepare_results_per_acc.py index c845828..6cc66ea 100644 --- a/tests/unit/test_prepare_results_per_acc.py +++ b/tests/unit/test_prepare_results_per_acc.py @@ -1,4 +1,4 @@ -from discord_analyzer.analysis.utils.compute_interaction_mtx_utils import ( +from tc_analyzer_lib.algorithms.utils.compute_interaction_mtx_utils import ( prepare_per_account, ) @@ -14,7 +14,7 @@ def test_empty_db_results(): def test_single_document_db_results(): db_results_sample = [ { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], "replied_per_acc": [], @@ -30,13 +30,13 @@ def test_single_document_db_results(): def test_multiple_document_single_acc_db_results(): db_results_sample = [ { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], "replied_per_acc": [], @@ -52,13 +52,13 @@ def test_multiple_document_single_acc_db_results(): def test_single_document_multiple_acc_db_results(): db_results_sample = [ { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [[{"account": "795295822534148096", "count": 1}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 1}]], "replied_per_acc": [], @@ -75,31 +75,31 @@ def test_single_document_multiple_acc_db_results(): def test_multiple_document_multiple_acc_db_results(): db_results_sample = [ { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [], "mentioner_per_acc": [], 
"replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [ [{"account": "7952958225341480444", "count": 5}], @@ -114,19 +114,19 @@ def test_multiple_document_multiple_acc_db_results(): assert list(results.keys()) == ["968122690118512720", "968122690118512721"] assert results["968122690118512720"] == [ { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [[{"account": "795295822534148096", "count": 9}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 2}]], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [], }, { - "account_name": "968122690118512720", + "user": "968122690118512720", "reacted_per_acc": [], "mentioner_per_acc": [ [{"account": "7952958225341480444", "count": 5}], @@ -137,13 +137,13 @@ def test_multiple_document_multiple_acc_db_results(): ] assert results["968122690118512721"] == [ { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [[{"account": "795295822534148096", "count": 3}]], "mentioner_per_acc": [[{"account": "795295822534148096", "count": 4}]], "replied_per_acc": [], }, { - "account_name": "968122690118512721", + "user": "968122690118512721", "reacted_per_acc": [], "mentioner_per_acc": [], "replied_per_acc": [[{"account": "7952958225341480444", "count": 8}]], diff --git a/tests/unit/test_process_non_reaction_heatmaps.py b/tests/unit/test_process_non_reaction_heatmaps.py index 6532fca..fbe4c0b 100644 --- a/tests/unit/test_process_non_reaction_heatmaps.py +++ b/tests/unit/test_process_non_reaction_heatmaps.py @@ -1,15 +1,15 @@ from unittest import TestCase import numpy as np -from discord_analyzer.analysis.compute_interaction_matrix_discord import ( - process_non_reactions, +from tc_analyzer_lib.algorithms.compute_interaction_matrix_discord import ( + process_actions, ) class TestProcessNonReactions(TestCase): def test_empty_inputs(self): intput_data = {} - results = process_non_reactions(heatmaps_data_per_acc=intput_data) + results = process_actions(heatmaps_data_per_acc=intput_data, skip_fields=[]) self.assertEqual(results, {}) def test_single_account_no_action(self): @@ -29,7 +29,9 @@ def test_single_account_no_action(self): } ] } - results = process_non_reactions(input_data) + results = process_actions( + input_data, skip_fields=["date", "reacted_per_acc", "replied_per_acc"] + ) expected_results = { "acc1": [ @@ -62,24 +64,26 @@ def test_single_account_with_action(self): "lone_messages": lone_messages, "thr_messages": thr_messages, "reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", "count": 5}, ], "replied_per_acc": [], "date": "2024-01-01", } ] } - results = process_non_reactions(input_data) + results = process_actions( + input_data, skip_fields=["date", "replied_per_acc", "reacted_per_acc"] + ) expected_results = { "acc1": [ { - "lone_messages": [[{"account": "acc1", "count": 3}]], - "thr_messages": [[{"account": "acc1", "count": 1}]], + "lone_messages": [{"account": "acc1", "count": 3}], + "thr_messages": [{"account": "acc1", "count": 1}], # others same as before "reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", 
"count": 5}, ], "replied_per_acc": [], "date": "2024-01-01", @@ -106,8 +110,8 @@ def test_multiple_account_with_action(self): "lone_messages": user1_lone_messages, "thr_messages": user1_thr_messages, "reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", "count": 5}, ], "replied_per_acc": {}, "date": "2024-01-01", @@ -118,24 +122,26 @@ def test_multiple_account_with_action(self): "lone_messages": np.zeros(24), "thr_messages": user2_thr_messages, "reacted_per_acc": [ - [{"account": "acc5", "count": 3}], + {"account": "acc5", "count": 3}, ], "replied_per_acc": [], "date": "2024-01-01", } ], } - results = process_non_reactions(input_data) + results = process_actions( + input_data, skip_fields=["date", "replied_per_acc", "reacted_per_acc"] + ) expected_results = { "acc1": [ { - "lone_messages": [[{"account": "acc1", "count": 3}]], - "thr_messages": [[{"account": "acc1", "count": 1}]], + "lone_messages": [{"account": "acc1", "count": 3}], + "thr_messages": [{"account": "acc1", "count": 1}], # others same as before "reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", "count": 5}, ], "replied_per_acc": {}, "date": "2024-01-01", @@ -144,10 +150,10 @@ def test_multiple_account_with_action(self): "acc2": [ { "lone_messages": [], - "thr_messages": [[{"account": "acc2", "count": 7}]], + "thr_messages": [{"account": "acc2", "count": 7}], # others same as before "reacted_per_acc": [ - [{"account": "acc5", "count": 3}], + {"account": "acc5", "count": 3}, ], "replied_per_acc": [], "date": "2024-01-01", @@ -174,8 +180,8 @@ def test_multiple_account_multiple_documents_with_action(self): "lone_messages": user1_lone_messages, "thr_messages": user1_thr_messages, "reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", "count": 5}, ], "replied_per_acc": {}, "date": "2024-01-01", @@ -184,8 +190,8 @@ def test_multiple_account_multiple_documents_with_action(self): "lone_messages": np.zeros(24), "thr_messages": user1_lone_messages, "reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", "count": 5}, ], "replied_per_acc": {}, "date": "2024-01-02", @@ -196,35 +202,37 @@ def test_multiple_account_multiple_documents_with_action(self): "lone_messages": np.zeros(24), "thr_messages": user2_thr_messages, "reacted_per_acc": [ - [{"account": "acc5", "count": 3}], + {"account": "acc5", "count": 3}, ], "replied_per_acc": [], "date": "2024-01-01", } ], } - results = process_non_reactions(input_data) + results = process_actions( + input_data, skip_fields=["date", "reacted_per_acc", "replied_per_acc"] + ) expected_results = { "acc1": [ { - "lone_messages": [[{"account": "acc1", "count": 3}]], - "thr_messages": [[{"account": "acc1", "count": 1}]], + "lone_messages": [{"account": "acc1", "count": 3}], + "thr_messages": [{"account": "acc1", "count": 1}], # others same as before "reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", "count": 5}, ], "replied_per_acc": {}, "date": "2024-01-01", }, { "lone_messages": [], - "thr_messages": [[{"account": "acc1", "count": 3}]], + "thr_messages": [{"account": "acc1", "count": 3}], # others same as before 
"reacted_per_acc": [ - [{"account": "acc2", "count": 1}], - [{"account": "acc3", "count": 5}], + {"account": "acc2", "count": 1}, + {"account": "acc3", "count": 5}, ], "replied_per_acc": {}, "date": "2024-01-02", @@ -233,10 +241,10 @@ def test_multiple_account_multiple_documents_with_action(self): "acc2": [ { "lone_messages": [], - "thr_messages": [[{"account": "acc2", "count": 7}]], + "thr_messages": [{"account": "acc2", "count": 7}], # others same as before "reacted_per_acc": [ - [{"account": "acc5", "count": 3}], + {"account": "acc5", "count": 3}, ], "replied_per_acc": [], "date": "2024-01-01", diff --git a/tests/unit/test_schema_hourly_analytics.py b/tests/unit/test_schema_hourly_analytics.py new file mode 100644 index 0000000..d6b573c --- /dev/null +++ b/tests/unit/test_schema_hourly_analytics.py @@ -0,0 +1,96 @@ +from unittest import TestCase + +from tc_analyzer_lib.schemas import ActivityDirection, ActivityType, HourlyAnalytics + + +class TestHourlyAnalytics(TestCase): + def test_initialization_with_metadata(self): + analytics = HourlyAnalytics( + name="analytics1", + type=ActivityType.ACTION, + member_activities_used=True, + rawmemberactivities_condition={"key": "value"}, + direction=ActivityDirection.EMITTER, + ) + self.assertEqual(analytics.name, "analytics1") + self.assertEqual(analytics.type, ActivityType.ACTION) + self.assertTrue(analytics.member_activities_used) + self.assertEqual(analytics.rawmemberactivities_condition, {"key": "value"}) + self.assertEqual(analytics.direction, ActivityDirection.EMITTER) + + def test_initialization_without_metadata(self): + analytics = HourlyAnalytics( + name="analytics1", + type=ActivityType.INTERACTION, + member_activities_used=True, + rawmemberactivities_condition=None, + direction=ActivityDirection.RECEIVER, + ) + self.assertEqual(analytics.name, "analytics1") + self.assertEqual(analytics.type, ActivityType.INTERACTION) + self.assertEqual(analytics.direction, ActivityDirection.RECEIVER) + self.assertTrue(analytics.member_activities_used) + self.assertIsNone(analytics.rawmemberactivities_condition) + + def test_to_dict_with_metadata(self): + analytics = HourlyAnalytics( + name="analytics1", + type=ActivityType.ACTION, + member_activities_used=True, + rawmemberactivities_condition={"key": "value"}, + direction=ActivityDirection.EMITTER, + ) + expected_dict = { + "name": "analytics1", + "type": "actions", + "member_activities_used": True, + "rawmemberactivities_condition": {"key": "value"}, + "direction": "emitter", + "activity_name": None, + } + self.assertEqual(analytics.to_dict(), expected_dict) + + def test_to_dict_without_metadata(self): + analytics = HourlyAnalytics( + name="analytics1", + type=ActivityType.INTERACTION, + member_activities_used=True, + direction=ActivityDirection.RECEIVER, + ) + expected_dict = { + "name": "analytics1", + "type": "interactions", + "member_activities_used": True, + "direction": "receiver", + "activity_name": None, + } + self.assertEqual(analytics.to_dict(), expected_dict) + + def test_from_dict_with_metadata(self): + data = { + "name": "analytics1", + "type": "actions", + "member_activities_used": True, + "rawmemberactivities_condition": {"key": "value"}, + "direction": "emitter", + } + analytics = HourlyAnalytics.from_dict(data) + self.assertEqual(analytics.name, "analytics1") + self.assertEqual(analytics.type, ActivityType.ACTION) + self.assertEqual(analytics.direction, ActivityDirection.EMITTER) + self.assertTrue(analytics.member_activities_used) + 
self.assertEqual(analytics.rawmemberactivities_condition, {"key": "value"}) + + def test_from_dict_without_metadata(self): + data = { + "name": "analytics1", + "type": "interactions", + "member_activities_used": True, + "direction": "receiver", + } + analytics = HourlyAnalytics.from_dict(data) + self.assertEqual(analytics.name, "analytics1") + self.assertEqual(analytics.type, ActivityType.INTERACTION) + self.assertEqual(analytics.direction, ActivityDirection.RECEIVER) + self.assertTrue(analytics.member_activities_used) + self.assertIsNone(analytics.rawmemberactivities_condition) diff --git a/tests/unit/test_schema_raw_analytics.py b/tests/unit/test_schema_raw_analytics.py new file mode 100644 index 0000000..71893a8 --- /dev/null +++ b/tests/unit/test_schema_raw_analytics.py @@ -0,0 +1,74 @@ +from unittest import TestCase + +from tc_analyzer_lib.schemas import ActivityDirection, ActivityType +from tc_analyzer_lib.schemas.platform_configs.config_base import RawAnalytics + + +class TestRawAnalytics(TestCase): + def test_initialization(self): + # Valid initialization + raw_analytics = RawAnalytics( + name="analytics1", + type=ActivityType.ACTION, + member_activities_used=True, + direction=ActivityDirection.EMITTER, + ) + self.assertEqual(raw_analytics.name, "analytics1") + self.assertEqual(raw_analytics.type, ActivityType.ACTION) + self.assertTrue(raw_analytics.member_activities_used) + + # Invalid initialization (Invalid ActivityType) + with self.assertRaises(ValueError): + RawAnalytics( + name="analytics1", + type="invalid_type", + member_activities_used=True, + direction=ActivityDirection.RECEIVER, + ) + + def test_to_dict(self): + raw_analytics = RawAnalytics( + name="analytics1", + type=ActivityType.INTERACTION, + member_activities_used=False, + direction=ActivityDirection.EMITTER, + ) + expected_dict = { + "name": "analytics1", + "type": "interactions", + "member_activities_used": False, + "activity_name": None, + "direction": "emitter", + "rawmemberactivities_condition": None, + } + print(raw_analytics.to_dict()) + self.assertEqual(raw_analytics.to_dict(), expected_dict) + + def test_from_dict(self): + data = { + "name": "analytics1", + "type": "actions", + "member_activities_used": True, + "direction": "receiver", + } + raw_analytics = RawAnalytics.from_dict(data) + self.assertEqual(raw_analytics.name, "analytics1") + self.assertEqual(raw_analytics.type, ActivityType.ACTION) + self.assertTrue(raw_analytics.member_activities_used) + + # Invalid from_dict (missing keys) + invalid_data = { + "name": "analytics1", + "member_activities_used": True, + } + with self.assertRaises(KeyError): + RawAnalytics.from_dict(invalid_data) + + # Invalid from_dict (invalid type) + invalid_data_type = { + "name": "analytics1", + "type": "invalid_type", + "member_activities_used": True, + } + with self.assertRaises(ValueError): + RawAnalytics.from_dict(invalid_data_type) diff --git a/tests/unit/test_sum_interactions_features.py b/tests/unit/test_sum_interactions_features.py deleted file mode 100644 index 4dbf531..0000000 --- a/tests/unit/test_sum_interactions_features.py +++ /dev/null @@ -1,724 +0,0 @@ -from discord_analyzer.analysis.analytics_interactions_script import ( - sum_interactions_features, -) - - -def test_sum_interactions_features_out_length(): - interactions = [ - "thr_messages", - "lone_messages", - "replier", - "replied", - "replied", - "mentioner", - "mentioned", - "reacter", - "reacted", - ] - sample_input = [] - output = sum_interactions_features(cursor_list=sample_input, 
dict_keys=interactions) - - for action in interactions: - # 24 hours - assert len(output[action]) == 24 - - -def test_sum_interactions_features_empty_input(): - interactions = [ - "thr_messages", - "lone_messages", - "replier", - "replied", - "replied", - "mentioner", - "mentioned", - "reacter", - "reacted", - ] - sample_input = [] - output = sum_interactions_features(cursor_list=sample_input, dict_keys=interactions) - - for action in interactions: - assert sum(output[action]) == 0 - - -def test_sum_interactions_features_single_input(): - interactions = [ - "thr_messages", - "lone_messages", - "replier", - "replied", - "mentioner", - "mentioned", - "reacter", - "reacted", - ] - sample_input = [ - { - "thr_messages": [ - 0, - 0, - 5, - 107, - 0, - 1, - 0, - 0, - 0, - 0, - 4, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "lone_messages": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 80, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "replier": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 5, - 0, - ], - "replied": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - ], - "mentioner": [ - 0, - 0, - 0, - 1, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "mentioned": [ - 0, - 0, - 0, - 0, - 0, - 3, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "reacter": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "reacted": [ - 50000, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 100000, - 0, - 0, - 0, - 0, - 0, - 0, - ], - } - ] - - output = sum_interactions_features(cursor_list=sample_input, dict_keys=interactions) - - assert sum(output["thr_messages"]) == 117 - assert sum(output["lone_messages"]) == 80 - assert sum(output["replier"]) == 5 - assert sum(output["replied"]) == 24 - assert sum(output["mentioner"]) == 2 - assert sum(output["mentioned"]) == 3 - assert sum(output["reacter"]) == 1 - assert sum(output["reacted"]) == 150000 - - -def test_sum_interactions_features_multiple_input(): - interactions = [ - "thr_messages", - "lone_messages", - "replier", - "replied", - "mentioner", - "mentioned", - "reacter", - "reacted", - ] - sample_input = [ - { - "thr_messages": [ - 0, - 0, - 5, - 107, - 0, - 1, - 0, - 0, - 0, - 0, - 4, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "lone_messages": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 80, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "replier": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 5, - 0, - ], - "replied": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - ], - "mentioner": [ - 0, - 0, - 0, - 1, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "mentioned": [ - 0, - 0, - 0, - 0, - 0, - 3, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "reacter": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "reacted": [ - 50000, - 0, - 0, - 0, - 0, - 0, - 0, - 0, 
- 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 100000, - 0, - 0, - 0, - 0, - 0, - 0, - ], - }, - { - "thr_messages": [ - 0, - 0, - 5, - 100, - 0, - 1, - 0, - 0, - 0, - 0, - 4, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "lone_messages": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 80, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "replier": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 5, - 0, - ], - "replied": [ - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - 1, - ], - "mentioner": [ - 0, - 0, - 0, - 1, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "mentioned": [ - 0, - 0, - 0, - 0, - 0, - 3, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "reacter": [ - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 1, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - ], - "reacted": [ - 50000, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 100000, - 0, - 0, - 0, - 0, - 0, - 0, - ], - }, - ] - - output = sum_interactions_features(cursor_list=sample_input, dict_keys=interactions) - - assert sum(output["thr_messages"]) == 227 - assert sum(output["lone_messages"]) == 160 - assert sum(output["replier"]) == 10 - assert sum(output["replied"]) == 48 - assert sum(output["mentioner"]) == 4 - assert sum(output["mentioned"]) == 6 - assert sum(output["reacter"]) == 2 - assert sum(output["reacted"]) == 300000 diff --git a/utils/__init__.py b/utils/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/utils/get_guild_utils.py b/utils/get_guild_utils.py deleted file mode 100644 index 34f16c8..0000000 --- a/utils/get_guild_utils.py +++ /dev/null @@ -1,57 +0,0 @@ -from bson.objectid import ObjectId -from utils.mongo import MongoSingleton - - -def get_guild_community_ids(platform_id: str) -> str: - """ - get both the guild id and community from the platform id - - Parameters - ----------- - platform_id : str - the platform `_id` within the platforms collection - - Returns - -------- - guild_id : str - the discord guild id for that specific platform - """ - mongo_client = MongoSingleton.get_instance().client - - obj_platform_id = ObjectId(platform_id) - platform = mongo_client["Core"]["platforms"].find_one( - {"name": "discord", "_id": obj_platform_id}, - {"metadata.id": 1}, - ) - if platform is None: - raise AttributeError(f"PLATFORM_ID: {platform_id}, No guild found!") - - guild_id = platform["metadata"]["id"] - return guild_id - - -def get_guild_platform_id(guild_id: str) -> str: - """ - get the guild platform id using the given guild id - - Parameters - ------------ - guild_id : str - the id for the specified guild - - Returns - -------- - platform_id : str - the platform id related to the given guild - """ - mongo_client = MongoSingleton.get_instance().client - - guild_info = mongo_client["Core"]["platforms"].find_one( - {"metadata.id": guild_id}, {"_id": 1} - ) - if guild_info is not None: - platform_id = str(guild_info["_id"]) - else: - raise ValueError(f"No available guild with id {guild_id}") - - return platform_id diff --git a/utils/rabbitmq.py b/utils/rabbitmq.py deleted file mode 100644 index 452c69a..0000000 --- a/utils/rabbitmq.py +++ /dev/null @@ -1,42 +0,0 @@ -import logging - -from tc_messageBroker import RabbitMQ -from 
tc_messageBroker.rabbit_mq.queue import Queue -from utils.credentials import get_rabbit_mq_credentials - - -class RabbitMQSingleton: - __instance = None - - def __init__(self): - if RabbitMQSingleton.__instance is not None: - raise Exception("This class is a singleton!") - else: - creds = get_rabbit_mq_credentials() - self.client = self.create_rabbitmq_client(creds) - RabbitMQSingleton.__instance = self - - @staticmethod - def get_instance(): - if RabbitMQSingleton.__instance is None: - try: - RabbitMQSingleton() - logging.info("RabbitMQ broker Connected Successfully!") - except Exception as exp: - logging.error(f"RabbitMQ broker not connected! exp: {exp}") - - return RabbitMQSingleton.__instance - - def get_client(self): - return self.client - - def create_rabbitmq_client(self, rabbit_creds: dict[str, str]): - rabbitmq = RabbitMQ( - broker_url=rabbit_creds["broker_url"], - port=rabbit_creds["port"], - username=rabbit_creds["username"], - password=rabbit_creds["password"], - ) - rabbitmq.connect(queue_name=Queue.DISCORD_ANALYZER) - - return rabbitmq diff --git a/utils/sentryio_service.py b/utils/sentryio_service.py deleted file mode 100644 index bf4da9e..0000000 --- a/utils/sentryio_service.py +++ /dev/null @@ -1,14 +0,0 @@ -import sentry_sdk -from utils.credentials import get_sentryio_service_creds - - -def set_up_sentryio(sample_rate=1.0): - sentry_creds = get_sentryio_service_creds() - sentry_sdk.init( - dsn=sentry_creds["dsn"], - environment=sentry_creds["env"], - # Set traces_sample_rate to 1.0 to capture 100% - # of transactions for performance monitoring. - # We recommend adjusting this value in production. - traces_sample_rate=sample_rate, - ) diff --git a/utils/transactions_ordering.py b/utils/transactions_ordering.py deleted file mode 100644 index d806736..0000000 --- a/utils/transactions_ordering.py +++ /dev/null @@ -1,61 +0,0 @@ -import numpy as np -from tc_messageBroker.rabbit_mq.status import Status - - -def sort_transactions(transactions: list): - """ - sort transactions by their order and status - the NOT_STARTED ones would be at the first of the list - and they are ordered by `order` property - - Parameters: - ------------ - transactions : list[ITransaction] - the list of transactions to order - - Returns: - --------- - transactions_ordered : ndarray(ITransaction) - the transactions ordered by status - the `NOT_STARTED` ones are the firsts - it is actually a numpy array for us to be able to - change the properties in deep memory - tx_not_started_count : int - the not started transactions count - """ - tx_not_started = [] - tx_other = [] - - for tx in transactions: - if tx.status == Status.NOT_STARTED: - tx_not_started.append(tx) - else: - tx_other.append(tx) - - tx_not_started_count = len(tx_not_started) - tx_not_started_sorted = sort_transactions_orderly(tx_not_started) - - transactions_ordered = list(tx_not_started_sorted) - transactions_ordered.extend(tx_other) - - return np.array(transactions_ordered), tx_not_started_count - - -def sort_transactions_orderly(transactions: list): - """ - sort transactions by their `order` property - - Parameters: - ------------ - transactions : list[ITransaction] - the list of transactions to order - - Returns: - --------- - transactions_orderly_sorted : list[ITransaction] - transactions sorted by their order - """ - orders = [tx.order for tx in transactions] - sorted_indices = np.argsort(orders) - - return np.array(transactions)[sorted_indices] diff --git a/worker.py b/worker.py deleted file mode 100644 index d756fd7..0000000 --- 
a/worker.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging - -from rq import Worker -from utils.redis import RedisSingleton - - -def worker_exception_handler(job, exc_type, exc_value, traceback): - logging.error(" ========= RQ Exception =========") - logging.error(f"JOB: {job}") - logging.error(f"exc_type: {exc_type}") - logging.error(f"exc_value: {exc_value}") - logging.error(f"traceback: {traceback}") - - -if __name__ == "__main__": - logging.basicConfig() - logging.getLogger().setLevel(logging.INFO) - - r = RedisSingleton.get_instance().get_client() - worker = Worker( - queues=["default"], connection=r, exception_handlers=worker_exception_handler - ) - logging.info("Registered the worker!") - try: - worker.work(with_scheduler=True, max_jobs=1) - except KeyboardInterrupt: - worker.clean_registries() - worker.stop_scheduler()
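
The test updates above capture several API changes in the move from discord_analyzer to tc_analyzer_lib. The sketches below reconstruct the new call patterns solely from those tests; they are illustrative usage notes, not verified library documentation.

Interaction matrices: activities are now named by their raw heatmap field ("replied_per_acc", "reacted_per_acc", "mentioner_per_acc") instead of Activity enum members, and each per-account payload is a flat list of dicts rather than a list of single-element lists. A minimal sketch mirroring test_two_accounts_reply; the acc_names ordering is assumed to define the matrix rows and columns:

from tc_analyzer_lib.algorithms.utils.compute_interaction_mtx_utils import (
    generate_interaction_matrix,
)

per_acc_interactions = {
    "968122690118512720": [
        {
            "user": "968122690118512720",
            "reacted_per_acc": [],
            "mentioner_per_acc": [],
            # flat list of dicts; the old format wrapped each dict in its own list
            "replied_per_acc": [{"account": "968122690118512799", "count": 7}],
        },
    ]
}

int_mtx = generate_interaction_matrix(
    per_acc_interactions,
    acc_names=["968122690118512720", "968122690118512799"],
    activities=["replied_per_acc"],  # plain field name, previously Activity.Reply
)

# row = emitter, column = receiver: user ...720 replied 7 times to ...799
assert bool((int_mtx == [[0, 7], [0, 0]]).all())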
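
Timestamps: the module-level get_timestamp helper is now a method on NetworkGraph, which is constructed from a GraphSchema and a platform id. A sketch grounded in test_get_timestamp.py and test_graph_schema.py; note that platform names containing spaces or underscores raise ValueError:

from datetime import datetime, timezone

from tc_analyzer_lib.DB_operations.network_graph import NetworkGraph
from tc_analyzer_lib.schemas import GraphSchema

graph_schema = GraphSchema(platform="discord")
assert graph_schema.platform_label == "DiscordPlatform"
assert graph_schema.user_label == "DiscordMember"

network_graph = NetworkGraph(graph_schema, "51515151515151515151")

# the timestamp is truncated to the start of the UTC day, in milliseconds
ts = network_graph.get_timestamp(datetime(2023, 1, 1, 12, 30, tzinfo=timezone.utc))
assert ts == datetime(2023, 1, 1, tzinfo=timezone.utc).timestamp() * 1000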
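
Hourly heatmaps: AnalyticsHourly._process_vectors spreads aggregation results keyed by hour of day onto a fixed 24-slot vector, as exercised by the new test_heatmaps_analytics_base_process_vectors.py:

from tc_analyzer_lib.metrics.heatmaps.analytics_hourly import AnalyticsHourly

analytics = AnalyticsHourly("3456789")  # platform id

# each entry maps an hour-of-day bucket (_id) to an activity count
vector = analytics._process_vectors(
    [{"_id": 0, "count": 2}, {"_id": 3, "count": 4}, {"_id": 19, "count": 7}]
)
assert len(vector) == 24
assert vector[0] == 2 and vector[3] == 4 and vector[19] == 7
assert sum(vector) == 13  # all other hours stay at zero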
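
Per-account interactions: per_account_interactions now takes the interaction fields to aggregate as an explicit dict_keys argument, and the result's key order is no longer guaranteed (the updated tests compare key sets rather than lists). A sketch of the new signature:

from tc_analyzer_lib.algorithms.analytics_interactions_script import (
    per_account_interactions,
)

results = per_account_interactions(
    [
        {
            "user": "acc1",
            "channel_id": "1234",
            "mentioner_accounts": [{"account": "Ene SS Rawa#0855", "count": 1}],
            "reacter_accounts": [],
            "replier_accounts": [],
        }
    ],
    dict_keys=["mentioner_accounts", "reacter_accounts", "replier_accounts"],
)

# one summary per requested key, plus a combined view across all of them
assert set(results.keys()) == {
    "mentioner_accounts",
    "reacter_accounts",
    "replier_accounts",
    "all_interaction_accounts",
}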
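
Action processing: process_non_reactions is renamed to process_actions, and the fields to leave untouched are now passed explicitly as skip_fields. A sketch under the assumption, consistent with the tests above, that each hourly action vector is condensed into a per-account count entry and that all-zero vectors become empty lists; the hour values chosen here are hypothetical:

import numpy as np

from tc_analyzer_lib.algorithms.compute_interaction_matrix_discord import (
    process_actions,
)

lone_messages = np.zeros(24)
lone_messages[5:8] = 1  # hypothetical: one lone message at hours 5, 6, and 7

heatmaps = {
    "acc1": [
        {
            "lone_messages": lone_messages,
            "thr_messages": np.zeros(24),
            "reacted_per_acc": [{"account": "acc2", "count": 1}],
            "replied_per_acc": [],
            "date": "2024-01-01",
        }
    ]
}

results = process_actions(
    heatmaps, skip_fields=["date", "reacted_per_acc", "replied_per_acc"]
)
assert results["acc1"][0]["lone_messages"] == [{"account": "acc1", "count": 3}]
assert results["acc1"][0]["thr_messages"] == []  # all-zero vectors collapse to []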
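
Analytics schemas: the new HourlyAnalytics and RawAnalytics schemas serialize to plain dicts with string-valued enums ("actions"/"interactions", "emitter"/"receiver"); from_dict raises KeyError on missing fields and ValueError on unknown activity types. A sketch mirroring test_schema_hourly_analytics.py:

from tc_analyzer_lib.schemas import ActivityDirection, ActivityType, HourlyAnalytics

data = {
    "name": "analytics1",
    "type": "actions",
    "member_activities_used": True,
    "rawmemberactivities_condition": {"key": "value"},
    "direction": "emitter",
}

analytics = HourlyAnalytics.from_dict(data)
assert analytics.type == ActivityType.ACTION
assert analytics.direction == ActivityDirection.EMITTER

# to_dict re-emits the string forms, plus an activity_name field defaulting to None
assert analytics.to_dict()["type"] == "actions"
assert analytics.to_dict()["activity_name"] is None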