From 7f131172eb72677418303c85c5d892fc11cc963e Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Sun, 12 Jan 2025 17:41:47 -0500 Subject: [PATCH 01/12] work-in-progress: when a user uploads a text file attachment, ask for permission to auto-upload to pastebin. Also DMs the delete URL to the user. This code will very likely be moved elsewhere before/if it is merged. --- bot/exts/filtering/_filter_lists/extension.py | 92 ++++++++++++++++++- bot/exts/filtering/filtering.py | 2 + 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/bot/exts/filtering/_filter_lists/extension.py b/bot/exts/filtering/_filter_lists/extension.py index d656bc6d23..3e7fa8e755 100644 --- a/bot/exts/filtering/_filter_lists/extension.py +++ b/bot/exts/filtering/_filter_lists/extension.py @@ -1,10 +1,18 @@ from __future__ import annotations +import logging +import re import typing from os.path import splitext +import aiohttp +import discord +from discord.ext import commands +from pydis_core.utils import paste_service + import bot -from bot.constants import Channels +from bot.bot import Bot +from bot.constants import Channels, Emojis from bot.exts.filtering._filter_context import Event, FilterContext from bot.exts.filtering._filter_lists.filter_list import FilterList, ListType from bot.exts.filtering._filters.extension import ExtensionFilter @@ -20,7 +28,7 @@ f"please use a code-pasting service such as {PASTE_URL}" ) -TXT_LIKE_FILES = {".txt", ".csv", ".json"} +TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"} TXT_EMBED_DESCRIPTION = ( "You either uploaded a `{blocked_extension}` file or entered a message that was too long. " f"Please use our [paste bin]({PASTE_URL}) instead." @@ -32,6 +40,9 @@ "Feel free to ask in {meta_channel_mention} if you think this is a mistake." 
) +PASTEBIN_UPLOAD_EMOJI = Emojis.check_mark +DELETE_PASTE_EMOJI = Emojis.trashcan + class ExtensionsList(FilterList[ExtensionFilter]): """ @@ -116,3 +127,80 @@ async def actions_for( ctx.blocked_exts |= set(not_allowed) actions = self[ListType.ALLOW].defaults.actions if ctx.event != Event.SNEKBOX else None return actions, [f"`{ext}`" if ext else "`No Extension`" for ext in not_allowed], {ListType.ALLOW: triggered} + + +class EmbedFileHandler(commands.Cog): + + def __init__(self, bot: Bot): + self.bot = bot + + @staticmethod + async def _convert_attachment(attachment: discord.Attachment) -> paste_service.PasteFile: + encoding = re.search(r"charset=(\S+)", attachment.content_type).group(1) + file_content = (await attachment.read()).decode(encoding) + return paste_service.PasteFile(content=file_content, name=attachment.filename) + + @commands.Cog.listener() + async def on_message(self, message: discord.Message) -> None: + # Check if the message contains an embedded file and is not sent by a bot + if message.author.bot or not message.attachments: + return + + bot_reply = await message.reply(f"React with {PASTEBIN_UPLOAD_EMOJI} to upload your file to our paste bin") + await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI) + + def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) -> bool: + return ( + reaction.message.id == bot_reply.id + and str(reaction.emoji) == PASTEBIN_UPLOAD_EMOJI + and user == message.author + ) + + try: + # Wait for the reaction with a timeout of 60 seconds + await self.bot.wait_for("reaction_add", timeout=60.0, check=wait_for_upload_permission) + except TimeoutError: + await bot_reply.edit(content=f"~~{bot_reply.content}~~") + await bot_reply.clear_reactions() + return + + logging.info({f.filename: f.content_type for f in message.attachments}) + + files = [ + await self._convert_attachment(f) + for f in message.attachments + if f.content_type.startswith("text") + ] + + try: + async with aiohttp.ClientSession() as session: 
+ paste_response = await paste_service.send_to_paste_service(files=files, http_session=session) + except (paste_service.PasteTooLongError, ValueError): + # paste is too long + await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.") + return + except paste_service.PasteUploadError: + await bot_reply.edit(content="There was an error uploading your paste.") + return + + # The angle brackets around the remove link are required to stop Discord from visiting the URL to produce a + # preview, thereby deleting the paste + await message.author.send(content=f"[Click here](<{paste_response.removal}>) to delete your recent paste.") + + await bot_reply.edit(content=f"[Click here]({paste_response.link}) to see this code in our pastebin.") + await bot_reply.clear_reactions() + await bot_reply.add_reaction(DELETE_PASTE_EMOJI) + + def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> bool: + return ( + reaction.message.id == bot_reply.id + and str(reaction.emoji) == DELETE_PASTE_EMOJI + and user == message.author + ) + + try: + await self.bot.wait_for("reaction_add", timeout=60.0 * 10, check=wait_for_delete_reaction) + await paste_response.delete() + await bot_reply.delete() + except TimeoutError: + pass diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py index 844f2942e6..929eb064ce 100644 --- a/bot/exts/filtering/filtering.py +++ b/bot/exts/filtering/filtering.py @@ -28,6 +28,7 @@ from bot.exts.backend.branding._repository import HEADERS, PARAMS from bot.exts.filtering._filter_context import Event, FilterContext from bot.exts.filtering._filter_lists import FilterList, ListType, ListTypeConverter, filter_list_types +from bot.exts.filtering._filter_lists.extension import EmbedFileHandler from bot.exts.filtering._filter_lists.filter_list import AtomicList from bot.exts.filtering._filters.filter import Filter, UniqueFilter from bot.exts.filtering._settings import ActionSettings @@ -1492,3 +1493,4 @@ async 
def cog_unload(self) -> None: async def setup(bot: Bot) -> None: """Load the Filtering cog.""" await bot.add_cog(Filtering(bot)) + await bot.add_cog(EmbedFileHandler(bot)) From bb808ee66f191f3a9bb3fa92d2cb69396ab65256 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Sun, 12 Jan 2025 17:50:01 -0500 Subject: [PATCH 02/12] Migrate to py3.12 generic syntax (removes TypeVar). --- bot/exts/filtering/_filter_lists/filter_list.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bot/exts/filtering/_filter_lists/filter_list.py b/bot/exts/filtering/_filter_lists/filter_list.py index 2cc54e8fbc..48d05c97a6 100644 --- a/bot/exts/filtering/_filter_lists/filter_list.py +++ b/bot/exts/filtering/_filter_lists/filter_list.py @@ -157,10 +157,7 @@ def __hash__(self): return hash(id(self)) -T = typing.TypeVar("T", bound=Filter) - - -class FilterList(dict[ListType, AtomicList], typing.Generic[T], FieldRequiring): +class FilterList[T: Filter](dict[ListType, AtomicList], FieldRequiring): """Dispatches events to lists of _filters, and aggregates the responses into a single list of actions to take.""" # Each subclass must define a name matching the filter_list name we're expecting to receive from the database. From 3ea3a27fb0792e73744ce315e2a512ead09e3112 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Sun, 19 Jan 2025 12:16:14 -0500 Subject: [PATCH 03/12] Migrate to py3.12 typing syntax. 
--- bot/exts/filtering/_settings.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/bot/exts/filtering/_settings.py b/bot/exts/filtering/_settings.py index 7005dd2d1b..6760a3f01a 100644 --- a/bot/exts/filtering/_settings.py +++ b/bot/exts/filtering/_settings.py @@ -5,7 +5,7 @@ from abc import abstractmethod from copy import copy from functools import reduce -from typing import Any, NamedTuple, Self, TypeVar +from typing import Any, NamedTuple, Self from bot.exts.filtering._filter_context import FilterContext from bot.exts.filtering._settings_types import settings_types @@ -13,13 +13,9 @@ from bot.exts.filtering._utils import FieldRequiring from bot.log import get_logger -TSettings = TypeVar("TSettings", bound="Settings") - log = get_logger(__name__) -_already_warned: set[str] = set() - -T = TypeVar("T", bound=SettingsEntry) +_already_warned = set[str]() def create_settings( @@ -55,7 +51,7 @@ def create_settings( ) -class Settings(FieldRequiring, dict[str, T]): +class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]): """ A collection of settings. @@ -69,7 +65,7 @@ class Settings(FieldRequiring, dict[str, T]): entry_type: type[T] - _already_warned: set[str] = set() + _already_warned = set[str]() @abstractmethod # ABCs have to have at least once abstract method to actually count as such. def __init__(self, settings_data: dict, *, defaults: Settings | None = None, keep_empty: bool = False): @@ -104,7 +100,7 @@ def overrides(self) -> dict[str, Any]: """Return a dictionary of overrides across all entries.""" return reduce(operator.or_, (entry.overrides for entry in self.values() if entry), {}) - def copy(self: TSettings) -> TSettings: + def copy(self: Self) -> Self: """Create a shallow copy of the object.""" return copy(self) From ea62c748234fc722756faefb9884976f1af2112b Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Sun, 19 Jan 2025 12:17:51 -0500 Subject: [PATCH 04/12] Apply token filters to text attachment content. 
Works by appending text attachment content to message content, and then applying the filters normally. --- bot/exts/filtering/filtering.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py index 929eb064ce..a281aff79d 100644 --- a/bot/exts/filtering/filtering.py +++ b/bot/exts/filtering/filtering.py @@ -81,7 +81,7 @@ class Filtering(Cog): def __init__(self, bot: Bot): self.bot = bot self.filter_lists: dict[str, FilterList] = {} - self._subscriptions: defaultdict[Event, list[FilterList]] = defaultdict(list) + self._subscriptions = defaultdict[Event, list[FilterList]](list) self.delete_scheduler = scheduling.Scheduler(self.__class__.__name__) self.webhook: discord.Webhook | None = None @@ -224,6 +224,15 @@ async def on_message(self, msg: Message) -> None: self.message_cache.append(msg) ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache) + + text_contents = [ + f"{a.filename}: " + (await a.read()).decode() + for a in msg.attachments if a.content_type.startswith("text") + ] + if text_contents: + attachment_content = "\n\n".join(text_contents) + ctx = ctx.replace(content=f"{ctx.content}\n\n{attachment_content}") + result_actions, list_messages, triggers = await self._resolve_action(ctx) self.message_cache.update(msg, metadata=triggers) if result_actions: From 7d424b8d2d5b1445e753cb9fbdffb91346a641e9 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Wed, 29 Jan 2025 18:51:18 -0500 Subject: [PATCH 05/12] Add helper function for extracting attachment text. 
Implements a somewhat arbitrary limit on how much text content is passed along for filtering, to avoid wasting compute time on large attachments that aren't intended to be read (such as CSVs). --- bot/exts/filtering/filtering.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py index a281aff79d..f902ee9ec7 100644 --- a/bot/exts/filtering/filtering.py +++ b/bot/exts/filtering/filtering.py @@ -67,6 +67,13 @@ WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday +async def _extract_text_file_content(att: discord.Attachment) -> str: + """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment.""" + file_lines: list[str] = (await att.read()).decode().splitlines() + first_n_lines = "\n".join(file_lines[:30])[:2_000] + return f"{att.filename}: {first_n_lines}" + + class Filtering(Cog): """Filtering and alerting for content posted on the server.""" @@ -226,7 +233,7 @@ async def on_message(self, msg: Message) -> None: ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache) text_contents = [ - f"{a.filename}: " + (await a.read()).decode() + await _extract_text_file_content(a) for a in msg.attachments if a.content_type.startswith("text") ] if text_contents: From 9f4177e33c5ba118c90505868d009ab7b46561d5 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Wed, 29 Jan 2025 19:33:02 -0500 Subject: [PATCH 06/12] Move EmbedFileHandler cog to its own module --- bot/exts/filtering/_filter_lists/extension.py | 85 -------------- bot/exts/filtering/filtering.py | 2 - .../utils/attachment_pastebin_uploader.py | 111 ++++++++++++++++++ 3 files changed, 111 insertions(+), 87 deletions(-) create mode 100644 bot/exts/utils/attachment_pastebin_uploader.py diff --git a/bot/exts/filtering/_filter_lists/extension.py b/bot/exts/filtering/_filter_lists/extension.py index 3e7fa8e755..a89a980a7e 100644 --- 
a/bot/exts/filtering/_filter_lists/extension.py +++ b/bot/exts/filtering/_filter_lists/extension.py @@ -1,17 +1,9 @@ from __future__ import annotations -import logging -import re import typing from os.path import splitext -import aiohttp -import discord -from discord.ext import commands -from pydis_core.utils import paste_service - import bot -from bot.bot import Bot from bot.constants import Channels, Emojis from bot.exts.filtering._filter_context import Event, FilterContext from bot.exts.filtering._filter_lists.filter_list import FilterList, ListType @@ -127,80 +119,3 @@ async def actions_for( ctx.blocked_exts |= set(not_allowed) actions = self[ListType.ALLOW].defaults.actions if ctx.event != Event.SNEKBOX else None return actions, [f"`{ext}`" if ext else "`No Extension`" for ext in not_allowed], {ListType.ALLOW: triggered} - - -class EmbedFileHandler(commands.Cog): - - def __init__(self, bot: Bot): - self.bot = bot - - @staticmethod - async def _convert_attachment(attachment: discord.Attachment) -> paste_service.PasteFile: - encoding = re.search(r"charset=(\S+)", attachment.content_type).group(1) - file_content = (await attachment.read()).decode(encoding) - return paste_service.PasteFile(content=file_content, name=attachment.filename) - - @commands.Cog.listener() - async def on_message(self, message: discord.Message) -> None: - # Check if the message contains an embedded file and is not sent by a bot - if message.author.bot or not message.attachments: - return - - bot_reply = await message.reply(f"React with {PASTEBIN_UPLOAD_EMOJI} to upload your file to our paste bin") - await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI) - - def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) -> bool: - return ( - reaction.message.id == bot_reply.id - and str(reaction.emoji) == PASTEBIN_UPLOAD_EMOJI - and user == message.author - ) - - try: - # Wait for the reaction with a timeout of 60 seconds - await self.bot.wait_for("reaction_add", timeout=60.0, 
check=wait_for_upload_permission) - except TimeoutError: - await bot_reply.edit(content=f"~~{bot_reply.content}~~") - await bot_reply.clear_reactions() - return - - logging.info({f.filename: f.content_type for f in message.attachments}) - - files = [ - await self._convert_attachment(f) - for f in message.attachments - if f.content_type.startswith("text") - ] - - try: - async with aiohttp.ClientSession() as session: - paste_response = await paste_service.send_to_paste_service(files=files, http_session=session) - except (paste_service.PasteTooLongError, ValueError): - # paste is too long - await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.") - return - except paste_service.PasteUploadError: - await bot_reply.edit(content="There was an error uploading your paste.") - return - - # The angle brackets around the remove link are required to stop Discord from visiting the URL to produce a - # preview, thereby deleting the paste - await message.author.send(content=f"[Click here](<{paste_response.removal}>) to delete your recent paste.") - - await bot_reply.edit(content=f"[Click here]({paste_response.link}) to see this code in our pastebin.") - await bot_reply.clear_reactions() - await bot_reply.add_reaction(DELETE_PASTE_EMOJI) - - def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> bool: - return ( - reaction.message.id == bot_reply.id - and str(reaction.emoji) == DELETE_PASTE_EMOJI - and user == message.author - ) - - try: - await self.bot.wait_for("reaction_add", timeout=60.0 * 10, check=wait_for_delete_reaction) - await paste_response.delete() - await bot_reply.delete() - except TimeoutError: - pass diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py index f902ee9ec7..e1483e18f4 100644 --- a/bot/exts/filtering/filtering.py +++ b/bot/exts/filtering/filtering.py @@ -28,7 +28,6 @@ from bot.exts.backend.branding._repository import HEADERS, PARAMS from bot.exts.filtering._filter_context import 
Event, FilterContext from bot.exts.filtering._filter_lists import FilterList, ListType, ListTypeConverter, filter_list_types -from bot.exts.filtering._filter_lists.extension import EmbedFileHandler from bot.exts.filtering._filter_lists.filter_list import AtomicList from bot.exts.filtering._filters.filter import Filter, UniqueFilter from bot.exts.filtering._settings import ActionSettings @@ -1509,4 +1508,3 @@ async def cog_unload(self) -> None: async def setup(bot: Bot) -> None: """Load the Filtering cog.""" await bot.add_cog(Filtering(bot)) - await bot.add_cog(EmbedFileHandler(bot)) diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py new file mode 100644 index 0000000000..cc507f39ed --- /dev/null +++ b/bot/exts/utils/attachment_pastebin_uploader.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import logging +import re + +import aiohttp +import discord +from discord.ext import commands +from pydis_core.utils import paste_service + +from bot.bot import Bot +from bot.constants import Emojis + +PASTEBIN_UPLOAD_EMOJI = Emojis.check_mark +DELETE_PASTE_EMOJI = Emojis.trashcan + + +class EmbedFileHandler(commands.Cog): + """ + Handles automatic uploading of attachments to the paste bin. + + Whenever a user uploads one or more attachments that is text-based (py, txt, csv, etc.), this cog offers to upload + all the attachments to the paste bin automatically. The steps are as follows: + - The bot replies to the message containing the attachments, asking the user to react with a checkmark to consent + to having the content uploaded. + - If consent is given, the bot uploads the contents and edits its own message to contain the link. + - The bot DMs the user the delete link for the paste. + - The bot waits for the user to react with a trashcan emoji, in which case the bot deletes the paste and its own + message. 
+ """ + + def __init__(self, bot: Bot): + self.bot = bot + + @staticmethod + async def _convert_attachment(attachment: discord.Attachment) -> paste_service.PasteFile: + """Converts an attachment to a PasteFile, according to the attachment's file encoding.""" + encoding = re.search(r"charset=(\S+)", attachment.content_type).group(1) + file_content = (await attachment.read()).decode(encoding) + return paste_service.PasteFile(content=file_content, name=attachment.filename) + + @commands.Cog.listener() + async def on_message(self, message: discord.Message) -> None: + """Listens for messages containing attachments and offers to upload them to the pastebin.""" + # Check if the message contains an embedded file and is not sent by a bot + if message.author.bot or not message.attachments: + return + + bot_reply = await message.reply(f"React with {PASTEBIN_UPLOAD_EMOJI} to upload your file to our paste bin") + await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI) + + def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) -> bool: + return ( + reaction.message.id == bot_reply.id + and str(reaction.emoji) == PASTEBIN_UPLOAD_EMOJI + and user == message.author + ) + + try: + # Wait for the reaction with a timeout of 60 seconds + await self.bot.wait_for("reaction_add", timeout=60.0, check=wait_for_upload_permission) + except TimeoutError: + await bot_reply.edit(content=f"~~{bot_reply.content}~~") + await bot_reply.clear_reactions() + return + + logging.info({f.filename: f.content_type for f in message.attachments}) + + files = [ + await self._convert_attachment(f) + for f in message.attachments + if f.content_type.startswith("text") + ] + + try: + async with aiohttp.ClientSession() as session: + paste_response = await paste_service.send_to_paste_service(files=files, http_session=session) + except (paste_service.PasteTooLongError, ValueError): + # paste is too long + await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.") + return + 
except paste_service.PasteUploadError: + await bot_reply.edit(content="There was an error uploading your paste.") + return + + # The angle brackets around the remove link are required to stop Discord from visiting the URL to produce a + # preview, thereby deleting the paste + await message.author.send(content=f"[Click here](<{paste_response.removal}>) to delete your recent paste.") + + await bot_reply.edit(content=f"[Click here]({paste_response.link}) to see this code in our pastebin.") + await bot_reply.clear_reactions() + await bot_reply.add_reaction(DELETE_PASTE_EMOJI) + + def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> bool: + return ( + reaction.message.id == bot_reply.id + and str(reaction.emoji) == DELETE_PASTE_EMOJI + and user == message.author + ) + + try: + await self.bot.wait_for("reaction_add", timeout=60.0 * 10, check=wait_for_delete_reaction) + await paste_response.delete() + await bot_reply.delete() + except TimeoutError: + pass + + +async def setup(bot: Bot) -> None: + """Load the EmbedFileHandler cog.""" + await bot.add_cog(EmbedFileHandler(bot)) From 1f50e946a67dd32fab53b293612e70453e57c93b Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Wed, 29 Jan 2025 19:38:59 -0500 Subject: [PATCH 07/12] Exit early if none of the attachments are text. Previously, the bot might have offered to upload the attachments in a message containing only images, and then done nothing. 
--- bot/exts/utils/attachment_pastebin_uploader.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py index cc507f39ed..d6d3c7d2cd 100644 --- a/bot/exts/utils/attachment_pastebin_uploader.py +++ b/bot/exts/utils/attachment_pastebin_uploader.py @@ -43,7 +43,7 @@ async def _convert_attachment(attachment: discord.Attachment) -> paste_service.P async def on_message(self, message: discord.Message) -> None: """Listens for messages containing attachments and offers to upload them to the pastebin.""" # Check if the message contains an embedded file and is not sent by a bot - if message.author.bot or not message.attachments: + if message.author.bot or not any(a.content_type.startswith("text") for a in message.attachments): return bot_reply = await message.reply(f"React with {PASTEBIN_UPLOAD_EMOJI} to upload your file to our paste bin") @@ -76,7 +76,6 @@ def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) - async with aiohttp.ClientSession() as session: paste_response = await paste_service.send_to_paste_service(files=files, http_session=session) except (paste_service.PasteTooLongError, ValueError): - # paste is too long await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.") return except paste_service.PasteUploadError: From 9aba05802bc89e367c47703c7587e758973e38a6 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Wed, 29 Jan 2025 19:45:18 -0500 Subject: [PATCH 08/12] Add comments explaining each step. 
--- bot/exts/utils/attachment_pastebin_uploader.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py index d6d3c7d2cd..bc5e39b0b3 100644 --- a/bot/exts/utils/attachment_pastebin_uploader.py +++ b/bot/exts/utils/attachment_pastebin_uploader.py @@ -1,6 +1,5 @@ from __future__ import annotations -import logging import re import aiohttp @@ -42,10 +41,11 @@ async def _convert_attachment(attachment: discord.Attachment) -> paste_service.P @commands.Cog.listener() async def on_message(self, message: discord.Message) -> None: """Listens for messages containing attachments and offers to upload them to the pastebin.""" - # Check if the message contains an embedded file and is not sent by a bot + # Check if the message contains an embedded file and is not sent by a bot. if message.author.bot or not any(a.content_type.startswith("text") for a in message.attachments): return + # Offer to upload the attachments and wait for the user's reaction. bot_reply = await message.reply(f"React with {PASTEBIN_UPLOAD_EMOJI} to upload your file to our paste bin") await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI) @@ -57,21 +57,22 @@ def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) - ) try: - # Wait for the reaction with a timeout of 60 seconds + # Wait for the reaction with a timeout of 60 seconds. await self.bot.wait_for("reaction_add", timeout=60.0, check=wait_for_upload_permission) except TimeoutError: + # The user does not grant permission before the timeout. Exit early. await bot_reply.edit(content=f"~~{bot_reply.content}~~") await bot_reply.clear_reactions() return - logging.info({f.filename: f.content_type for f in message.attachments}) - + # Extract the attachments. 
files = [ await self._convert_attachment(f) for f in message.attachments if f.content_type.startswith("text") ] + # Upload the files to the paste bin, exiting early if there's an error. try: async with aiohttp.ClientSession() as session: paste_response = await paste_service.send_to_paste_service(files=files, http_session=session) @@ -82,14 +83,18 @@ def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) - await bot_reply.edit(content="There was an error uploading your paste.") return + # Send the user a DM with the delete link for the paste. # The angle brackets around the remove link are required to stop Discord from visiting the URL to produce a # preview, thereby deleting the paste await message.author.send(content=f"[Click here](<{paste_response.removal}>) to delete your recent paste.") + # Edit the bot message to contain the link to the paste. await bot_reply.edit(content=f"[Click here]({paste_response.link}) to see this code in our pastebin.") await bot_reply.clear_reactions() await bot_reply.add_reaction(DELETE_PASTE_EMOJI) + # Wait for the user to react with a trash can, which they can use to delete the paste. + def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> bool: return ( reaction.message.id == bot_reply.id From 15c3e5e042d16f5076a9e6389a25d40b5d269b05 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Wed, 29 Jan 2025 20:15:56 -0500 Subject: [PATCH 09/12] Add logging; expand initial message from bot; fix bug in deleting the paste. The expanded initial message tells the user that uploading to the paste bin is for accessibility. I hallucinated that PasteResponse objects have a delete method, which they do not. 
--- .../utils/attachment_pastebin_uploader.py | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py index bc5e39b0b3..1568146861 100644 --- a/bot/exts/utils/attachment_pastebin_uploader.py +++ b/bot/exts/utils/attachment_pastebin_uploader.py @@ -9,6 +9,9 @@ from bot.bot import Bot from bot.constants import Emojis +from bot.log import get_logger + +log = get_logger(__name__) PASTEBIN_UPLOAD_EMOJI = Emojis.check_mark DELETE_PASTE_EMOJI = Emojis.trashcan @@ -45,8 +48,13 @@ async def on_message(self, message: discord.Message) -> None: if message.author.bot or not any(a.content_type.startswith("text") for a in message.attachments): return + log.trace(f"Offering to upload attachments for {message.author} in {message.channel}, message {message.id}") + # Offer to upload the attachments and wait for the user's reaction. - bot_reply = await message.reply(f"React with {PASTEBIN_UPLOAD_EMOJI} to upload your file to our paste bin") + bot_reply = await message.reply( + f"Please react with {PASTEBIN_UPLOAD_EMOJI} to upload your file(s) to our " + f"[paste bin](), which is more accessible for some users." + ) await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI) def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) -> bool: @@ -61,6 +69,7 @@ def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) - await self.bot.wait_for("reaction_add", timeout=60.0, check=wait_for_upload_permission) except TimeoutError: # The user does not grant permission before the timeout. Exit early. 
+ log.trace(f"{message.author} didn't give permission to upload {message.id} content; aborting.") await bot_reply.edit(content=f"~~{bot_reply.content}~~") await bot_reply.clear_reactions() return @@ -73,13 +82,16 @@ def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) - ] # Upload the files to the paste bin, exiting early if there's an error. + log.trace(f"Attempting to upload {len(files)} file(s) to pastebin.") try: async with aiohttp.ClientSession() as session: paste_response = await paste_service.send_to_paste_service(files=files, http_session=session) except (paste_service.PasteTooLongError, ValueError): + log.trace(f"{message.author}'s attachments were too long.") await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.") return except paste_service.PasteUploadError: + log.trace(f"Unexpected error uploading {message.author}'s attachments.") await bot_reply.edit(content="There was an error uploading your paste.") return @@ -103,11 +115,14 @@ def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> ) try: + log.trace(f"Offering to delete {message.author}'s attachments in {message.channel}, message {message.id}") await self.bot.wait_for("reaction_add", timeout=60.0 * 10, check=wait_for_delete_reaction) - await paste_response.delete() + # Delete the paste by visiting the removal URL. + async with aiohttp.ClientSession() as session: + await session.get(paste_response.removal) await bot_reply.delete() except TimeoutError: - pass + log.trace(f"Offer to delete {message.author}'s attachments timed out.") async def setup(bot: Bot) -> None: From c19aeaab3704c60c6dda9b7ed3d207cfc3ff50aa Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Wed, 29 Jan 2025 20:49:18 -0500 Subject: [PATCH 10/12] Account for messages with attachments being deleted before author reacts. 
Messages might be deleted immediately if the message or the attachment trips a filter, in which case we don't want the user to be able to upload them. --- bot/exts/utils/attachment_pastebin_uploader.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py index 1568146861..6c54a4a2b1 100644 --- a/bot/exts/utils/attachment_pastebin_uploader.py +++ b/bot/exts/utils/attachment_pastebin_uploader.py @@ -33,6 +33,7 @@ class EmbedFileHandler(commands.Cog): def __init__(self, bot: Bot): self.bot = bot + self.pending_messages = set[int]() @staticmethod async def _convert_attachment(attachment: discord.Attachment) -> paste_service.PasteFile: @@ -41,6 +42,11 @@ async def _convert_attachment(attachment: discord.Attachment) -> paste_service.P file_content = (await attachment.read()).decode(encoding) return paste_service.PasteFile(content=file_content, name=attachment.filename) + @commands.Cog.listener() + async def on_message_delete(self, message: discord.Message) -> None: + """Allows us to know which messages with attachments have been deleted.""" + self.pending_messages.discard(message.id) + @commands.Cog.listener() async def on_message(self, message: discord.Message) -> None: """Listens for messages containing attachments and offers to upload them to the pastebin.""" @@ -49,6 +55,7 @@ async def on_message(self, message: discord.Message) -> None: return log.trace(f"Offering to upload attachments for {message.author} in {message.channel}, message {message.id}") + self.pending_messages.add(message.id) # Offer to upload the attachments and wait for the user's reaction. 
bot_reply = await message.reply( @@ -72,8 +79,15 @@ def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) - log.trace(f"{message.author} didn't give permission to upload {message.id} content; aborting.") await bot_reply.edit(content=f"~~{bot_reply.content}~~") await bot_reply.clear_reactions() + + if message.id not in self.pending_messages: + log.trace(f"{message.author}'s message was deleted before the attachments could be uploaded; aborting.") + await bot_reply.delete() return + # In either case, we do not want the message ID in pending_messages anymore. + self.pending_messages.discard(message.id) + # Extract the attachments. files = [ await self._convert_attachment(f) From a5cf653f233451dd2caf06fb20aeb46d6a737ef6 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Thu, 30 Jan 2025 00:00:35 -0500 Subject: [PATCH 11/12] Remove messages related to disallowed text-like files. These files will be made allowed. Also move `TXT_LIKE_FILES` to the other module that uses it. 
--- bot/exts/filtering/_filter_lists/extension.py | 44 +++++-------------- bot/exts/utils/snekbox/_cog.py | 3 +- 2 files changed, 13 insertions(+), 34 deletions(-) diff --git a/bot/exts/filtering/_filter_lists/extension.py b/bot/exts/filtering/_filter_lists/extension.py index a89a980a7e..e485640924 100644 --- a/bot/exts/filtering/_filter_lists/extension.py +++ b/bot/exts/filtering/_filter_lists/extension.py @@ -4,7 +4,7 @@ from os.path import splitext import bot -from bot.constants import Channels, Emojis +from bot.constants import Channels from bot.exts.filtering._filter_context import Event, FilterContext from bot.exts.filtering._filter_lists.filter_list import FilterList, ListType from bot.exts.filtering._filters.extension import ExtensionFilter @@ -14,27 +14,12 @@ if typing.TYPE_CHECKING: from bot.exts.filtering.filtering import Filtering -PASTE_URL = "https://paste.pythondiscord.com" -PY_EMBED_DESCRIPTION = ( - "It looks like you tried to attach a Python file - " - f"please use a code-pasting service such as {PASTE_URL}" -) - -TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"} -TXT_EMBED_DESCRIPTION = ( - "You either uploaded a `{blocked_extension}` file or entered a message that was too long. " - f"Please use our [paste bin]({PASTE_URL}) instead." -) - DISALLOWED_EMBED_DESCRIPTION = ( "It looks like you tried to attach file type(s) that we do not allow ({joined_blacklist}). " "We currently allow the following file types: **{joined_whitelist}**.\n\n" "Feel free to ask in {meta_channel_mention} if you think this is a mistake." 
) -PASTEBIN_UPLOAD_EMOJI = Emojis.check_mark -DELETE_PASTE_EMOJI = Emojis.trashcan - class ExtensionsList(FilterList[ExtensionFilter]): """ @@ -90,30 +75,23 @@ async def actions_for( not_allowed = {ext: filename for ext, filename in all_ext if ext not in allowed_ext} if ctx.event == Event.SNEKBOX: - not_allowed = {ext: filename for ext, filename in not_allowed.items() if ext not in TXT_LIKE_FILES} + not_allowed = dict(not_allowed.items()) if not not_allowed: # Yes, it's a double negative. Meaning all attachments are allowed :) return None, [], {ListType.ALLOW: triggered} # At this point, something is disallowed. if ctx.event != Event.SNEKBOX: # Don't post the embed if it's a snekbox response. - if ".py" in not_allowed: - # Provide a pastebin link for .py files. - ctx.dm_embed = PY_EMBED_DESCRIPTION - elif txt_extensions := {ext for ext in TXT_LIKE_FILES if ext in not_allowed}: - # Work around Discord auto-conversion of messages longer than 2000 chars to .txt - ctx.dm_embed = TXT_EMBED_DESCRIPTION.format(blocked_extension=txt_extensions.pop()) - else: - meta_channel = bot.instance.get_channel(Channels.meta) - if not self._whitelisted_description: - self._whitelisted_description = ", ".join( - filter_.content for filter_ in self[ListType.ALLOW].filters.values() - ) - ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format( - joined_whitelist=self._whitelisted_description, - joined_blacklist=", ".join(not_allowed), - meta_channel_mention=meta_channel.mention, + meta_channel = bot.instance.get_channel(Channels.meta) + if not self._whitelisted_description: + self._whitelisted_description = ", ".join( + filter_.content for filter_ in self[ListType.ALLOW].filters.values() ) + ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format( + joined_whitelist=self._whitelisted_description, + joined_blacklist=", ".join(not_allowed), + meta_channel_mention=meta_channel.mention, + ) ctx.matches += not_allowed.values() ctx.blocked_exts |= set(not_allowed) diff --git 
a/bot/exts/utils/snekbox/_cog.py b/bot/exts/utils/snekbox/_cog.py index d448898f3c..f13ede51ad 100644 --- a/bot/exts/utils/snekbox/_cog.py +++ b/bot/exts/utils/snekbox/_cog.py @@ -17,7 +17,6 @@ from bot.bot import Bot from bot.constants import BaseURLs, Channels, Emojis, MODERATION_ROLES, Roles, URLs from bot.decorators import redirect_output -from bot.exts.filtering._filter_lists.extension import TXT_LIKE_FILES from bot.exts.help_channels._channel import is_help_forum_post from bot.exts.utils.snekbox._eval import EvalJob, EvalResult from bot.exts.utils.snekbox._io import FileAttachment @@ -32,6 +31,8 @@ ANSI_REGEX = re.compile(r"\N{ESC}\[[0-9;:]*m") ESCAPE_REGEX = re.compile("[`\u202E\u200B]{3,}") +TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"} + # The timeit command should only output the very last line, so all other output should be suppressed. # This will be used as the setup code along with any setup code provided. TIMEIT_SETUP_WRAPPER = """ From f022f5f814f7306991c23846a6640e0b13c58e15 Mon Sep 17 00:00:00 2001 From: Steele Farnsworth Date: Thu, 30 Jan 2025 18:06:24 -0500 Subject: [PATCH 12/12] Change how a file is determined to be text-like. Decode text with correct encoding. Attachments with "charset" in their content type are presumed to be text. The specified charset is always used to decode the text. 
--- bot/exts/filtering/filtering.py | 5 +++-- bot/exts/utils/attachment_pastebin_uploader.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/bot/exts/filtering/filtering.py b/bot/exts/filtering/filtering.py index e1483e18f4..377cff0152 100644 --- a/bot/exts/filtering/filtering.py +++ b/bot/exts/filtering/filtering.py @@ -68,7 +68,8 @@ async def _extract_text_file_content(att: discord.Attachment) -> str: """Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment.""" - file_lines: list[str] = (await att.read()).decode().splitlines() + file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1) + file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines() first_n_lines = "\n".join(file_lines[:30])[:2_000] return f"{att.filename}: {first_n_lines}" @@ -233,7 +234,7 @@ async def on_message(self, msg: Message) -> None: text_contents = [ await _extract_text_file_content(a) - for a in msg.attachments if a.content_type.startswith("text") + for a in msg.attachments if "charset" in a.content_type ] if text_contents: attachment_content = "\n\n".join(text_contents) diff --git a/bot/exts/utils/attachment_pastebin_uploader.py b/bot/exts/utils/attachment_pastebin_uploader.py index 6c54a4a2b1..805abd2386 100644 --- a/bot/exts/utils/attachment_pastebin_uploader.py +++ b/bot/exts/utils/attachment_pastebin_uploader.py @@ -92,7 +92,7 @@ def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) - files = [ await self._convert_attachment(f) for f in message.attachments - if f.content_type.startswith("text") + if "charset" in f.content_type ] # Upload the files to the paste bin, exiting early if there's an error.