Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Apply filtering to text attachments; offer to auto-upload text attachments to paste bin #3241

Merged
merged 13 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 10 additions & 29 deletions bot/exts/filtering/_filter_lists/extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,6 @@
if typing.TYPE_CHECKING:
from bot.exts.filtering.filtering import Filtering

PASTE_URL = "https://paste.pythondiscord.com"
PY_EMBED_DESCRIPTION = (
"It looks like you tried to attach a Python file - "
f"please use a code-pasting service such as {PASTE_URL}"
)

TXT_LIKE_FILES = {".txt", ".csv", ".json"}
TXT_EMBED_DESCRIPTION = (
"You either uploaded a `{blocked_extension}` file or entered a message that was too long. "
f"Please use our [paste bin]({PASTE_URL}) instead."
)

DISALLOWED_EMBED_DESCRIPTION = (
"It looks like you tried to attach file type(s) that we do not allow ({joined_blacklist}). "
"We currently allow the following file types: **{joined_whitelist}**.\n\n"
Expand Down Expand Up @@ -87,30 +75,23 @@ async def actions_for(
not_allowed = {ext: filename for ext, filename in all_ext if ext not in allowed_ext}

if ctx.event == Event.SNEKBOX:
not_allowed = {ext: filename for ext, filename in not_allowed.items() if ext not in TXT_LIKE_FILES}
not_allowed = dict(not_allowed.items())

if not not_allowed: # Yes, it's a double negative. Meaning all attachments are allowed :)
return None, [], {ListType.ALLOW: triggered}

# At this point, something is disallowed.
if ctx.event != Event.SNEKBOX: # Don't post the embed if it's a snekbox response.
if ".py" in not_allowed:
# Provide a pastebin link for .py files.
ctx.dm_embed = PY_EMBED_DESCRIPTION
elif txt_extensions := {ext for ext in TXT_LIKE_FILES if ext in not_allowed}:
# Work around Discord auto-conversion of messages longer than 2000 chars to .txt
ctx.dm_embed = TXT_EMBED_DESCRIPTION.format(blocked_extension=txt_extensions.pop())
else:
meta_channel = bot.instance.get_channel(Channels.meta)
if not self._whitelisted_description:
self._whitelisted_description = ", ".join(
filter_.content for filter_ in self[ListType.ALLOW].filters.values()
)
ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
joined_whitelist=self._whitelisted_description,
joined_blacklist=", ".join(not_allowed),
meta_channel_mention=meta_channel.mention,
meta_channel = bot.instance.get_channel(Channels.meta)
if not self._whitelisted_description:
self._whitelisted_description = ", ".join(
filter_.content for filter_ in self[ListType.ALLOW].filters.values()
)
ctx.dm_embed = DISALLOWED_EMBED_DESCRIPTION.format(
joined_whitelist=self._whitelisted_description,
joined_blacklist=", ".join(not_allowed),
meta_channel_mention=meta_channel.mention,
)

ctx.matches += not_allowed.values()
ctx.blocked_exts |= set(not_allowed)
Expand Down
5 changes: 1 addition & 4 deletions bot/exts/filtering/_filter_lists/filter_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,7 @@ def __hash__(self):
return hash(id(self))


T = typing.TypeVar("T", bound=Filter)


class FilterList(dict[ListType, AtomicList], typing.Generic[T], FieldRequiring):
class FilterList[T: Filter](dict[ListType, AtomicList], FieldRequiring):
"""Dispatches events to lists of _filters, and aggregates the responses into a single list of actions to take."""

# Each subclass must define a name matching the filter_list name we're expecting to receive from the database.
Expand Down
14 changes: 5 additions & 9 deletions bot/exts/filtering/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,17 @@
from abc import abstractmethod
from copy import copy
from functools import reduce
from typing import Any, NamedTuple, Self, TypeVar
from typing import Any, NamedTuple, Self

from bot.exts.filtering._filter_context import FilterContext
from bot.exts.filtering._settings_types import settings_types
from bot.exts.filtering._settings_types.settings_entry import ActionEntry, SettingsEntry, ValidationEntry
from bot.exts.filtering._utils import FieldRequiring
from bot.log import get_logger

TSettings = TypeVar("TSettings", bound="Settings")

log = get_logger(__name__)

_already_warned: set[str] = set()

T = TypeVar("T", bound=SettingsEntry)
_already_warned = set[str]()


def create_settings(
Expand Down Expand Up @@ -55,7 +51,7 @@ def create_settings(
)


class Settings(FieldRequiring, dict[str, T]):
class Settings[T: SettingsEntry](FieldRequiring, dict[str, T]):
"""
A collection of settings.

Expand All @@ -69,7 +65,7 @@ class Settings(FieldRequiring, dict[str, T]):

entry_type: type[T]

_already_warned: set[str] = set()
_already_warned = set[str]()

@abstractmethod # ABCs have to have at least once abstract method to actually count as such.
def __init__(self, settings_data: dict, *, defaults: Settings | None = None, keep_empty: bool = False):
Expand Down Expand Up @@ -104,7 +100,7 @@ def overrides(self) -> dict[str, Any]:
"""Return a dictionary of overrides across all entries."""
return reduce(operator.or_, (entry.overrides for entry in self.values() if entry), {})

def copy(self: TSettings) -> TSettings:
def copy(self: Self) -> Self:
"""Create a shallow copy of the object."""
return copy(self)

Expand Down
19 changes: 18 additions & 1 deletion bot/exts/filtering/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@
WEEKLY_REPORT_ISO_DAY = 3 # 1=Monday, 7=Sunday


async def _extract_text_file_content(att: discord.Attachment) -> str:
"""Extract up to the first 30 lines and first 2000 characters (whichever is shorter) of an attachment."""
file_encoding = re.search(r"charset=(\S+)", att.content_type).group(1)
file_lines: list[str] = (await att.read()).decode(encoding=file_encoding).splitlines()
first_n_lines = "\n".join(file_lines[:30])[:2_000]
return f"{att.filename}: {first_n_lines}"


class Filtering(Cog):
"""Filtering and alerting for content posted on the server."""

Expand All @@ -80,7 +88,7 @@ class Filtering(Cog):
def __init__(self, bot: Bot):
self.bot = bot
self.filter_lists: dict[str, FilterList] = {}
self._subscriptions: defaultdict[Event, list[FilterList]] = defaultdict(list)
self._subscriptions = defaultdict[Event, list[FilterList]](list)
self.delete_scheduler = scheduling.Scheduler(self.__class__.__name__)
self.webhook: discord.Webhook | None = None

Expand Down Expand Up @@ -223,6 +231,15 @@ async def on_message(self, msg: Message) -> None:
self.message_cache.append(msg)

ctx = FilterContext.from_message(Event.MESSAGE, msg, None, self.message_cache)

text_contents = [
await _extract_text_file_content(a)
for a in msg.attachments if "charset" in a.content_type
]
if text_contents:
attachment_content = "\n\n".join(text_contents)
ctx = ctx.replace(content=f"{ctx.content}\n\n{attachment_content}")

result_actions, list_messages, triggers = await self._resolve_action(ctx)
self.message_cache.update(msg, metadata=triggers)
if result_actions:
Expand Down
144 changes: 144 additions & 0 deletions bot/exts/utils/attachment_pastebin_uploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from __future__ import annotations

import re

import aiohttp
import discord
from discord.ext import commands
from pydis_core.utils import paste_service

from bot.bot import Bot
from bot.constants import Emojis
from bot.log import get_logger

# Module-level logger, named after this module.
log = get_logger(__name__)

# Reaction the message author adds to the bot's reply to consent to the upload.
PASTEBIN_UPLOAD_EMOJI = Emojis.check_mark
# Reaction the message author adds to the bot's reply to delete the paste afterwards.
DELETE_PASTE_EMOJI = Emojis.trashcan


class EmbedFileHandler(commands.Cog):
    """
    Handles automatic uploading of attachments to the paste bin.

    Whenever a user uploads one or more attachments that are text-based (py, txt, csv, etc.), this cog offers to
    upload all the attachments to the paste bin automatically. The steps are as follows:
    - The bot replies to the message containing the attachments, asking the user to react with a checkmark to consent
    to having the content uploaded.
    - If consent is not given within 60 seconds, the offer is struck through and nothing is uploaded.
    - If consent is given, the bot uploads the contents and edits its own message to contain the link.
    - The bot DMs the user the delete link for the paste.
    - The bot waits for the user to react with a trashcan emoji, in which case the bot deletes the paste and its own
    message.
    """

    def __init__(self, bot: Bot):
        self.bot = bot
        # IDs of messages for which an upload offer is outstanding and which have not been deleted since.
        self.pending_messages = set[int]()

    @staticmethod
    def _declares_charset(attachment: discord.Attachment) -> bool:
        """Return whether the attachment's content type declares a charset, i.e. whether it can be decoded as text."""
        # `content_type` may be None, so it has to be normalised before it can be searched.
        return "charset" in (attachment.content_type or "")

    @staticmethod
    async def _convert_attachment(attachment: discord.Attachment) -> paste_service.PasteFile:
        """
        Convert an attachment to a PasteFile, according to the attachment's file encoding.

        The encoding is read from the charset declared in the attachment's content type; UTF-8 is assumed when none is
        declared. Raises LookupError for a charset Python doesn't know, and UnicodeDecodeError when the content doesn't
        match the declared charset.
        """
        # The charset value may be quoted or followed by further `;`-separated parameters.
        charset_match = re.search(r'charset="?([^\s;"]+)', attachment.content_type or "")
        encoding = charset_match.group(1) if charset_match else "utf-8"
        file_content = (await attachment.read()).decode(encoding)
        return paste_service.PasteFile(content=file_content, name=attachment.filename)

    @commands.Cog.listener()
    async def on_message_delete(self, message: discord.Message) -> None:
        """Allows us to know which messages with attachments have been deleted."""
        self.pending_messages.discard(message.id)

    @commands.Cog.listener()
    async def on_message(self, message: discord.Message) -> None:
        """Listens for messages containing attachments and offers to upload them to the pastebin."""
        # Ignore messages sent by bots.
        if message.author.bot:
            return

        # Only make the offer if there's a text attachment AND at least one attachment we can actually decode;
        # otherwise the user would consent to an upload that has nothing in it.
        has_text_attachment = any((a.content_type or "").startswith("text") for a in message.attachments)
        uploadable = [a for a in message.attachments if self._declares_charset(a)]
        if not has_text_attachment or not uploadable:
            return

        log.trace(f"Offering to upload attachments for {message.author} in {message.channel}, message {message.id}")
        self.pending_messages.add(message.id)

        # Offer to upload the attachments and wait for the user's reaction.
        bot_reply = await message.reply(
            f"Please react with {PASTEBIN_UPLOAD_EMOJI} to upload your file(s) to our "
            f"[paste bin](<https://paste.pythondiscord.com/>), which is more accessible for some users."
        )
        await bot_reply.add_reaction(PASTEBIN_UPLOAD_EMOJI)

        def wait_for_upload_permission(reaction: discord.Reaction, user: discord.User) -> bool:
            return (
                reaction.message.id == bot_reply.id
                and str(reaction.emoji) == PASTEBIN_UPLOAD_EMOJI
                and user == message.author
            )

        try:
            # Wait for the reaction with a timeout of 60 seconds.
            await self.bot.wait_for("reaction_add", timeout=60.0, check=wait_for_upload_permission)
        except TimeoutError:
            consent_given = False
        else:
            consent_given = True

        if message.id not in self.pending_messages:
            log.trace(f"{message.author}'s message was deleted before the attachments could be uploaded; aborting.")
            await bot_reply.delete()
            return

        # In either case, we do not want the message ID in pending_messages anymore.
        self.pending_messages.discard(message.id)

        if not consent_given:
            # The user did not grant permission before the timeout. Exit early: nothing may be uploaded
            # without the author's explicit consent.
            log.trace(f"{message.author} didn't give permission to upload {message.id} content; aborting.")
            await bot_reply.edit(content=f"~~{bot_reply.content}~~")
            await bot_reply.clear_reactions()
            return

        # Extract the attachments.
        try:
            files = [await self._convert_attachment(f) for f in uploadable]
        except (LookupError, UnicodeDecodeError):
            # The declared charset is unknown or doesn't match the actual file content.
            log.trace(f"Could not decode {message.author}'s attachments.")
            await bot_reply.edit(content="There was an error uploading your paste.")
            return

        # Upload the files to the paste bin, exiting early if there's an error.
        log.trace(f"Attempting to upload {len(files)} file(s) to pastebin.")
        try:
            async with aiohttp.ClientSession() as session:
                paste_response = await paste_service.send_to_paste_service(files=files, http_session=session)
        except (paste_service.PasteTooLongError, ValueError):
            log.trace(f"{message.author}'s attachments were too long.")
            await bot_reply.edit(content="Your paste is too long, and couldn't be uploaded.")
            return
        except paste_service.PasteUploadError:
            log.trace(f"Unexpected error uploading {message.author}'s attachments.")
            await bot_reply.edit(content="There was an error uploading your paste.")
            return

        # Send the user a DM with the delete link for the paste.
        # The angle brackets around the remove link are required to stop Discord from visiting the URL to produce a
        # preview, thereby deleting the paste
        try:
            await message.author.send(content=f"[Click here](<{paste_response.removal}>) to delete your recent paste.")
        except discord.Forbidden:
            # The user doesn't accept DMs from the bot. The paste already exists, so still publish the link;
            # the trashcan reaction below remains available for deleting it.
            log.trace(f"Could not DM {message.author} the removal link for their paste.")

        # Edit the bot message to contain the link to the paste.
        await bot_reply.edit(content=f"[Click here]({paste_response.link}) to see this code in our pastebin.")
        await bot_reply.clear_reactions()
        await bot_reply.add_reaction(DELETE_PASTE_EMOJI)

        # Wait for the user to react with a trash can, which they can use to delete the paste.

        def wait_for_delete_reaction(reaction: discord.Reaction, user: discord.User) -> bool:
            return (
                reaction.message.id == bot_reply.id
                and str(reaction.emoji) == DELETE_PASTE_EMOJI
                and user == message.author
            )

        try:
            log.trace(f"Offering to delete {message.author}'s attachments in {message.channel}, message {message.id}")
            await self.bot.wait_for("reaction_add", timeout=60.0 * 10, check=wait_for_delete_reaction)
            # Delete the paste by visiting the removal URL.
            async with aiohttp.ClientSession() as session:
                await session.get(paste_response.removal)
            await bot_reply.delete()
        except TimeoutError:
            log.trace(f"Offer to delete {message.author}'s attachments timed out.")


async def setup(bot: Bot) -> None:
    """Instantiate the EmbedFileHandler cog and register it with the bot."""
    cog = EmbedFileHandler(bot)
    await bot.add_cog(cog)
3 changes: 2 additions & 1 deletion bot/exts/utils/snekbox/_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from bot.bot import Bot
from bot.constants import BaseURLs, Channels, Emojis, MODERATION_ROLES, Roles, URLs
from bot.decorators import redirect_output
from bot.exts.filtering._filter_lists.extension import TXT_LIKE_FILES
from bot.exts.help_channels._channel import is_help_forum_post
from bot.exts.utils.snekbox._eval import EvalJob, EvalResult
from bot.exts.utils.snekbox._io import FileAttachment
Expand All @@ -32,6 +31,8 @@
ANSI_REGEX = re.compile(r"\N{ESC}\[[0-9;:]*m")
ESCAPE_REGEX = re.compile("[`\u202E\u200B]{3,}")

TXT_LIKE_FILES = {".txt", ".csv", ".json", ".py"}

# The timeit command should only output the very last line, so all other output should be suppressed.
# This will be used as the setup code along with any setup code provided.
TIMEIT_SETUP_WRAPPER = """
Expand Down
Loading