diff --git a/core/database.py b/core/database.py
index 75aba77..5c31c05 100644
--- a/core/database.py
+++ b/core/database.py
@@ -21,7 +21,6 @@
import asyncio
import datetime
import logging
-import re
from typing import TYPE_CHECKING, Any, Self
import aiohttp
@@ -31,16 +30,18 @@
from . import utils
from .models import FileModel, PasteModel
+from .scanners import SecurityInfo, Services
if TYPE_CHECKING:
_Pool = asyncpg.Pool[asyncpg.Record]
from types_.config import Github
from types_.github import PostGist
+ from types_.scanner import ScannerSecret
else:
_Pool = asyncpg.Pool
-DISCORD_TOKEN_REGEX: re.Pattern[str] = re.compile(r"[a-zA-Z0-9_-]{23,28}\.[a-zA-Z0-9_-]{6,7}\.[a-zA-Z0-9_-]{27,}")
+
LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -53,7 +54,7 @@ def __init__(self, *, dsn: str, session: aiohttp.ClientSession | None = None, gi
self._handling_tokens = bool(self.session and github_config)
if self._handling_tokens:
- LOGGER.info("Will handle compromised discord info.")
+ LOGGER.info("Setup to handle Discord Tokens.")
assert github_config # guarded by if here
self._gist_token = github_config["token"]
@@ -83,20 +84,15 @@ async def _token_task(self) -> None:
await asyncio.sleep(self._gist_timeout)
- def _handle_discord_tokens(self, *bodies: dict[str, str], paste_id: str) -> None:
- formatted_bodies = "\n".join(b["content"] for b in bodies)
-
- tokens = list(DISCORD_TOKEN_REGEX.finditer(formatted_bodies))
-
- if not tokens:
+ def _handle_discord_tokens(self, tokens: list[str], paste_id: str) -> None:
+ if not self._handling_tokens or not tokens:
return
LOGGER.info(
"Discord bot token located and added to token bucket. Current bucket size is: %s", len(self.__tokens_bucket)
)
- tokens = "\n".join([m[0] for m in tokens])
- self.__tokens_bucket[paste_id] = tokens
+ self.__tokens_bucket[paste_id] = "\n".join(tokens)
async def _post_gist_of_tokens(self) -> None:
assert self.session # guarded in caller
@@ -211,8 +207,8 @@ async def create_paste(self, *, data: dict[str, Any]) -> PasteModel:
"""
file_query: str = """
- INSERT INTO files (parent_id, content, filename, loc, annotation)
- VALUES ($1, $2, $3, $4, $5)
+ INSERT INTO files (parent_id, content, filename, loc, annotation, warning_positions)
+ VALUES ($1, $2, $3, $4, $5, $6)
RETURNING *
"""
@@ -246,28 +242,39 @@ async def create_paste(self, *, data: dict[str, Any]) -> PasteModel:
name: str = (file.get("filename") or f"file_{index}")[-CONFIG["PASTES"]["name_limit"] :]
name = "_".join(name.splitlines())
-content: str = file["content"]
+# Normalise newlines (CRLF and lone CR become LF) before scanning/storing; loc is counted on this normalised text.
+content: str = file["content"].replace("\r\n", "\n").replace("\r", "\n")
-loc: int = file["content"].count("\n") + 1
+loc: int = content.count("\n") + 1
- annotation: str = ""
- tokens = [t for t in utils.TOKEN_REGEX.findall(content) if utils.validate_discord_token(t)]
- if tokens:
- annotation = "Contains possibly sensitive information: Discord Token(s)"
- if not password:
- annotation += ", which have now been invalidated."
+ positions: list[int] = []
+ extra: str = ""
+
+ secrets: list[ScannerSecret] = SecurityInfo.scan_file(content)
+ for payload in secrets:
+ service: Services = payload["service"]
+
+ extra += f"{service.value}, "
+ positions += [t[0] for t in payload["tokens"]]
+
+ if not password and self._handling_tokens and service is Services.discord:
+ self._handle_discord_tokens(tokens=[t[1] for t in payload["tokens"]], paste_id=paste.id)
+
+ extra = extra.removesuffix(", ")
+ annotation = f"Contains possibly sensitive data from: {extra}" if extra else ""
row: asyncpg.Record | None = await connection.fetchrow(
- file_query, paste.id, content, name, loc, annotation
+ file_query,
+ paste.id,
+ content,
+ name,
+ loc,
+ annotation,
+ sorted(positions),
)
if row:
paste.files.append(FileModel(row))
- if not password:
- # if the user didn't provide a password (a public paste)
- # we check for discord tokens
- self._handle_discord_tokens(*data["files"], paste_id=paste.id)
-
return paste
async def fetch_paste_security(self, *, token: str) -> PasteModel | None:
diff --git a/core/models.py b/core/models.py
index ec21ae3..e8dab76 100644
--- a/core/models.py
+++ b/core/models.py
@@ -67,6 +67,7 @@ def __init__(self, record: asyncpg.Record | dict[str, Any]) -> None:
self.charcount: int = record["charcount"]
self.index: int = record["file_index"]
self.annotation: str = record["annotation"]
+ self.warning_positions: list[int] = record["warning_positions"]
class PasteModel(BaseModel):
diff --git a/core/scanners.py b/core/scanners.py
new file mode 100644
index 0000000..5d31dc8
--- /dev/null
+++ b/core/scanners.py
@@ -0,0 +1,134 @@
+"""MystBin. Share code easily.
+
+Copyright (C) 2020-Current PythonistaGuild
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see
{content}
+ {lines}{content}
"""
return html
diff --git a/web/index.html b/web/index.html
index 4fa4673..25f2cde 100644
--- a/web/index.html
+++ b/web/index.html
@@ -23,7 +23,7 @@
-
+
diff --git a/web/maint.html b/web/maint.html
index 61261ea..945c2a6 100644
--- a/web/maint.html
+++ b/web/maint.html
@@ -15,7 +15,7 @@
-
+
diff --git a/web/password.html b/web/password.html
index 5e217b3..2bcde32 100644
--- a/web/password.html
+++ b/web/password.html
@@ -14,18 +14,17 @@
-
-
+
-
+
@@ -56,7 +55,7 @@