From b9c8488b27a99e7c80819e28ce56d7965c2e2220 Mon Sep 17 00:00:00 2001
From: i-ky
Date: Wed, 8 Nov 2023 15:09:46 +0000
Subject: [PATCH] rss-bot: Add option to convert body to Markdown

---
 pyproject.toml                          |  1 +
 zulip/integrations/rss/requirements.txt |  1 +
 zulip/integrations/rss/rss-bot          | 21 ++++++++++++++++++++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index ea90f90dd..5bb90ada7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,6 +64,7 @@ module = [
     "gitlint.*",
     "googleapiclient.*",
     "irc.*",
+    "markdownify.*",
     "mercurial.*",
     "nio.*",
     "oauth2client.*",
diff --git a/zulip/integrations/rss/requirements.txt b/zulip/integrations/rss/requirements.txt
index 7bbec3415..96e1b4d72 100644
--- a/zulip/integrations/rss/requirements.txt
+++ b/zulip/integrations/rss/requirements.txt
@@ -1 +1,2 @@
 feedparser>=6.0.10
+markdownify>=0.11.6
diff --git a/zulip/integrations/rss/rss-bot b/zulip/integrations/rss/rss-bot
index f49342687..79bc39a6d 100755
--- a/zulip/integrations/rss/rss-bot
+++ b/zulip/integrations/rss/rss-bot
@@ -13,10 +13,12 @@ import re
 import sys
 import time
 import urllib.parse
+from collections.abc import Callable
 from html.parser import HTMLParser
 from typing import Any, Dict, List, Optional, Tuple
 
 import feedparser
+from markdownify import markdownify
 from typing_extensions import override
 
 import zulip
@@ -92,6 +94,19 @@ parser.add_argument(
     help="Convert $ to $$ (for KaTeX processing)",
     default=False,
 )
+body = parser.add_mutually_exclusive_group()
+body.add_argument(
+    "--strip",
+    dest="strip",
+    action="store_true",
+    help="Strip HTML tags from body",
+)
+body.add_argument(
+    "--markdownify",
+    dest="strip",
+    action="store_false",
+    help="Convert body from HTML to Markdown",
+)
 
 opts = parser.parse_args()
 
@@ -177,7 +192,11 @@ def send_zulip(entry: Any, feed_name: str) -> Dict[str, Any]:
     if opts.unwrap:
         body = unwrap_text(body)
 
-    content = f"**[{entry.title}]({entry.link})**\n{strip_tags(body)}\n{entry.link}"
+    def md(html: str) -> str:
+        return markdownify(html, escape_underscores=False)
+
+    convert: Callable[[str], str] = strip_tags if opts.strip else md
+    content = f"**[{entry.title}]({entry.link})**\n{convert(body)}\n{entry.link}"
 
     if opts.math:
         content = content.replace("$", "$$")