From 7fd31da79bb071bfa1ec465bfc408d5601a66b8e Mon Sep 17 00:00:00 2001
From: DevOps117 <55235206+devops117@users.noreply.github.com>
Date: Mon, 28 Mar 2022 20:07:28 +0530
Subject: [PATCH 1/2] Revert "Format code."

---
 SaitamaRobot/modules/cust_filters.py           | 13 ++++++++++---
 SaitamaRobot/modules/feds.py                   |  4 +---
 SaitamaRobot/modules/helper_funcs/msg_types.py |  9 ++-------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/SaitamaRobot/modules/cust_filters.py b/SaitamaRobot/modules/cust_filters.py
index 350ea1b94b..d464205df4 100644
--- a/SaitamaRobot/modules/cust_filters.py
+++ b/SaitamaRobot/modules/cust_filters.py
@@ -113,6 +113,7 @@ def filters(update, context):
         if "Nested entities are not supported for Markdown version 1" in e.args:
             msg.reply_text("Nested entities are currently not supported.")
 
+
     conn = connected(context.bot, update, chat, user.id)
     if not conn is False:
         chat_id = conn
@@ -158,7 +159,9 @@ def filters(update, context):
         offset = len(extracted[1]) - len(
             msg.text
         )  # set correct offset relative to command + notename
-        text, buttons = button_markdown_parser(extracted[1][offset:])
+        text, buttons = button_markdown_parser(
+            extracted[1][offset:]
+        )
         text = text.strip()
         if not text:
             send_message(
@@ -182,7 +185,9 @@ def filters(update, context):
         offset = len(
             text_to_parsing
         )  # set correct offset relative to command + notename
-        text, buttons = button_markdown_parser(text_to_parsing[offset:])
+        text, buttons = button_markdown_parser(
+            text_to_parsing[offset:]
+        )
         text = text.strip()
 
     elif not text and not file_type:
@@ -207,7 +212,9 @@ def filters(update, context):
         offset = len(
             text_to_parsing
         )  # set correct offset relative to command + notename
-        text, buttons = button_markdown_parser(text_to_parsing[offset:])
+        text, buttons = button_markdown_parser(
+            text_to_parsing[offset:]
+        )
         text = text.strip()
         if (msg.reply_to_message.text or msg.reply_to_message.caption) and not text:
             send_message(
diff --git a/SaitamaRobot/modules/feds.py b/SaitamaRobot/modules/feds.py
index d07404b95a..3dbb768a02 100644
--- a/SaitamaRobot/modules/feds.py
+++ b/SaitamaRobot/modules/feds.py
@@ -1291,9 +1291,7 @@ def fed_broadcast(update: Update, context: CallbackContext):
             return
         # Parsing md
         try:
-            raw_text = msg.text_markdown_urled.split(
-                None, 1
-            )  # use python's maxsplit to separate cmd and args
+            raw_text = msg.text_markdown_urled.split(None, 1)  # use python's maxsplit to separate cmd and args
         except ValueError as e:
             if "Nested entities are not supported for Markdown version 1" in e.args:
                 msg.reply_text("Nested entities are currently not supported.")
diff --git a/SaitamaRobot/modules/helper_funcs/msg_types.py b/SaitamaRobot/modules/helper_funcs/msg_types.py
index a91aa54ca0..1f1cec91cd 100644
--- a/SaitamaRobot/modules/helper_funcs/msg_types.py
+++ b/SaitamaRobot/modules/helper_funcs/msg_types.py
@@ -45,10 +45,7 @@ def get_note_type(msg: Message):
             if msg.reply_to_message.caption
             else msg.reply_to_message.parse_entities()
         )
-        msgtext = (
-            msg.reply_to_message.text_markdown_urled
-            or msg.reply_to_message.caption_markdown_urled
-        )
+        msgtext = msg.reply_to_message.text_markdown_urled or msg.reply_to_message.caption_markdown_urled
 
         if len(args) >= 2 and msg.reply_to_message.text:  # not caption, text
             text, buttons = button_markdown_parser(msgtext)
@@ -149,9 +146,7 @@ def get_welcome_type(msg: Message):
     if args:
         if msg.reply_to_message:
             argumen = (
-                msg.reply_to_message.caption_markdown_urled
-                if msg.reply_to_message.caption_markdown_urled
-                else ""
+                msg.reply_to_message.caption_markdown_urled if msg.reply_to_message.caption_markdown_urled else ""
             )
             offset = 0  # offset is no need since target was in reply
         else:

From 294be1d44a3f3e1bae711bb8b5fccc6afb177a34 Mon Sep 17 00:00:00 2001
From: devops117 <55235206+devops117@users.noreply.github.com>
Date: Mon, 28 Mar 2022 20:10:19 +0530
Subject: [PATCH 2/2] Revert "Use caption_markdown_urled everywhere, for notes,
 rules, feds, etc"

This reverts commit 1f169bf885a1779290387b4db42e8423b5b6db70.
---
 SaitamaRobot/modules/cust_filters.py          | 51 ++++------
 SaitamaRobot/modules/feds.py                  | 31 +++----
 .../modules/helper_funcs/msg_types.py         | 34 ++++---
 .../modules/helper_funcs/string_handling.py   | 92 +++++++++++++++++--
 SaitamaRobot/modules/notes.py                 |  8 +-
 SaitamaRobot/modules/rules.py                 | 17 ++--
 SaitamaRobot/modules/welcome.py               | 27 ++++--
 7 files changed, 167 insertions(+), 93 deletions(-)

diff --git a/SaitamaRobot/modules/cust_filters.py b/SaitamaRobot/modules/cust_filters.py
index d464205df4..76d218a8a6 100644
--- a/SaitamaRobot/modules/cust_filters.py
+++ b/SaitamaRobot/modules/cust_filters.py
@@ -105,14 +105,9 @@ def filters(update, context):
     chat = update.effective_chat
     user = update.effective_user
     msg = update.effective_message
-    try:
-        args = msg.text_markdown_urled.split(
-            None, 1
-        )  # use python's maxsplit to separate Cmd, keyword, and reply_text
-    except ValueError as e:
-        if "Nested entities are not supported for Markdown version 1" in e.args:
-            msg.reply_text("Nested entities are currently not supported.")
-
+    args = msg.text.split(
+        None, 1
+    )  # use python's maxsplit to separate Cmd, keyword, and reply_text
 
     conn = connected(context.bot, update, chat, user.id)
     if not conn is False:
@@ -160,7 +155,7 @@ def filters(update, context):
             msg.text
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            extracted[1][offset:]
+            extracted[1], entities=msg.parse_entities(), offset=offset
         )
         text = text.strip()
         if not text:
@@ -171,22 +166,17 @@ def filters(update, context):
             return
 
     elif msg.reply_to_message and len(args) >= 2:
-        try:
-            if msg.reply_to_message.text:
-                text_to_parsing = msg.reply_to_message.text_markdown_urled
-            elif msg.reply_to_message.caption:
-                text_to_parsing = msg.reply_to_message.caption_markdown_urled
-            else:
-                text_to_parsing = ""
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-
+        if msg.reply_to_message.text:
+            text_to_parsing = msg.reply_to_message.text
+        elif msg.reply_to_message.caption:
+            text_to_parsing = msg.reply_to_message.caption
+        else:
+            text_to_parsing = ""
         offset = len(
             text_to_parsing
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            text_to_parsing[offset:]
+            text_to_parsing, entities=msg.parse_entities(), offset=offset
         )
         text = text.strip()
 
@@ -198,22 +188,17 @@ def filters(update, context):
         return
 
     elif msg.reply_to_message:
-        try:
-            if msg.reply_to_message.text:
-                text_to_parsing = msg.reply_to_message.text_markdown_urled
-            elif msg.reply_to_message.caption:
-                text_to_parsing = msg.reply_to_message.caption_markdown_urled
-            else:
-                text_to_parsing = ""
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-
+        if msg.reply_to_message.text:
+            text_to_parsing = msg.reply_to_message.text
+        elif msg.reply_to_message.caption:
+            text_to_parsing = msg.reply_to_message.caption
+        else:
+            text_to_parsing = ""
         offset = len(
             text_to_parsing
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            text_to_parsing[offset:]
+            text_to_parsing, entities=msg.parse_entities(), offset=offset
         )
         text = text.strip()
         if (msg.reply_to_message.text or msg.reply_to_message.caption) and not text:
diff --git a/SaitamaRobot/modules/feds.py b/SaitamaRobot/modules/feds.py
index 3dbb768a02..89fb6799ea 100644
--- a/SaitamaRobot/modules/feds.py
+++ b/SaitamaRobot/modules/feds.py
@@ -24,6 +24,7 @@
     extract_user,
     extract_user_fban,
 )
+from SaitamaRobot.modules.helper_funcs.string_handling import markdown_parser
 from telegram import (
     InlineKeyboardButton,
     InlineKeyboardMarkup,
@@ -1212,16 +1213,16 @@ def set_frules(update: Update, context: CallbackContext):
 
     if len(args) >= 1:
         msg = update.effective_message
-        try:
-            raw_text = msg.text_markdown_urled
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-                return
-
+        raw_text = msg.text
         args = raw_text.split(None, 1)  # use python's maxsplit to separate cmd and args
         if len(args) == 2:
-            markdown_rules = args[1]
+            txt = args[1]
+            offset = len(txt) - len(raw_text)  # set correct offset relative to command
+            markdown_rules = markdown_parser(
+                txt,
+                entities=msg.parse_entities(),
+                offset=offset,
+            )
         x = sql.set_frules(fed_id, markdown_rules)
         if not x:
             update.effective_message.reply_text(
@@ -1290,14 +1291,12 @@ def fed_broadcast(update: Update, context: CallbackContext):
             update.effective_message.reply_text("Only federation owners can do this!")
             return
         # Parsing md
-        try:
-            raw_text = msg.text_markdown_urled.split(None, 1)  # use python's maxsplit to separate cmd and args
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-                return
-
-        text = args[1]
+        raw_text = msg.text
+        args = raw_text.split(None, 1)  # use python's maxsplit to separate cmd and args
+        txt = args[1]
+        offset = len(txt) - len(raw_text)  # set correct offset relative to command
+        text_parser = markdown_parser(txt, entities=msg.parse_entities(), offset=offset)
+        text = text_parser
         try:
             broadcaster = user.first_name
         except:
diff --git a/SaitamaRobot/modules/helper_funcs/msg_types.py b/SaitamaRobot/modules/helper_funcs/msg_types.py
index 1f1cec91cd..2fb7274e57 100644
--- a/SaitamaRobot/modules/helper_funcs/msg_types.py
+++ b/SaitamaRobot/modules/helper_funcs/msg_types.py
@@ -20,8 +20,7 @@ def get_note_type(msg: Message):
     data_type = None
     content = None
     text = ""
-    raw_text = msg.text_markdown_urled or msg.caption_markdown_urled
-
+    raw_text = msg.text or msg.caption
     args = raw_text.split(None, 2)  # use python's maxsplit to separate cmd and args
     note_name = args[1]
 
@@ -32,7 +31,9 @@ def get_note_type(msg: Message):
             raw_text,
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            args[2][offset:],
+            args[2],
+            entities=msg.parse_entities() or msg.parse_caption_entities(),
+            offset=offset,
         )
         if buttons:
             data_type = Types.BUTTON_TEXT
@@ -45,10 +46,9 @@ def get_note_type(msg: Message):
             if msg.reply_to_message.caption
             else msg.reply_to_message.parse_entities()
         )
-        msgtext = msg.reply_to_message.text_markdown_urled or msg.reply_to_message.caption_markdown_urled
-
+        msgtext = msg.reply_to_message.text or msg.reply_to_message.caption
         if len(args) >= 2 and msg.reply_to_message.text:  # not caption, text
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             if buttons:
                 data_type = Types.BUTTON_TEXT
             else:
@@ -60,27 +60,27 @@ def get_note_type(msg: Message):
 
         elif msg.reply_to_message.document:
             content = msg.reply_to_message.document.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.DOCUMENT
 
         elif msg.reply_to_message.photo:
             content = msg.reply_to_message.photo[-1].file_id  # last elem = best quality
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.PHOTO
 
         elif msg.reply_to_message.audio:
             content = msg.reply_to_message.audio.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.AUDIO
 
         elif msg.reply_to_message.voice:
             content = msg.reply_to_message.voice.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.VOICE
 
         elif msg.reply_to_message.video:
             content = msg.reply_to_message.video.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.VIDEO
 
     return note_name, text, data_type, content, buttons
@@ -95,9 +95,9 @@ def get_welcome_type(msg: Message):
     try:
         if msg.reply_to_message:
             if msg.reply_to_message.text:
-                args = msg.reply_to_message.text_markdown_urled
+                args = msg.reply_to_message.text
             else:
-                args = msg.reply_to_message.caption_markdown_urled
+                args = msg.reply_to_message.caption
         else:
             args = msg.text.split(
                 None,
@@ -146,16 +146,20 @@ def get_welcome_type(msg: Message):
     if args:
         if msg.reply_to_message:
             argumen = (
-                msg.reply_to_message.caption_markdown_urled if msg.reply_to_message.caption_markdown_urled else ""
+                msg.reply_to_message.caption if msg.reply_to_message.caption else ""
             )
             offset = 0  # offset is no need since target was in reply
+            entities = msg.reply_to_message.parse_entities()
         else:
             argumen = args[1]
             offset = len(argumen) - len(
                 msg.text,
             )  # set correct offset relative to command + notename
+            entities = msg.parse_entities()
         text, buttons = button_markdown_parser(
-            argumen[offset:],
+            argumen,
+            entities=entities,
+            offset=offset,
         )
 
     if not data_type:
diff --git a/SaitamaRobot/modules/helper_funcs/string_handling.py b/SaitamaRobot/modules/helper_funcs/string_handling.py
index a68de2553d..35420ad89a 100644
--- a/SaitamaRobot/modules/helper_funcs/string_handling.py
+++ b/SaitamaRobot/modules/helper_funcs/string_handling.py
@@ -57,17 +57,97 @@ def _calc_emoji_offset(to_calc) -> int:
     return sum(len(e.group(0).encode("utf-16-le")) // 2 - 1 for e in emoticons)
 
 
+def markdown_parser(
+    txt: str,
+    entities: Dict[MessageEntity, str] = None,
+    offset: int = 0,
+) -> str:
+    """
+    Rebuild ``txt`` as markdown, passing plain spans through _selective_escape.
+
+    Only "code", "url" and "text_link" entities are re-emitted; other types are skipped.
+    A bare URL is escaped unless it lies inside group 1 of a LINK_REGEX match in ``txt``.
+
+    :param txt: text to parse; a falsy value returns ""
+    :param entities: mapping of message entity -> entity text; None is treated as empty
+    :param offset: added to every entity offset to map it into ``txt`` (see skip rule below)
+    :return: the rebuilt markdown string
+    """
+    if not entities:
+        entities = {}
+    if not txt:
+        return ""
+
+    prev = 0
+    res = ""
+    # Walk the entities in dict order; for each supported one, append the
+    # escaped text since the previous entity followed by the entity itself.
+    # ``prev`` marks where the not-yet-copied part of txt begins.
+    for ent, ent_text in entities.items():
+        if ent.offset < -offset:  # entity would start before txt begins; skip it
+            continue
+
+        start = ent.offset + offset  # index of the entity's first character within txt
+        end = ent.offset + offset + ent.length - 1  # index of its last character (inclusive)
+
+        # only code, url and text_link entities are re-emitted
+        if ent.type in ("code", "url", "text_link"):
+            # shift both indices left by what _calc_emoji_offset reports for the preceding text
+            count = _calc_emoji_offset(txt[:start])
+            start -= count
+            end -= count
+
+            # URL handling -> leave untouched if inside a LINK_REGEX match, escape otherwise.
+            if ent.type == "url":
+                if any(
+                    match.start(1) <= start and end <= match.end(1)
+                    for match in LINK_REGEX.finditer(txt)
+                ):
+                    continue  # prev is not advanced, so this URL is copied with later text
+                # otherwise append the escaped text since prev, then the fully escaped URL
+                else:
+                    # TODO: investigate possible offset bug when lots of emoji are present
+                    res += _selective_escape(txt[prev:start] or "") + escape_markdown(
+                        ent_text,
+                    )
+
+            # code handling: re-wrap the entity text in backticks without escaping it
+            elif ent.type == "code":
+                res += _selective_escape(txt[prev:start]) + "`" + ent_text + "`"
+
+            # text links: re-emit as [text](url) using the entity's own url
+            elif ent.type == "text_link":
+                res += _selective_escape(txt[prev:start]) + "[{}]({})".format(
+                    ent_text,
+                    ent.url,
+                )
+
+            end += 1  # make end exclusive so the next slice starts after the entity
+
+        # any other entity type is ignored
+        else:
+            continue
+
+        prev = end
+
+    res += _selective_escape(txt[prev:])  # append the escaped remainder of the text
+    return res
+
+
 def button_markdown_parser(
-    note: str,
+    txt: str,
+    entities: Dict[MessageEntity, str] = None,
+    offset: int = 0,
 ) -> (str, List):
+    markdown_note = markdown_parser(txt, entities, offset)
     prev = 0
     note_data = ""
     buttons = []
-    for match in BTN_URL_REGEX.finditer(note):
+    for match in BTN_URL_REGEX.finditer(markdown_note):
         # Check if btnurl is escaped
         n_escapes = 0
         to_check = match.start(1) - 1
-        while to_check > 0 and note[to_check] == "\\":
+        while to_check > 0 and markdown_note[to_check] == "\\":
             n_escapes += 1
             to_check -= 1
 
@@ -75,14 +155,14 @@ def button_markdown_parser(
         if n_escapes % 2 == 0:
             # create a thruple with button label, url, and newline status
             buttons.append((match.group(2), match.group(3), bool(match.group(4))))
-            note_data += note[prev : match.start(1)]
+            note_data += markdown_note[prev : match.start(1)]
             prev = match.end(1)
         # if odd, escaped -> move along
         else:
-            note_data += note[prev:to_check]
+            note_data += markdown_note[prev:to_check]
             prev = match.start(1) - 1
     else:
-        note_data += note[prev:]
+        note_data += markdown_note[prev:]
 
     return note_data, buttons
 
diff --git a/SaitamaRobot/modules/notes.py b/SaitamaRobot/modules/notes.py
index 175a4a96cb..eb1c5e10a9 100644
--- a/SaitamaRobot/modules/notes.py
+++ b/SaitamaRobot/modules/notes.py
@@ -417,13 +417,7 @@ def save(update: Update, context: CallbackContext):
     if len(m) == 1:
         msg.reply_text("Provide something to save.")
         return
-    try:
-        note_name, text, data_type, content, buttons = get_note_type(msg)
-    except ValueError as e:
-        if "Nested entities are not supported for Markdown version 1" in e.args:
-            msg.reply_text("Nested entities are currently not supported.")
-            return
-
+    note_name, text, data_type, content, buttons = get_note_type(msg)
     note_name = note_name.lower()
     if data_type is None:
         msg.reply_text("Dude, there's no note")
diff --git a/SaitamaRobot/modules/rules.py b/SaitamaRobot/modules/rules.py
index 53d8ab9426..aec1d1ad3e 100644
--- a/SaitamaRobot/modules/rules.py
+++ b/SaitamaRobot/modules/rules.py
@@ -3,6 +3,7 @@
 import SaitamaRobot.modules.sql.rules_sql as sql
 from SaitamaRobot import dispatcher
 from SaitamaRobot.modules.helper_funcs.chat_status import user_admin
+from SaitamaRobot.modules.helper_funcs.string_handling import markdown_parser
 from telegram import (
     InlineKeyboardButton,
     InlineKeyboardMarkup,
@@ -94,15 +95,17 @@ def send_rules(update, chat_id, from_pm=False):
 def set_rules(update: Update, context: CallbackContext):
     chat_id = update.effective_chat.id
     msg = update.effective_message  # type: Optional[Message]
-    try:
-        raw_text = msg.text_markdown_urled
-    except ValueError as e:
-        if "Nested entities are not supported for Markdown version 1" in e.args:
-            msg.reply_text("Nested entities are currently not supported.")
-
+    raw_text = msg.text
     args = raw_text.split(None, 1)  # use python's maxsplit to separate cmd and args
     if len(args) == 2:
-        markdown_rules = args[1]
+        txt = args[1]
+        offset = len(txt) - len(raw_text)  # set correct offset relative to command
+        markdown_rules = markdown_parser(
+            txt,
+            entities=msg.parse_entities(),
+            offset=offset,
+        )
+
         sql.set_rules(chat_id, markdown_rules)
         update.effective_message.reply_text("Successfully set rules for this group.")
 
diff --git a/SaitamaRobot/modules/welcome.py b/SaitamaRobot/modules/welcome.py
index 0c4ca1606f..ff9f7f0e10 100644
--- a/SaitamaRobot/modules/welcome.py
+++ b/SaitamaRobot/modules/welcome.py
@@ -23,6 +23,7 @@
 from SaitamaRobot.modules.helper_funcs.msg_types import get_welcome_type
 from SaitamaRobot.modules.helper_funcs.string_handling import (
     escape_invalid_curly_brackets,
+    markdown_parser,
 )
 from SaitamaRobot.modules.log_channel import loggable
 from telegram import (
@@ -96,23 +97,29 @@ def send(update, message, keyboard, backup_message):
             )
         elif excp.message == "Button_url_invalid":
             msg = update.effective_message.reply_text(
-                backup_message + "\nNote: the current message has an invalid url "
-                "in one of its buttons. Please update.",
+                markdown_parser(
+                    backup_message + "\nNote: the current message has an invalid url "
+                    "in one of its buttons. Please update.",
+                ),
                 parse_mode=ParseMode.MARKDOWN,
                 reply_to_message_id=reply,
             )
         elif excp.message == "Unsupported url protocol":
             msg = update.effective_message.reply_text(
-                backup_message + "\nNote: the current message has buttons which "
-                "use url protocols that are unsupported by "
-                "telegram. Please update.",
+                markdown_parser(
+                    backup_message + "\nNote: the current message has buttons which "
+                    "use url protocols that are unsupported by "
+                    "telegram. Please update.",
+                ),
                 parse_mode=ParseMode.MARKDOWN,
                 reply_to_message_id=reply,
             )
         elif excp.message == "Wrong url host":
             msg = update.effective_message.reply_text(
-                backup_message + "\nNote: the current message has some bad urls. "
-                "Please update.",
+                markdown_parser(
+                    backup_message + "\nNote: the current message has some bad urls. "
+                    "Please update.",
+                ),
                 parse_mode=ParseMode.MARKDOWN,
                 reply_to_message_id=reply,
             )
@@ -123,8 +130,10 @@ def send(update, message, keyboard, backup_message):
             return
         else:
             msg = update.effective_message.reply_text(
-                backup_message + "\nNote: An error occured when sending the "
-                "custom message. Please update.",
+                markdown_parser(
+                    backup_message + "\nNote: An error occured when sending the "
+                    "custom message. Please update.",
+                ),
                 parse_mode=ParseMode.MARKDOWN,
                 reply_to_message_id=reply,
             )