Revert "Use caption_markdown_urled everywhere, for notes, rules, feds, etc"

This reverts commit 1f169bf.
Ghost-IU · Mar 28, 2022 · 294be1d
1 parent 7fd31da
commit 294be1d
Show file tree

Hide file tree

Showing 7 changed files with 167 additions and 93 deletions.
diff --git a/SaitamaRobot/modules/cust_filters.py b/SaitamaRobot/modules/cust_filters.py
@@ -105,14 +105,9 @@ def filters(update, context):
     chat = update.effective_chat
     user = update.effective_user
     msg = update.effective_message
-    try:
-        args = msg.text_markdown_urled.split(
-            None, 1
-        )  # use python's maxsplit to separate Cmd, keyword, and reply_text
-    except ValueError as e:
-        if "Nested entities are not supported for Markdown version 1" in e.args:
-            msg.reply_text("Nested entities are currently not supported.")
-
+    args = msg.text.split(
+        None, 1
+    )  # use python's maxsplit to separate Cmd, keyword, and reply_text
 
     conn = connected(context.bot, update, chat, user.id)
     if not conn is False:
@@ -160,7 +155,7 @@ def filters(update, context):
             msg.text
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            extracted[1][offset:]
+            extracted[1], entities=msg.parse_entities(), offset=offset
         )
         text = text.strip()
         if not text:
@@ -171,22 +166,17 @@ def filters(update, context):
             return
 
     elif msg.reply_to_message and len(args) >= 2:
-        try:
-            if msg.reply_to_message.text:
-                text_to_parsing = msg.reply_to_message.text_markdown_urled
-            elif msg.reply_to_message.caption:
-                text_to_parsing = msg.reply_to_message.caption_markdown_urled
-            else:
-                text_to_parsing = ""
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-
+        if msg.reply_to_message.text:
+            text_to_parsing = msg.reply_to_message.text
+        elif msg.reply_to_message.caption:
+            text_to_parsing = msg.reply_to_message.caption
+        else:
+            text_to_parsing = ""
         offset = len(
             text_to_parsing
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            text_to_parsing[offset:]
+            text_to_parsing, entities=msg.parse_entities(), offset=offset
         )
         text = text.strip()
 
@@ -198,22 +188,17 @@ def filters(update, context):
         return
 
     elif msg.reply_to_message:
-        try:
-            if msg.reply_to_message.text:
-                text_to_parsing = msg.reply_to_message.text_markdown_urled
-            elif msg.reply_to_message.caption:
-                text_to_parsing = msg.reply_to_message.caption_markdown_urled
-            else:
-                text_to_parsing = ""
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-
+        if msg.reply_to_message.text:
+            text_to_parsing = msg.reply_to_message.text
+        elif msg.reply_to_message.caption:
+            text_to_parsing = msg.reply_to_message.caption
+        else:
+            text_to_parsing = ""
         offset = len(
             text_to_parsing
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            text_to_parsing[offset:]
+            text_to_parsing, entities=msg.parse_entities(), offset=offset
         )
         text = text.strip()
         if (msg.reply_to_message.text or msg.reply_to_message.caption) and not text:

diff --git a/SaitamaRobot/modules/feds.py b/SaitamaRobot/modules/feds.py
@@ -24,6 +24,7 @@
     extract_user,
     extract_user_fban,
 )
+from SaitamaRobot.modules.helper_funcs.string_handling import markdown_parser
 from telegram import (
     InlineKeyboardButton,
     InlineKeyboardMarkup,
@@ -1212,16 +1213,16 @@ def set_frules(update: Update, context: CallbackContext):
 
     if len(args) >= 1:
         msg = update.effective_message
-        try:
-            raw_text = msg.text_markdown_urled
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-                return
-
+        raw_text = msg.text
         args = raw_text.split(None, 1)  # use python's maxsplit to separate cmd and args
         if len(args) == 2:
-            markdown_rules = args[1]
+            txt = args[1]
+            offset = len(txt) - len(raw_text)  # set correct offset relative to command
+            markdown_rules = markdown_parser(
+                txt,
+                entities=msg.parse_entities(),
+                offset=offset,
+            )
         x = sql.set_frules(fed_id, markdown_rules)
         if not x:
             update.effective_message.reply_text(
@@ -1290,14 +1291,12 @@ def fed_broadcast(update: Update, context: CallbackContext):
             update.effective_message.reply_text("Only federation owners can do this!")
             return
         # Parsing md
-        try:
-            raw_text = msg.text_markdown_urled.split(None, 1)  # use python's maxsplit to separate cmd and args
-        except ValueError as e:
-            if "Nested entities are not supported for Markdown version 1" in e.args:
-                msg.reply_text("Nested entities are currently not supported.")
-                return
-
-        text = args[1]
+        raw_text = msg.text
+        args = raw_text.split(None, 1)  # use python's maxsplit to separate cmd and args
+        txt = args[1]
+        offset = len(txt) - len(raw_text)  # set correct offset relative to command
+        text_parser = markdown_parser(txt, entities=msg.parse_entities(), offset=offset)
+        text = text_parser
         try:
             broadcaster = user.first_name
         except:

diff --git a/SaitamaRobot/modules/helper_funcs/msg_types.py b/SaitamaRobot/modules/helper_funcs/msg_types.py
@@ -20,8 +20,7 @@ def get_note_type(msg: Message):
     data_type = None
     content = None
     text = ""
-    raw_text = msg.text_markdown_urled or msg.caption_markdown_urled
-
+    raw_text = msg.text or msg.caption
     args = raw_text.split(None, 2)  # use python's maxsplit to separate cmd and args
     note_name = args[1]
 
@@ -32,7 +31,9 @@ def get_note_type(msg: Message):
             raw_text,
         )  # set correct offset relative to command + notename
         text, buttons = button_markdown_parser(
-            args[2][offset:],
+            args[2],
+            entities=msg.parse_entities() or msg.parse_caption_entities(),
+            offset=offset,
         )
         if buttons:
             data_type = Types.BUTTON_TEXT
@@ -45,10 +46,9 @@ def get_note_type(msg: Message):
             if msg.reply_to_message.caption
             else msg.reply_to_message.parse_entities()
         )
-        msgtext = msg.reply_to_message.text_markdown_urled or msg.reply_to_message.caption_markdown_urled
-
+        msgtext = msg.reply_to_message.text or msg.reply_to_message.caption
         if len(args) >= 2 and msg.reply_to_message.text:  # not caption, text
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             if buttons:
                 data_type = Types.BUTTON_TEXT
             else:
@@ -60,27 +60,27 @@ def get_note_type(msg: Message):
 
         elif msg.reply_to_message.document:
             content = msg.reply_to_message.document.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.DOCUMENT
 
         elif msg.reply_to_message.photo:
             content = msg.reply_to_message.photo[-1].file_id  # last elem = best quality
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.PHOTO
 
         elif msg.reply_to_message.audio:
             content = msg.reply_to_message.audio.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.AUDIO
 
         elif msg.reply_to_message.voice:
             content = msg.reply_to_message.voice.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.VOICE
 
         elif msg.reply_to_message.video:
             content = msg.reply_to_message.video.file_id
-            text, buttons = button_markdown_parser(msgtext)
+            text, buttons = button_markdown_parser(msgtext, entities=entities)
             data_type = Types.VIDEO
 
     return note_name, text, data_type, content, buttons
@@ -95,9 +95,9 @@ def get_welcome_type(msg: Message):
     try:
         if msg.reply_to_message:
             if msg.reply_to_message.text:
-                args = msg.reply_to_message.text_markdown_urled
+                args = msg.reply_to_message.text
             else:
-                args = msg.reply_to_message.caption_markdown_urled
+                args = msg.reply_to_message.caption
         else:
             args = msg.text.split(
                 None,
@@ -146,16 +146,20 @@ def get_welcome_type(msg: Message):
     if args:
         if msg.reply_to_message:
             argumen = (
-                msg.reply_to_message.caption_markdown_urled if msg.reply_to_message.caption_markdown_urled else ""
+                msg.reply_to_message.caption if msg.reply_to_message.caption else ""
             )
             offset = 0  # offset is no need since target was in reply
+            entities = msg.reply_to_message.parse_entities()
         else:
             argumen = args[1]
             offset = len(argumen) - len(
                 msg.text,
             )  # set correct offset relative to command + notename
+            entities = msg.parse_entities()
         text, buttons = button_markdown_parser(
-            argumen[offset:],
+            argumen,
+            entities=entities,
+            offset=offset,
         )
 
     if not data_type:

diff --git a/SaitamaRobot/modules/helper_funcs/string_handling.py b/SaitamaRobot/modules/helper_funcs/string_handling.py
@@ -57,32 +57,112 @@ def _calc_emoji_offset(to_calc) -> int:
     return sum(len(e.group(0).encode("utf-16-le")) // 2 - 1 for e in emoticons)
 
 
+def markdown_parser(
+    txt: str,
+    entities: Dict[MessageEntity, str] = None,
+    offset: int = 0,
+) -> str:
+    """
+    Parse a string, escaping all invalid markdown entities.
+
+    Escapes URL's so as to avoid URL mangling.
+    Re-adds any telegram code entities obtained from the entities object.
+
+    :param txt: text to parse
+    :param entities: dict of message entities in text
+    :param offset: message offset - command and notename length
+    :return: valid markdown string
+    """
+    if not entities:
+        entities = {}
+    if not txt:
+        return ""
+
+    prev = 0
+    res = ""
+    # Loop over all message entities, and:
+    # reinsert code
+    # escape free-standing urls
+    for ent, ent_text in entities.items():
+        if ent.offset < -offset:
+            continue
+
+        start = ent.offset + offset  # start of entity
+        end = ent.offset + offset + ent.length - 1  # end of entity
+
+        # we only care about code, url, text links
+        if ent.type in ("code", "url", "text_link"):
+            # count emoji to switch counter
+            count = _calc_emoji_offset(txt[:start])
+            start -= count
+            end -= count
+
+            # URL handling -> do not escape if in [](), escape otherwise.
+            if ent.type == "url":
+                if any(
+                    match.start(1) <= start and end <= match.end(1)
+                    for match in LINK_REGEX.finditer(txt)
+                ):
+                    continue
+                # else, check the escapes between the prev and last and forcefully escape the url to avoid mangling
+                else:
+                    # TODO: investigate possible offset bug when lots of emoji are present
+                    res += _selective_escape(txt[prev:start] or "") + escape_markdown(
+                        ent_text,
+                    )
+
+            # code handling
+            elif ent.type == "code":
+                res += _selective_escape(txt[prev:start]) + "`" + ent_text + "`"
+
+            # handle markdown/html links
+            elif ent.type == "text_link":
+                res += _selective_escape(txt[prev:start]) + "[{}]({})".format(
+                    ent_text,
+                    ent.url,
+                )
+
+            end += 1
+
+        # anything else
+        else:
+            continue
+
+        prev = end
+
+    res += _selective_escape(txt[prev:])  # add the rest of the text
+    return res
+
+
 def button_markdown_parser(
-    note: str,
+    txt: str,
+    entities: Dict[MessageEntity, str] = None,
+    offset: int = 0,
 ) -> (str, List):
+    markdown_note = markdown_parser(txt, entities, offset)
     prev = 0
     note_data = ""
     buttons = []
-    for match in BTN_URL_REGEX.finditer(note):
+    for match in BTN_URL_REGEX.finditer(markdown_note):
         # Check if btnurl is escaped
         n_escapes = 0
         to_check = match.start(1) - 1
-        while to_check > 0 and note[to_check] == "\\":
+        while to_check > 0 and markdown_note[to_check] == "\\":
             n_escapes += 1
             to_check -= 1
 
         # if even, not escaped -> create button
         if n_escapes % 2 == 0:
             # create a thruple with button label, url, and newline status
             buttons.append((match.group(2), match.group(3), bool(match.group(4))))
-            note_data += note[prev : match.start(1)]
+            note_data += markdown_note[prev : match.start(1)]
             prev = match.end(1)
         # if odd, escaped -> move along
         else:
-            note_data += note[prev:to_check]
+            note_data += markdown_note[prev:to_check]
             prev = match.start(1) - 1
     else:
-        note_data += note[prev:]
+        note_data += markdown_note[prev:]
 
     return note_data, buttons
 

diff --git a/SaitamaRobot/modules/notes.py b/SaitamaRobot/modules/notes.py
@@ -417,13 +417,7 @@ def save(update: Update, context: CallbackContext):
     if len(m) == 1:
         msg.reply_text("Provide something to save.")
         return
-    try:
-        note_name, text, data_type, content, buttons = get_note_type(msg)
-    except ValueError as e:
-        if "Nested entities are not supported for Markdown version 1" in e.args:
-            msg.reply_text("Nested entities are currently not supported.")
-            return
-
+    note_name, text, data_type, content, buttons = get_note_type(msg)
     note_name = note_name.lower()
     if data_type is None:
         msg.reply_text("Dude, there's no note")