From 7fd31da79bb071bfa1ec465bfc408d5601a66b8e Mon Sep 17 00:00:00 2001 From: DevOps117 <55235206+devops117@users.noreply.github.com> Date: Mon, 28 Mar 2022 20:07:28 +0530 Subject: [PATCH 1/2] Revert "Format code." --- SaitamaRobot/modules/cust_filters.py | 13 ++++++++++--- SaitamaRobot/modules/feds.py | 4 +--- SaitamaRobot/modules/helper_funcs/msg_types.py | 9 ++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/SaitamaRobot/modules/cust_filters.py b/SaitamaRobot/modules/cust_filters.py index 350ea1b94b..d464205df4 100644 --- a/SaitamaRobot/modules/cust_filters.py +++ b/SaitamaRobot/modules/cust_filters.py @@ -113,6 +113,7 @@ def filters(update, context): if "Nested entities are not supported for Markdown version 1" in e.args: msg.reply_text("Nested entities are currently not supported.") + conn = connected(context.bot, update, chat, user.id) if not conn is False: chat_id = conn @@ -158,7 +159,9 @@ def filters(update, context): offset = len(extracted[1]) - len( msg.text ) # set correct offset relative to command + notename - text, buttons = button_markdown_parser(extracted[1][offset:]) + text, buttons = button_markdown_parser( + extracted[1][offset:] + ) text = text.strip() if not text: send_message( @@ -182,7 +185,9 @@ def filters(update, context): offset = len( text_to_parsing ) # set correct offset relative to command + notename - text, buttons = button_markdown_parser(text_to_parsing[offset:]) + text, buttons = button_markdown_parser( + text_to_parsing[offset:] + ) text = text.strip() elif not text and not file_type: @@ -207,7 +212,9 @@ def filters(update, context): offset = len( text_to_parsing ) # set correct offset relative to command + notename - text, buttons = button_markdown_parser(text_to_parsing[offset:]) + text, buttons = button_markdown_parser( + text_to_parsing[offset:] + ) text = text.strip() if (msg.reply_to_message.text or msg.reply_to_message.caption) and not text: send_message( diff --git a/SaitamaRobot/modules/feds.py b/SaitamaRobot/modules/feds.py index d07404b95a..3dbb768a02 100644 --- a/SaitamaRobot/modules/feds.py +++ b/SaitamaRobot/modules/feds.py @@ -1291,9 +1291,7 @@ def fed_broadcast(update: Update, context: CallbackContext): return # Parsing md try: - raw_text = msg.text_markdown_urled.split( - None, 1 - ) # use python's maxsplit to separate cmd and args + raw_text = msg.text_markdown_urled.split(None, 1) # use python's maxsplit to separate cmd and args except ValueError as e: if "Nested entities are not supported for Markdown version 1" in e.args: msg.reply_text("Nested entities are currently not supported.") diff --git a/SaitamaRobot/modules/helper_funcs/msg_types.py b/SaitamaRobot/modules/helper_funcs/msg_types.py index a91aa54ca0..1f1cec91cd 100644 --- a/SaitamaRobot/modules/helper_funcs/msg_types.py +++ b/SaitamaRobot/modules/helper_funcs/msg_types.py @@ -45,10 +45,7 @@ def get_note_type(msg: Message): if msg.reply_to_message.caption else msg.reply_to_message.parse_entities() ) - msgtext = ( - msg.reply_to_message.text_markdown_urled - or msg.reply_to_message.caption_markdown_urled - ) + msgtext = msg.reply_to_message.text_markdown_urled or msg.reply_to_message.caption_markdown_urled if len(args) >= 2 and msg.reply_to_message.text: # not caption, text text, buttons = button_markdown_parser(msgtext) @@ -149,9 +146,7 @@ def get_welcome_type(msg: Message): if args: if msg.reply_to_message: argumen = ( - msg.reply_to_message.caption_markdown_urled - if msg.reply_to_message.caption_markdown_urled - else "" + msg.reply_to_message.caption_markdown_urled if msg.reply_to_message.caption_markdown_urled else "" ) offset = 0 # offset is no need since target was in reply else: From 294be1d44a3f3e1bae711bb8b5fccc6afb177a34 Mon Sep 17 00:00:00 2001 From: devops117 <55235206+devops117@users.noreply.github.com> Date: Mon, 28 Mar 2022 20:10:19 +0530 Subject: [PATCH 2/2] Revert "Use caption_markdown_urled everywhere, for notes, rules, feds, etc" This reverts commit 1f169bf885a1779290387b4db42e8423b5b6db70. --- SaitamaRobot/modules/cust_filters.py | 51 ++++------ SaitamaRobot/modules/feds.py | 31 +++---- .../modules/helper_funcs/msg_types.py | 34 ++++--- .../modules/helper_funcs/string_handling.py | 92 +++++++++++++++++-- SaitamaRobot/modules/notes.py | 8 +- SaitamaRobot/modules/rules.py | 17 ++-- SaitamaRobot/modules/welcome.py | 27 ++++-- 7 files changed, 167 insertions(+), 93 deletions(-) diff --git a/SaitamaRobot/modules/cust_filters.py b/SaitamaRobot/modules/cust_filters.py index d464205df4..76d218a8a6 100644 --- a/SaitamaRobot/modules/cust_filters.py +++ b/SaitamaRobot/modules/cust_filters.py @@ -105,14 +105,9 @@ def filters(update, context): chat = update.effective_chat user = update.effective_user msg = update.effective_message - try: - args = msg.text_markdown_urled.split( - None, 1 - ) # use python's maxsplit to separate Cmd, keyword, and reply_text - except ValueError as e: - if "Nested entities are not supported for Markdown version 1" in e.args: - msg.reply_text("Nested entities are currently not supported.") - + args = msg.text.split( + None, 1 + ) # use python's maxsplit to separate Cmd, keyword, and reply_text conn = connected(context.bot, update, chat, user.id) if not conn is False: @@ -160,7 +155,7 @@ def filters(update, context): msg.text ) # set correct offset relative to command + notename text, buttons = button_markdown_parser( - extracted[1][offset:] + extracted[1], entities=msg.parse_entities(), offset=offset ) text = text.strip() if not text: @@ -171,22 +166,17 @@ def filters(update, context): return elif msg.reply_to_message and len(args) >= 2: - try: - if msg.reply_to_message.text: - text_to_parsing = msg.reply_to_message.text_markdown_urled - elif msg.reply_to_message.caption: - text_to_parsing = msg.reply_to_message.caption_markdown_urled - else: - text_to_parsing = "" - except ValueError as e: - if "Nested entities are not supported for Markdown version 1" in e.args: - msg.reply_text("Nested entities are currently not supported.") - + if msg.reply_to_message.text: + text_to_parsing = msg.reply_to_message.text + elif msg.reply_to_message.caption: + text_to_parsing = msg.reply_to_message.caption + else: + text_to_parsing = "" offset = len( text_to_parsing ) # set correct offset relative to command + notename text, buttons = button_markdown_parser( - text_to_parsing[offset:] + text_to_parsing, entities=msg.parse_entities(), offset=offset ) text = text.strip() @@ -198,22 +188,17 @@ def filters(update, context): return elif msg.reply_to_message: - try: - if msg.reply_to_message.text: - text_to_parsing = msg.reply_to_message.text_markdown_urled - elif msg.reply_to_message.caption: - text_to_parsing = msg.reply_to_message.caption_markdown_urled - else: - text_to_parsing = "" - except ValueError as e: - if "Nested entities are not supported for Markdown version 1" in e.args: - msg.reply_text("Nested entities are currently not supported.") - + if msg.reply_to_message.text: + text_to_parsing = msg.reply_to_message.text + elif msg.reply_to_message.caption: + text_to_parsing = msg.reply_to_message.caption + else: + text_to_parsing = "" offset = len( text_to_parsing ) # set correct offset relative to command + notename text, buttons = button_markdown_parser( - text_to_parsing[offset:] + text_to_parsing, entities=msg.parse_entities(), offset=offset ) text = text.strip() if (msg.reply_to_message.text or msg.reply_to_message.caption) and not text: diff --git a/SaitamaRobot/modules/feds.py b/SaitamaRobot/modules/feds.py index 3dbb768a02..89fb6799ea 100644 --- a/SaitamaRobot/modules/feds.py +++ b/SaitamaRobot/modules/feds.py @@ -24,6 +24,7 @@ extract_user, extract_user_fban, ) +from SaitamaRobot.modules.helper_funcs.string_handling import markdown_parser from telegram import ( InlineKeyboardButton, InlineKeyboardMarkup, @@ -1212,16 +1213,16 @@ def set_frules(update: Update, context: CallbackContext): if len(args) >= 1: msg = update.effective_message - try: - raw_text = msg.text_markdown_urled - except ValueError as e: - if "Nested entities are not supported for Markdown version 1" in e.args: - msg.reply_text("Nested entities are currently not supported.") - return - + raw_text = msg.text args = raw_text.split(None, 1) # use python's maxsplit to separate cmd and args if len(args) == 2: - markdown_rules = args[1] + txt = args[1] + offset = len(txt) - len(raw_text) # set correct offset relative to command + markdown_rules = markdown_parser( + txt, + entities=msg.parse_entities(), + offset=offset, + ) x = sql.set_frules(fed_id, markdown_rules) if not x: update.effective_message.reply_text( @@ -1290,14 +1291,12 @@ def fed_broadcast(update: Update, context: CallbackContext): update.effective_message.reply_text("Only federation owners can do this!") return # Parsing md - try: - raw_text = msg.text_markdown_urled.split(None, 1) # use python's maxsplit to separate cmd and args - except ValueError as e: - if "Nested entities are not supported for Markdown version 1" in e.args: - msg.reply_text("Nested entities are currently not supported.") - return - - text = args[1] + raw_text = msg.text + args = raw_text.split(None, 1) # use python's maxsplit to separate cmd and args + txt = args[1] + offset = len(txt) - len(raw_text) # set correct offset relative to command + text_parser = markdown_parser(txt, entities=msg.parse_entities(), offset=offset) + text = text_parser try: broadcaster = user.first_name except: diff --git a/SaitamaRobot/modules/helper_funcs/msg_types.py b/SaitamaRobot/modules/helper_funcs/msg_types.py index 1f1cec91cd..2fb7274e57 100644 --- a/SaitamaRobot/modules/helper_funcs/msg_types.py +++ b/SaitamaRobot/modules/helper_funcs/msg_types.py @@ -20,8 +20,7 @@ def get_note_type(msg: Message): data_type = None content = None text = "" - raw_text = msg.text_markdown_urled or msg.caption_markdown_urled - + raw_text = msg.text or msg.caption args = raw_text.split(None, 2) # use python's maxsplit to separate cmd and args note_name = args[1] @@ -32,7 +31,9 @@ def get_note_type(msg: Message): raw_text, ) # set correct offset relative to command + notename text, buttons = button_markdown_parser( - args[2][offset:], + args[2], + entities=msg.parse_entities() or msg.parse_caption_entities(), + offset=offset, ) if buttons: data_type = Types.BUTTON_TEXT @@ -45,10 +46,9 @@ def get_note_type(msg: Message): if msg.reply_to_message.caption else msg.reply_to_message.parse_entities() ) - msgtext = msg.reply_to_message.text_markdown_urled or msg.reply_to_message.caption_markdown_urled - + msgtext = msg.reply_to_message.text or msg.reply_to_message.caption if len(args) >= 2 and msg.reply_to_message.text: # not caption, text - text, buttons = button_markdown_parser(msgtext) + text, buttons = button_markdown_parser(msgtext, entities=entities) if buttons: data_type = Types.BUTTON_TEXT else: @@ -60,27 +60,27 @@ def get_note_type(msg: Message): elif msg.reply_to_message.document: content = msg.reply_to_message.document.file_id - text, buttons = button_markdown_parser(msgtext) + text, buttons = button_markdown_parser(msgtext, entities=entities) data_type = Types.DOCUMENT elif msg.reply_to_message.photo: content = msg.reply_to_message.photo[-1].file_id # last elem = best quality - text, buttons = button_markdown_parser(msgtext) + text, buttons = button_markdown_parser(msgtext, entities=entities) data_type = Types.PHOTO elif msg.reply_to_message.audio: content = msg.reply_to_message.audio.file_id - text, buttons = button_markdown_parser(msgtext) + text, buttons = button_markdown_parser(msgtext, entities=entities) data_type = Types.AUDIO elif msg.reply_to_message.voice: content = msg.reply_to_message.voice.file_id - text, buttons = button_markdown_parser(msgtext) + text, buttons = button_markdown_parser(msgtext, entities=entities) data_type = Types.VOICE elif msg.reply_to_message.video: content = msg.reply_to_message.video.file_id - text, buttons = button_markdown_parser(msgtext) + text, buttons = button_markdown_parser(msgtext, entities=entities) data_type = Types.VIDEO return note_name, text, data_type, content, buttons @@ -95,9 +95,9 @@ def get_welcome_type(msg: Message): try: if msg.reply_to_message: if msg.reply_to_message.text: - args = msg.reply_to_message.text_markdown_urled + args = msg.reply_to_message.text else: - args = msg.reply_to_message.caption_markdown_urled + args = msg.reply_to_message.caption else: args = msg.text.split( None, @@ -146,16 +146,20 @@ def get_welcome_type(msg: Message): if args: if msg.reply_to_message: argumen = ( - msg.reply_to_message.caption_markdown_urled if msg.reply_to_message.caption_markdown_urled else "" + msg.reply_to_message.caption if msg.reply_to_message.caption else "" ) offset = 0 # offset is no need since target was in reply + entities = msg.reply_to_message.parse_entities() else: argumen = args[1] offset = len(argumen) - len( msg.text, ) # set correct offset relative to command + notename + entities = msg.parse_entities() text, buttons = button_markdown_parser( - argumen[offset:], + argumen, + entities=entities, + offset=offset, ) if not data_type: diff --git a/SaitamaRobot/modules/helper_funcs/string_handling.py b/SaitamaRobot/modules/helper_funcs/string_handling.py index a68de2553d..35420ad89a 100644 --- a/SaitamaRobot/modules/helper_funcs/string_handling.py +++ b/SaitamaRobot/modules/helper_funcs/string_handling.py @@ -57,17 +57,97 @@ def _calc_emoji_offset(to_calc) -> int: return sum(len(e.group(0).encode("utf-16-le")) // 2 - 1 for e in emoticons) +def markdown_parser( + txt: str, + entities: Dict[MessageEntity, str] = None, + offset: int = 0, +) -> str: + """ + Parse a string, escaping all invalid markdown entities. + + Escapes URL's so as to avoid URL mangling. + Re-adds any telegram code entities obtained from the entities object. + + :param txt: text to parse + :param entities: dict of message entities in text + :param offset: message offset - command and notename length + :return: valid markdown string + """ + if not entities: + entities = {} + if not txt: + return "" + + prev = 0 + res = "" + # Loop over all message entities, and: + # reinsert code + # escape free-standing urls + for ent, ent_text in entities.items(): + if ent.offset < -offset: + continue + + start = ent.offset + offset # start of entity + end = ent.offset + offset + ent.length - 1 # end of entity + + # we only care about code, url, text links + if ent.type in ("code", "url", "text_link"): + # count emoji to switch counter + count = _calc_emoji_offset(txt[:start]) + start -= count + end -= count + + # URL handling -> do not escape if in [](), escape otherwise. + if ent.type == "url": + if any( + match.start(1) <= start and end <= match.end(1) + for match in LINK_REGEX.finditer(txt) + ): + continue + # else, check the escapes between the prev and last and forcefully escape the url to avoid mangling + else: + # TODO: investigate possible offset bug when lots of emoji are present + res += _selective_escape(txt[prev:start] or "") + escape_markdown( + ent_text, + ) + + # code handling + elif ent.type == "code": + res += _selective_escape(txt[prev:start]) + "`" + ent_text + "`" + + # handle markdown/html links + elif ent.type == "text_link": + res += _selective_escape(txt[prev:start]) + "[{}]({})".format( + ent_text, + ent.url, + ) + + end += 1 + + # anything else + else: + continue + + prev = end + + res += _selective_escape(txt[prev:]) # add the rest of the text + return res + + def button_markdown_parser( - note: str, + txt: str, + entities: Dict[MessageEntity, str] = None, + offset: int = 0, ) -> (str, List): + markdown_note = markdown_parser(txt, entities, offset) prev = 0 note_data = "" buttons = [] - for match in BTN_URL_REGEX.finditer(note): + for match in BTN_URL_REGEX.finditer(markdown_note): # Check if btnurl is escaped n_escapes = 0 to_check = match.start(1) - 1 - while to_check > 0 and note[to_check] == "\\": + while to_check > 0 and markdown_note[to_check] == "\\": n_escapes += 1 to_check -= 1 @@ -75,14 +155,14 @@ def button_markdown_parser( if n_escapes % 2 == 0: # create a thruple with button label, url, and newline status buttons.append((match.group(2), match.group(3), bool(match.group(4)))) - note_data += note[prev : match.start(1)] + note_data += markdown_note[prev : match.start(1)] prev = match.end(1) # if odd, escaped -> move along else: - note_data += note[prev:to_check] + note_data += markdown_note[prev:to_check] prev = match.start(1) - 1 else: - note_data += note[prev:] + note_data += markdown_note[prev:] return note_data, buttons diff --git a/SaitamaRobot/modules/notes.py b/SaitamaRobot/modules/notes.py index 175a4a96cb..eb1c5e10a9 100644 --- a/SaitamaRobot/modules/notes.py +++ b/SaitamaRobot/modules/notes.py @@ -417,13 +417,7 @@ def save(update: Update, context: CallbackContext): if len(m) == 1: msg.reply_text("Provide something to save.") return - try: - note_name, text, data_type, content, buttons = get_note_type(msg) - except ValueError as e: - if "Nested entities are not supported for Markdown version 1" in e.args: - msg.reply_text("Nested entities are currently not supported.") - return - + note_name, text, data_type, content, buttons = get_note_type(msg) note_name = note_name.lower() if data_type is None: msg.reply_text("Dude, there's no note") diff --git a/SaitamaRobot/modules/rules.py b/SaitamaRobot/modules/rules.py index 53d8ab9426..aec1d1ad3e 100644 --- a/SaitamaRobot/modules/rules.py +++ b/SaitamaRobot/modules/rules.py @@ -3,6 +3,7 @@ import SaitamaRobot.modules.sql.rules_sql as sql from SaitamaRobot import dispatcher from SaitamaRobot.modules.helper_funcs.chat_status import user_admin +from SaitamaRobot.modules.helper_funcs.string_handling import markdown_parser from telegram import ( InlineKeyboardButton, InlineKeyboardMarkup, @@ -94,15 +95,17 @@ def send_rules(update, chat_id, from_pm=False): def set_rules(update: Update, context: CallbackContext): chat_id = update.effective_chat.id msg = update.effective_message # type: Optional[Message] - try: - raw_text = msg.text_markdown_urled - except ValueError as e: - if "Nested entities are not supported for Markdown version 1" in e.args: - msg.reply_text("Nested entities are currently not supported.") - + raw_text = msg.text args = raw_text.split(None, 1) # use python's maxsplit to separate cmd and args if len(args) == 2: - markdown_rules = args[1] + txt = args[1] + offset = len(txt) - len(raw_text) # set correct offset relative to command + markdown_rules = markdown_parser( + txt, + entities=msg.parse_entities(), + offset=offset, + ) + sql.set_rules(chat_id, markdown_rules) update.effective_message.reply_text("Successfully set rules for this group.") diff --git a/SaitamaRobot/modules/welcome.py b/SaitamaRobot/modules/welcome.py index 0c4ca1606f..ff9f7f0e10 100644 --- a/SaitamaRobot/modules/welcome.py +++ b/SaitamaRobot/modules/welcome.py @@ -23,6 +23,7 @@ from SaitamaRobot.modules.helper_funcs.msg_types import get_welcome_type from SaitamaRobot.modules.helper_funcs.string_handling import ( escape_invalid_curly_brackets, + markdown_parser, ) from SaitamaRobot.modules.log_channel import loggable from telegram import ( @@ -96,23 +97,29 @@ def send(update, message, keyboard, backup_message): ) elif excp.message == "Button_url_invalid": msg = update.effective_message.reply_text( - backup_message + "\nNote: the current message has an invalid url " - "in one of its buttons. Please update.", + markdown_parser( + backup_message + "\nNote: the current message has an invalid url " + "in one of its buttons. Please update.", + ), parse_mode=ParseMode.MARKDOWN, reply_to_message_id=reply, ) elif excp.message == "Unsupported url protocol": msg = update.effective_message.reply_text( - backup_message + "\nNote: the current message has buttons which " - "use url protocols that are unsupported by " - "telegram. Please update.", + markdown_parser( + backup_message + "\nNote: the current message has buttons which " + "use url protocols that are unsupported by " + "telegram. Please update.", + ), parse_mode=ParseMode.MARKDOWN, reply_to_message_id=reply, ) elif excp.message == "Wrong url host": msg = update.effective_message.reply_text( - backup_message + "\nNote: the current message has some bad urls. " - "Please update.", + markdown_parser( + backup_message + "\nNote: the current message has some bad urls. " + "Please update.", + ), parse_mode=ParseMode.MARKDOWN, reply_to_message_id=reply, ) @@ -123,8 +130,10 @@ def send(update, message, keyboard, backup_message): return else: msg = update.effective_message.reply_text( - backup_message + "\nNote: An error occured when sending the " - "custom message. Please update.", + markdown_parser( + backup_message + "\nNote: An error occured when sending the " + "custom message. Please update.", + ), parse_mode=ParseMode.MARKDOWN, reply_to_message_id=reply, )