diff --git a/telebot/formatting.py b/telebot/formatting.py index 0e300f7c1..ebbcb77be 100644 --- a/telebot/formatting.py +++ b/telebot/formatting.py @@ -1,7 +1,5 @@ """ Markdown & HTML formatting functions. - -.. versionadded:: 4.5.1 """ import re @@ -45,6 +43,7 @@ def escape_html(content: str) -> str: return html.escape(content) +# noinspection RegExpRedundantEscape def escape_markdown(content: str) -> str: """ Escapes Markdown characters in a string of Markdown. @@ -269,6 +268,9 @@ def mcode(content: str, language: str="", escape: Optional[bool]=True) -> str: :param content: The string to code. :type content: :obj:`str` + :param language: The programming language of the code. Defaults to an empty string. + :type language: :obj:`str` + :param escape: True if you need to escape special characters. Defaults to True. :type escape: :obj:`bool` @@ -304,6 +306,9 @@ def hpre(content: str, escape: Optional[bool]=True, language: str="") -> str: :param escape: True if you need to escape special characters. Defaults to True. :type escape: :obj:`bool` + :param language: The programming language of the code. Defaults to an empty string. + :type language: :obj:`str` + :return: The formatted string. :rtype: :obj:`str` """ @@ -368,31 +373,11 @@ def hcite(content: str, escape: Optional[bool] = True, expandable: Optional[bool ) -def apply_html_entities(text: str, entities: Optional[List], custom_subs: Optional[Dict[str, str]]) -> str: +def apply_html_entities(text: str, entities: Optional[List]=None, custom_subs: Optional[Dict[str, str]]=None) -> str: """ - Author: @sviat9440 - Updaters: @badiboy, @EgorKhabarov - Message: "*Test* parse _formatting_, [url](https://example.com), [text_mention](tg://user?id=123456) and mention @username" - - .. code-block:: python3 - :caption: Example: - - apply_html_entities(text, entities) - >> "Test parse formatting, url, text_mention and mention @username" - - Custom subs: - You can customize the substitutes. 
By default, there is no substitute for the entities: hashtag, bot_command, email. You can add or modify substitute an existing entity. - .. code-block:: python3 - :caption: Example: - - apply_html_entities( - text, - entities, - {"bold": "{text}", "italic": "{text}", "mention": "{text}"}, - ) - >> "Test parse formatting, url and text_mention and mention @username" + Apply HTML formatting to text based on provided entities. + Handles nested and overlapping entities correctly. """ - if not entities: return text.replace("&", "&").replace("<", "<").replace(">", ">") @@ -401,7 +386,6 @@ def apply_html_entities(text: str, entities: Optional[List], custom_subs: Option "italic": "{text}", "pre": "
{text}",
"code": "{text}",
- # "url": "{text}", # @badiboy plain URLs have no text and do not need tags
"text_link": "{text}",
"strikethrough": "{text}", "expandable_blockquote": "
{text}", - } if custom_subs: for key, value in custom_subs.items(): _subs[key] = value + + # Sort entities by offset (starting position), with longer entities first for equal offsets + sorted_entities = sorted(entities, key=lambda e: (e.offset, -e.length)) + + # Convert text to utf-16 encoding for proper handling utf16_text = text.encode("utf-16-le") - html_text = "" - - def func(upd_text, subst_type=None, url=None, user=None, custom_emoji_id=None, language=None): - upd_text = upd_text.decode("utf-16-le") - if subst_type == "text_mention": - subst_type = "text_link" - url = "tg://user?id={0}".format(user.id) - elif subst_type == "mention": - url = "https://t.me/{0}".format(upd_text[1:]) - upd_text = upd_text.replace("&", "&").replace("<", "<").replace(">", ">") - if not subst_type or not _subs.get(subst_type): - return upd_text - subs = _subs.get(subst_type) - if subst_type == "custom_emoji": - return subs.format(text=upd_text, custom_emoji_id=custom_emoji_id) - elif (subst_type == "pre") and language: - return "
{1}".format(language, upd_text)
- return subs.format(text=upd_text, url=url)
-
- offset = 0
- start_index = 0
- end_index = 0
- for entity in entities:
- if entity.offset > offset:
- # when the offset is not 0: for example, a __b__
- # we need to add the text before the entity to the html_text
- html_text += func(utf16_text[offset * 2: entity.offset * 2])
- offset = entity.offset
-
- new_string = func(utf16_text[offset * 2: (offset + entity.length) * 2], subst_type=entity.type,
- url=entity.url, user=entity.user, custom_emoji_id=entity.custom_emoji_id,
- language=entity.language)
- start_index = len(html_text)
- html_text += new_string
- offset += entity.length
- end_index = len(html_text)
- elif entity.offset == offset:
- new_string = func(utf16_text[offset * 2: (offset + entity.length) * 2], subst_type=entity.type,
- url=entity.url, user=entity.user, custom_emoji_id=entity.custom_emoji_id,
- language=entity.language)
- start_index = len(html_text)
- html_text += new_string
- end_index = len(html_text)
- offset += entity.length
+
+ def escape_entity(text_part):
+ """Escape HTML special characters in a text part"""
+ if isinstance(text_part, bytes):
+ text_part = text_part.decode("utf-16-le")
+ return text_part.replace("&", "&").replace("<", "<").replace(">", ">")
+
+ def format_entity(entity, content):
+ """Apply entity formatting to the content"""
+ entity_type = entity.type
+
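+ # Types tied to an entity attribute (user, url, custom_emoji_id) are checked first; every other type falls back to the _subs templates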
+ if entity_type == "text_mention" and hasattr(entity, 'user'):
+ return f"{content}"
+ # NOTE: "mention" entities deliberately get no dedicated branch here.
+ # Per the author's note a plain @username "works fine" as-is, so no link is built
+ # from the username (content[1:]); they fall through to the generic _subs lookup below.
+ elif entity_type == "text_link" and hasattr(entity, 'url'):
+ return f"{content}"
+ elif entity_type == "custom_emoji" and hasattr(entity, 'custom_emoji_id'):
+ return f"{content}"
+ elif entity_type in _subs:
+ template = _subs[entity_type]
+ return template.format(text=content)
+
+ # If no matching entity type, return text as is
+ return content
+
+ def process_entities(byte_text, entity_list, start_pos=0, end_pos=None):
+ if end_pos is None:
+ end_pos = len(byte_text)
+
+ if not entity_list or start_pos >= end_pos:
+ return escape_entity(byte_text[start_pos:end_pos])
+
+ current_entity = entity_list[0]
+ current_start = current_entity.offset * 2
+ current_end = current_start + current_entity.length * 2
+
+ if current_end <= start_pos or current_start >= end_pos:
+ return escape_entity(byte_text[start_pos:end_pos])
+
+ result = []
+
+ if current_start > start_pos:
+ result.append(escape_entity(byte_text[start_pos:current_start]))
+
+ nested_entities = []
+ remaining_entities = []
+
+ for entity in entity_list[1:]:
+ entity_start = entity.offset * 2
+ # Nesting is decided by the start offset alone; the entity's end (entity_start + entity.length * 2) is not compared with current_end
+
+ if current_start <= entity_start < current_end:
+ nested_entities.append(entity)
+ else:
+ remaining_entities.append(entity)
+
+ if nested_entities:
+ inner_content = process_entities(
+ byte_text,
+ nested_entities,
+ current_start,
+ current_end
+ )
else:
- # Here we are processing nested entities.
- # We shouldn't update offset, because they are the same as entity before.
- # And, here we are replacing previous string with a new html-rendered text(previous string is already html-rendered,
- # And we don't change it).
- entity_string = html_text[start_index: end_index].encode("utf-16-le")
- formatted_string = func(entity_string, subst_type=entity.type, url=entity.url, user=entity.user,
- custom_emoji_id=entity.custom_emoji_id,
- language=entity.language). \
- replace("&", "&").replace("<", "<").replace(">", ">")
- html_text = html_text[:start_index] + formatted_string + html_text[end_index:]
- end_index = len(html_text)
-
- if offset * 2 < len(utf16_text):
- html_text += func(utf16_text[offset * 2:])
-
- return html_text
+ inner_content = escape_entity(byte_text[current_start:current_end])
+
+ result.append(format_entity(current_entity, inner_content))
+
+ if current_end < end_pos and remaining_entities:
+ result.append(process_entities(
+ byte_text,
+ remaining_entities,
+ current_end,
+ end_pos
+ ))
+ elif current_end < end_pos:
+ result.append(escape_entity(byte_text[current_end:end_pos]))
+
+ return "".join(result)
+
+ html_result = process_entities(utf16_text, sorted_entities)
+
+ return html_result
diff --git a/tests/test_types.py b/tests/test_types.py
index 630ab658c..04e8b3ac8 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -278,7 +278,7 @@ def test_message_entity():
sample_string_1 = r'{"update_id":934522126,"message":{"message_id":1374510,"from":{"id":927266710,"is_bot":false,"first_name":">_run","username":"coder2020","language_code":"en","is_premium":true},"chat":{"id":927266710,"first_name":">_run","username":"coder2020","type":"private"},"date":1682177590,"text":"b b b","entities":[{"offset":0,"length":2,"type":"bold"},{"offset":0,"length":1,"type":"italic"},{"offset":2,"length":2,"type":"bold"},{"offset":2,"length":1,"type":"italic"},{"offset":4,"length":1,"type":"bold"},{"offset":4,"length":1,"type":"italic"}]}}'
update = types.Update.de_json(sample_string_1)
message: types.Message = update.message
- assert message.html_text == "b b b"
+ assert message.html_text == "b b b"
sample_string_2 = r'{"update_id":934522166,"message":{"message_id":1374526,"from":{"id":927266710,"is_bot":false,"first_name":">_run","username":"coder2020","language_code":"en","is_premium":true},"chat":{"id":927266710,"first_name":">_run","username":"coder2020","type":"private"},"date":1682179716,"text":"b b b","entities":[{"offset":0,"length":1,"type":"bold"},{"offset":2,"length":1,"type":"bold"},{"offset":4,"length":1,"type":"italic"}]}}'
message_2 = types.Update.de_json(sample_string_2).message
@@ -288,12 +288,16 @@ def test_message_entity():
sample_string_3 = r'{"update_id":934522172,"message":{"message_id":1374530,"from":{"id":927266710,"is_bot":false,"first_name":">_run","username":"coder2020","language_code":"en","is_premium":true},"chat":{"id":927266710,"first_name":">_run","username":"coder2020","type":"private"},"date":1682179968,"text":"This is a bold text with a nested italic and bold text.","entities":[{"offset":10,"length":4,"type":"bold"},{"offset":27,"length":7,"type":"italic"},{"offset":34,"length":15,"type":"bold"},{"offset":34,"length":15,"type":"italic"}]}}'
message_3 = types.Update.de_json(sample_string_3).message
- assert message_3.html_text == "This is a bold text with a nested italic and bold text."
+ assert \
+ (message_3.html_text == "This is a bold text with a nested italic and bold text.") or \
+ (message_3.html_text == "This is a bold text with a nested italic and bold text.")
sample_string_4 = r'{"update_id":934522437,"message":{"message_id":1374619,"from":{"id":927266710,"is_bot":false,"first_name":">_run","username":"coder2020","language_code":"en","is_premium":true},"chat":{"id":927266710,"first_name":">_run","username":"coder2020","type":"private"},"date":1682189507,"forward_from":{"id":927266710,"is_bot":false,"first_name":">_run","username":"coder2020","language_code":"en","is_premium":true},"forward_date":1682189124,"text":"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa😋😋","entities":[{"offset":0,"length":76,"type":"bold"},{"offset":0,"length":76,"type":"italic"},{"offset":0,"length":76,"type":"underline"},{"offset":0,"length":76,"type":"strikethrough"},{"offset":76,"length":2,"type":"custom_emoji","custom_emoji_id":"5456188142006575553"},{"offset":78,"length":2,"type":"custom_emoji","custom_emoji_id":"5456188142006575553"}]}}'
message_4 = types.Update.de_json(sample_string_4).message
- assert message_4.html_text == '