This repository was archived by the owner on Apr 9, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathformatting.py
More file actions
61 lines (44 loc) · 1.78 KB
/
formatting.py
File metadata and controls
61 lines (44 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""Convert markdown to Telegram-safe HTML."""
from __future__ import annotations
import html
import re
def md_to_telegram_html(text: str) -> str:
    """Convert common markdown to Telegram HTML.

    Handles: **bold**, *italic*, ***bold italic***, `inline code`,
    ```code blocks```, [text](url), and # headings. Everything else is
    HTML-escaped.

    Code blocks, inline code and link opening tags are swapped for
    NUL-delimited placeholders while the emphasis and heading rules run,
    so those rules can never rewrite code content or an ``href`` value.

    Args:
        text: Markdown source text.

    Returns:
        The text converted to HTML, with every character outside the
        generated tags HTML-escaped.
    """
    # Extract code blocks first to protect their content.
    code_blocks: list[str] = []

    def _save_code_block(m: re.Match) -> str:
        code_blocks.append(f"<pre>{html.escape(m.group(1))}</pre>")
        return f"\x00CODEBLOCK{len(code_blocks) - 1}\x00"

    # A language tag only counts when it is directly followed by a newline;
    # otherwise the first word of a one-line block (```hello```) would be
    # swallowed as the "language" and dropped from the output. The tag
    # itself is discarded.
    text = re.sub(
        r"```(?:\w*\n)?(.*?)```", _save_code_block, text, flags=re.DOTALL
    )

    # Extract inline code.
    inline_codes: list[str] = []

    def _save_inline_code(m: re.Match) -> str:
        inline_codes.append(f"<code>{html.escape(m.group(1))}</code>")
        return f"\x00INLINE{len(inline_codes) - 1}\x00"

    text = re.sub(r"`([^`]+)`", _save_inline_code, text)

    # HTML-escape the rest (the placeholders contain nothing escapable).
    text = html.escape(text)

    # Links: [text](url). Only the opening tag is stashed: the URL must not
    # be touched by the emphasis rules below, while the link text stays in
    # place so it can still be formatted.
    link_tags: list[str] = []

    def _save_link(m: re.Match) -> str:
        link_tags.append(f'<a href="{m.group(2)}">')
        return f"\x00LINK{len(link_tags) - 1}\x00{m.group(1)}</a>"

    text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", _save_link, text)

    # Bold + italic: ***text***. Must run before the bold rule, which would
    # otherwise pair the stars unevenly and produce mis-nested tags.
    text = re.sub(r"\*\*\*(.+?)\*\*\*", r"<b><i>\1</i></b>", text)

    # Bold: **text**
    text = re.sub(r"\*\*(.+?)\*\*", r"<b>\1</b>", text)

    # Italic: *text* (but not inside words like file*name). The content must
    # start and end with a character that is neither whitespace nor a star,
    # so "2 * 3 * 4", "* bullet" lines and bare "***" rules are left alone.
    text = re.sub(
        r"(?<!\w)\*(?=[^\s*])(.+?)(?<=[^\s*])\*(?!\w)", r"<i>\1</i>", text
    )

    # Headings: # Title -> bold. Only spaces/tabs may separate the hashes
    # from the title, so a lone "#" line cannot absorb the following line.
    text = re.sub(
        r"^#{1,6}[ \t]+(.+)$", r"<b>\1</b>", text, flags=re.MULTILINE
    )

    # Restore links first: a URL may itself carry a code placeholder, which
    # the two loops after it then resolve.
    for i, tag in enumerate(link_tags):
        text = text.replace(f"\x00LINK{i}\x00", tag)
    for i, block in enumerate(code_blocks):
        text = text.replace(f"\x00CODEBLOCK{i}\x00", block)
    for i, code in enumerate(inline_codes):
        text = text.replace(f"\x00INLINE{i}\x00", code)
    return text