social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
1from html.parser import HTMLParser
2from typing import override
3import cross.fragments as f
4
5
6class StatusParser(HTMLParser):
7 def __init__(self) -> None:
8 super().__init__()
9 self.text: str = ""
10 self.fragments: list[f.Fragment] = []
11
12 self._tag_stack: dict[str, tuple[int, dict[str, str | None]]] = {}
13 self.in_pre: bool = False
14 self.in_code: bool = False
15
16 self.invisible: bool = False
17
18 @override
19 def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
20 _attr = dict(attrs)
21
22 def append_newline():
23 if self.text and not self.text.endswith("\n"):
24 self.text += "\n"
25
26 if self.invisible:
27 return
28
29 match tag:
30 case "p":
31 cls = _attr.get('class', '')
32 if cls and 'quote-inline' in cls:
33 self.invisible = True
34 case "a":
35 self._tag_stack["a"] = (len(self.text), _attr)
36 case "code":
37 if not self.in_pre:
38 self.text += "`"
39 self.in_code = True
40 case "pre":
41 append_newline()
42 self.text += "```\n"
43 self.in_pre = True
44 case "blockquote":
45 append_newline()
46 self.text += "> "
47 case "strong" | "b":
48 self.text += "**"
49 case "em" | "i":
50 self.text += "*"
51 case "del" | "s":
52 self.text += "~~"
53 case "br":
54 self.text += "\n"
55 case _:
56 if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
57 level = int(tag[1])
58 self.text += "\n" + "#" * level + " "
59
60 @override
61 def handle_endtag(self, tag: str) -> None:
62 if self.invisible:
63 if tag == "p":
64 self.invisible = False
65 return
66
67 current_end = len(self.text)
68 match tag:
69 case "a":
70 if "a" in self._tag_stack:
71 start, _attr = self._tag_stack.pop("a")
72
73 href = _attr.get('href')
74 if href and current_end > start:
75 cls = _attr.get('class', '')
76 if cls:
77 if 'hashtag' in cls:
78 tag = self.text[start:current_end]
79 tag = tag[1:] if tag.startswith('#') else tag
80
81 self.fragments.append(
82 f.TagFragment(start=start, end=current_end, tag=tag)
83 )
84 return
85 if 'mention' in cls: # TODO put the full acct in the fragment
86 mention = self.text[start:current_end]
87 self.fragments.append(
88 f.MentionFragment(start=start, end=current_end, uri=mention)
89 )
90 return
91 self.fragments.append(
92 f.LinkFragment(start=start, end=current_end, url=href)
93 )
94 case "code":
95 if not self.in_pre and self.in_code:
96 self.text += "`"
97 self.in_code = False
98 case "pre":
99 self.text += "\n```\n"
100 self.in_pre = False
101 case "blockquote":
102 self.text += "\n"
103 case "strong" | "b":
104 self.text += "**"
105 case "em" | "i":
106 self.text += "*"
107 case "del" | "s":
108 self.text += "~~"
109 case "p":
110 self.text += "\n\n"
111 case _:
112 if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
113 self.text += '\n'
114
115 @override
116 def handle_data(self, data: str) -> None:
117 if not self.invisible:
118 self.text += data
119
120 def get_result(self) -> tuple[str, list[f.Fragment]]:
121 if self.text.endswith('\n\n'):
122 return self.text[:-2], self.fragments
123 return self.text, self.fragments