social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
from html.parser import HTMLParser
from typing import override

import cross.fragments as f

# Markdown delimiter emitted on both the opening and the closing tag of each
# inline formatting element.
_INLINE_MARKERS: dict[str, str] = {
    "strong": "**",
    "b": "**",
    "em": "*",
    "i": "*",
    "del": "~~",
    "s": "~~",
}

_HEADING_TAGS: frozenset[str] = frozenset({"h1", "h2", "h3", "h4", "h5", "h6"})


class HTMLToFragmentsParser(HTMLParser):
    """Flatten HTML into Markdown-flavoured text plus link fragments.

    Feed markup through the usual ``HTMLParser.feed`` API, then call
    ``get_result``.  While parsing, ``text`` accumulates the converted
    output and ``fragments`` collects one ``f.LinkFragment`` per ``<a>``
    element that has a non-empty ``href`` and wraps at least one character;
    each fragment's ``start``/``end`` are offsets into ``text``.

    A ``<p>`` whose ``class`` attribute contains ``quote-inline`` is dropped
    entirely: nothing between it and the next ``</p>`` reaches the output.
    """

    def __init__(self) -> None:
        super().__init__()
        self.text: str = ""
        self.fragments: list[f.Fragment] = []

        # Only the key "a" is ever stored: the text offset and attributes of
        # the most recently opened, not yet closed, anchor.
        self._tag_stack: dict[str, tuple[int, dict[str, str | None]]] = {}
        self.in_pre: bool = False
        self.in_code: bool = False

        # True while inside a skipped ``quote-inline`` paragraph.
        self.invisible: bool = False

    def _ensure_line_start(self) -> None:
        """Append a newline unless the text is empty or already ends in one."""
        if self.text and not self.text.endswith("\n"):
            self.text += "\n"

    def handle_a_endtag(self):
        """Close the pending anchor and record it as a link fragment.

        Pops the ``"a"`` entry from the tag stack (raising ``KeyError`` if
        there is none) and appends an ``f.LinkFragment`` spanning the text
        written since the anchor opened.  Anchors with a missing or empty
        ``href``, or with no text inside, are discarded.
        """
        link_end = len(self.text)
        link_start, attributes = self._tag_stack.pop("a")

        target = attributes.get('href')
        if not target or link_end <= link_start:
            return
        self.fragments.append(
            f.LinkFragment(start=link_start, end=link_end, url=target)
        )

    @override
    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Emit the Markdown opener for ``tag`` and update parser state."""
        if self.invisible:
            return

        attributes = dict(attrs)

        marker = _INLINE_MARKERS.get(tag)
        if marker is not None:
            self.text += marker
        elif tag == "p":
            # Inline quote paragraphs are hidden until the next </p>.
            css_class = attributes.get('class', '')
            if css_class and 'quote-inline' in css_class:
                self.invisible = True
        elif tag == "a":
            self._tag_stack["a"] = (len(self.text), attributes)
        elif tag == "code":
            # Inside <pre> the fence already marks the code; no backtick.
            if not self.in_pre:
                self.text += "`"
                self.in_code = True
        elif tag == "pre":
            self._ensure_line_start()
            self.text += "```\n"
            self.in_pre = True
        elif tag == "blockquote":
            self._ensure_line_start()
            self.text += "> "
        elif tag == "br":
            self.text += "\n"
        elif tag in _HEADING_TAGS:
            # The digit in "hN" is the number of leading '#' characters.
            self.text += f"\n{'#' * int(tag[1])} "

    @override
    def handle_endtag(self, tag: str) -> None:
        """Emit the Markdown closer for ``tag`` and update parser state."""
        if self.invisible:
            # Every end tag is swallowed while hidden; </p> ends the hiding.
            if tag == "p":
                self.invisible = False
            return

        marker = _INLINE_MARKERS.get(tag)
        if marker is not None:
            self.text += marker
        elif tag == "a":
            if "a" in self._tag_stack:
                self.handle_a_endtag()
        elif tag == "code":
            # Close the backtick only if one was opened for this element.
            if not self.in_pre and self.in_code:
                self.text += "`"
                self.in_code = False
        elif tag == "pre":
            self.text += "\n```\n"
            self.in_pre = False
        elif tag == "blockquote":
            self.text += "\n"
        elif tag == "p":
            self.text += "\n\n"
        elif tag in _HEADING_TAGS:
            self.text += '\n'

    @override
    def handle_data(self, data: str) -> None:
        """Append character data to the text unless currently hidden."""
        if self.invisible:
            return
        self.text += data

    def get_result(self) -> tuple[str, list[f.Fragment]]:
        """Return ``(text, fragments)`` with one trailing blank line removed.

        At most a single trailing ``"\\n\\n"`` is stripped from the text;
        the fragment list is returned as-is.
        """
        return self.text.removesuffix('\n\n'), self.fragments