Social media crossposting tool. Third time's the charm.
mastodon misskey crossposting bluesky
1from html.parser import HTMLParser 2from typing import override 3import cross.fragments as f 4 5 6class StatusParser(HTMLParser): 7 def __init__(self) -> None: 8 super().__init__() 9 self.text: str = "" 10 self.fragments: list[f.Fragment] = [] 11 12 self._tag_stack: dict[str, tuple[int, dict[str, str | None]]] = {} 13 self.in_pre: bool = False 14 self.in_code: bool = False 15 16 self.invisible: bool = False 17 18 @override 19 def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None: 20 _attr = dict(attrs) 21 22 def append_newline(): 23 if self.text and not self.text.endswith("\n"): 24 self.text += "\n" 25 26 if self.invisible: 27 return 28 29 match tag: 30 case "p": 31 cls = _attr.get('class', '') 32 if cls and 'quote-inline' in cls: 33 self.invisible = True 34 case "a": 35 self._tag_stack["a"] = (len(self.text), _attr) 36 case "code": 37 if not self.in_pre: 38 self.text += "`" 39 self.in_code = True 40 case "pre": 41 append_newline() 42 self.text += "```\n" 43 self.in_pre = True 44 case "blockquote": 45 append_newline() 46 self.text += "> " 47 case "strong" | "b": 48 self.text += "**" 49 case "em" | "i": 50 self.text += "*" 51 case "del" | "s": 52 self.text += "~~" 53 case "br": 54 self.text += "\n" 55 case _: 56 if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}: 57 level = int(tag[1]) 58 self.text += "\n" + "#" * level + " " 59 60 @override 61 def handle_endtag(self, tag: str) -> None: 62 if self.invisible: 63 if tag == "p": 64 self.invisible = False 65 return 66 67 current_end = len(self.text) 68 match tag: 69 case "a": 70 if "a" in self._tag_stack: 71 start, _attr = self._tag_stack.pop("a") 72 73 href = _attr.get('href') 74 if href and current_end > start: 75 cls = _attr.get('class', '') 76 if cls: 77 if 'hashtag' in cls: 78 tag = self.text[start:current_end] 79 tag = tag[1:] if tag.startswith('#') else tag 80 81 self.fragments.append( 82 f.TagFragment(start=start, end=current_end, tag=tag) 83 ) 84 return 85 if 'mention' in cls: # TODO put 
the full acct in the fragment 86 mention = self.text[start:current_end] 87 self.fragments.append( 88 f.MentionFragment(start=start, end=current_end, uri=mention) 89 ) 90 return 91 self.fragments.append( 92 f.LinkFragment(start=start, end=current_end, url=href) 93 ) 94 case "code": 95 if not self.in_pre and self.in_code: 96 self.text += "`" 97 self.in_code = False 98 case "pre": 99 self.text += "\n```\n" 100 self.in_pre = False 101 case "blockquote": 102 self.text += "\n" 103 case "strong" | "b": 104 self.text += "**" 105 case "em" | "i": 106 self.text += "*" 107 case "del" | "s": 108 self.text += "~~" 109 case "p": 110 self.text += "\n\n" 111 case _: 112 if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]: 113 self.text += '\n' 114 115 @override 116 def handle_data(self, data: str) -> None: 117 if not self.invisible: 118 self.text += data 119 120 def get_result(self) -> tuple[str, list[f.Fragment]]: 121 if self.text.endswith('\n\n'): 122 return self.text[:-2], self.fragments 123 return self.text, self.fragments