social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

fix: use match/case in html parser

zenfyr.dev 4d0cc0f2 aa8754a8

verified
Changed files
+80 -101
util
+80 -101
util/html_util.py
···
def __init__(self) -> None:
super().__init__()
self.tokens: list[cross.Token] = []
-
self.status: dict
self.mentions: list[tuple[str, str]]
self.tags: list[str]
···
if isinstance(last_token, cross.TextToken) and not last_token.text.endswith('\n'):
self.tokens.append(cross.TextToken('\n'))
-
if tag == 'br':
-
self.tokens.append(cross.TextToken(' \n'))
-
-
elif tag == 'a':
-
href = attrs_dict.get('href', '')
-
self.anchor_stack.append(href)
-
-
elif tag == 'strong' or tag == 'b':
-
self.tokens.append(cross.TextToken('**'))
-
-
elif tag == 'em' or tag == 'i':
-
self.tokens.append(cross.TextToken('*'))
-
-
elif tag == 'del' or tag == 's':
-
self.tokens.append(cross.TextToken('~~'))
-
-
elif tag == 'code':
-
if not self.in_pre:
-
self.tokens.append(cross.TextToken('`'))
-
self.in_code = True
-
-
elif tag == 'pre':
-
append_newline()
-
self.tokens.append(cross.TextToken('```\n'))
-
self.in_pre = True
-
-
elif tag == 'blockquote':
-
append_newline()
-
self.tokens.append(cross.TextToken('\n> '))
-
-
elif tag == 'ul':
-
self.list_stack.append('ul')
-
append_newline()
-
-
elif tag == 'ol':
-
self.list_stack.append('ol')
-
append_newline()
-
-
elif tag == 'li':
-
indent = ' ' * (len(self.list_stack) - 1)
-
if self.list_stack and self.list_stack[-1] == 'ul':
-
self.tokens.append(cross.TextToken(f'{indent}- '))
-
elif self.list_stack and self.list_stack[-1] == 'ol':
-
self.tokens.append(cross.TextToken(f'{indent}1. '))
-
-
elif tag == {'h1', 'h2', 'h3', 'h4', 'h5', 'h6'}:
-
level = int(tag[1])
-
self.tokens.append(cross.TextToken("\n" + "#" * level + " "))
+
match tag:
+
case 'br':
+
self.tokens.append(cross.TextToken(' \n'))
+
case 'a':
+
href = attrs_dict.get('href', '')
+
self.anchor_stack.append(href)
+
case 'strong' | 'b':
+
self.tokens.append(cross.TextToken('**'))
+
case 'em' | 'i':
+
self.tokens.append(cross.TextToken('*'))
+
case 'del' | 's':
+
self.tokens.append(cross.TextToken('~~'))
+
case 'code':
+
if not self.in_pre:
+
self.tokens.append(cross.TextToken('`'))
+
self.in_code = True
+
case 'pre':
+
append_newline()
+
self.tokens.append(cross.TextToken('```\n'))
+
self.in_pre = True
+
case 'blockquote':
+
append_newline()
+
self.tokens.append(cross.TextToken('> '))
+
case 'ul' | 'ol':
+
self.list_stack.append(tag)
+
append_newline()
+
case 'li':
+
indent = ' ' * (len(self.list_stack) - 1)
+
if self.list_stack and self.list_stack[-1] == 'ul':
+
self.tokens.append(cross.TextToken(f'{indent}- '))
+
elif self.list_stack and self.list_stack[-1] == 'ol':
+
self.tokens.append(cross.TextToken(f'{indent}1. '))
+
case _:
+
if tag in {'h1', 'h2', 'h3', 'h4', 'h5', 'h6'}:
+
level = int(tag[1])
+
self.tokens.append(cross.TextToken("\n" + "#" * level + " "))
self.current_tag_stack.append(tag)
···
if tag in self.current_tag_stack:
self.current_tag_stack.remove(tag)
-
if tag == 'p':
-
self.tokens.append(cross.TextToken('\n\n'))
+
match tag:
+
case 'p':
+
self.tokens.append(cross.TextToken('\n\n'))
+
case 'a':
+
href = self.anchor_stack.pop()
+
anchor_data = ''.join(self.anchor_data)
+
self.anchor_data = []
-
elif tag == 'a':
-
href = self.anchor_stack.pop()
-
anchor_data = ''.join(self.anchor_data)
-
self.anchor_data = []
-
-
if anchor_data.startswith('#'):
-
as_tag = anchor_data[1:].lower()
-
if any(as_tag == block for block in self.tags):
-
self.tokens.append(cross.TagToken(anchor_data[1:]))
-
elif anchor_data.startswith('@'):
-
match = next(
-
(pair for pair in self.mentions if anchor_data in pair),
-
None
-
)
+
if anchor_data.startswith('#'):
+
as_tag = anchor_data[1:].lower()
+
if any(as_tag == block for block in self.tags):
+
self.tokens.append(cross.TagToken(anchor_data[1:]))
+
elif anchor_data.startswith('@'):
+
match = next(
+
(pair for pair in self.mentions if anchor_data in pair),
+
None
+
)
-
if match:
-
self.tokens.append(cross.MentionToken(match[1], ''))
-
else:
-
self.tokens.append(cross.LinkToken(href, anchor_data))
-
-
elif tag == 'strong' or tag == 'b':
-
self.tokens.append(cross.TextToken('**'))
-
-
elif tag == 'em' or tag == 'i':
-
self.tokens.append(cross.TextToken('*'))
-
-
elif tag == 'del' or tag == 's':
-
self.tokens.append(cross.TextToken('~~'))
-
-
elif tag == 'code':
-
if not self.in_pre and self.in_code:
-
self.tokens.append(cross.TextToken('`'))
-
self.in_code = False
-
-
elif tag == 'pre':
-
self.tokens.append(cross.TextToken('\n```\n'))
-
self.in_pre = False
-
-
elif tag == 'blockquote':
-
self.tokens.append(cross.TextToken('\n'))
-
-
elif tag == 'ul' or tag == 'ol':
-
if self.list_stack:
-
self.list_stack.pop()
-
self.tokens.append(cross.TextToken('\n'))
-
-
elif tag == 'li':
-
self.tokens.append(cross.TextToken('\n'))
-
-
elif tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
-
self.tokens.append(cross.TextToken('\n'))
+
if match:
+
self.tokens.append(cross.MentionToken(match[1], ''))
+
else:
+
self.tokens.append(cross.LinkToken(href, anchor_data))
+
case 'strong' | 'b':
+
self.tokens.append(cross.TextToken('**'))
+
case 'em' | 'i':
+
self.tokens.append(cross.TextToken('*'))
+
case 'del' | 's':
+
self.tokens.append(cross.TextToken('~~'))
+
case 'code':
+
if not self.in_pre and self.in_code:
+
self.tokens.append(cross.TextToken('`'))
+
self.in_code = False
+
case 'pre':
+
self.tokens.append(cross.TextToken('\n```\n'))
+
self.in_pre = False
+
case 'blockquote':
+
self.tokens.append(cross.TextToken('\n'))
+
case 'ul' | 'ol':
+
if self.list_stack:
+
self.list_stack.pop()
+
self.tokens.append(cross.TextToken('\n'))
+
case 'li':
+
self.tokens.append(cross.TextToken('\n'))
+
case _:
+
if tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
+
self.tokens.append(cross.TextToken('\n'))
def get_tokens(self) -> list[cross.Token]:
if not self.tokens: