social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

more minor work

zenfyr.dev 74a77d94 e42b0536

verified
Changed files
+91 -65
mastodon
tests
util
+19 -16
mastodon/parser.py
···
-
from typing import override
+
from typing import Any, override
import cross.fragments as f
from util.html import HTMLToFragmentsParser
class StatusParser(HTMLToFragmentsParser):
# Subclass of HTMLToFragmentsParser that classifies <a> elements against the
# hashtag and mention URLs carried by the status dict given to __init__.
# NOTE(review): this text appears to be a rendered diff rather than plain
# source: a line holding only "-" or "+" seems to mark the following line as
# removed or added, and indentation is not shown, so the block nesting below
# cannot be confirmed from this view.
-
def __init__(self) -> None:
+
def __init__(self, status: dict[str, Any]) -> None:
super().__init__()
# "url" of every entry under the status's "tags" key (empty set when the key is absent).
+
self.tags: set[str] = set(tag["url"] for tag in status.get("tags", []))
# "url" of every entry under the status's "mentions" key (empty set when the key is absent).
+
self.mentions: set[str] = set(m["url"] for m in status.get("mentions", []))
# Turns the "a" entry popped from the tag stack into a Tag, Mention or Link fragment.
@override
def handle_a_endtag(self):
-
current_end = len(self.text)
# The added version measures the end offset on self.builder, which is used as
# a bytes-like buffer below (sliced, compared with b"#", then decoded).
+
current_end = len(self.builder)
start, _attr = self._tag_stack.pop("a")
-
href = _attr.get('href')
+
href = _attr.get("href")
# Proceed only when an href exists and the span [start, current_end) is non-empty.
if href and current_end > start:
-
cls = _attr.get('class', '')
+
cls = _attr.get("class", "")
if cls:
-
if 'hashtag' in cls:
-
tag = self.text[start:current_end]
-
tag = tag[1:] if tag.startswith('#') else tag
# A "hashtag" class alone is no longer enough: the href must also be one of
# the tag URLs collected in __init__.
+
if "hashtag" in cls and href in self.tags:
+
tag = self.builder[start:current_end]
# Drop a leading "#" so the fragment carries the bare tag name.
+
tag = tag[1:] if tag.startswith(b"#") else tag
self.fragments.append(
-
f.TagFragment(start=start, end=current_end, tag=tag)
+
f.TagFragment(
+
start=start, end=current_end, tag=tag.decode("utf-8")
+
)
)
return
-
if 'mention' in cls: # TODO put the full acct in the fragment
-
mention = self.text[start:current_end]
-
mention = mention[1:] if mention.startswith('@') else mention
-
self.fragments.append(
-
f.MentionFragment(start=start, end=current_end, uri=mention)
-
)
-
return
# The added version matches mentions by href against the URLs collected in
# __init__ and stores the href itself as the fragment's uri, instead of the
# link text with a leading "@" removed.
+
if "mention" in cls:
+
if href in self.mentions:
+
self.fragments.append(
+
f.MentionFragment(start=start, end=current_end, uri=href)
+
)
# NOTE(review): with indentation hidden it is unclear whether this return
# belongs to `if href in self.mentions` or to `if "mention" in cls`; that
# decides whether mention-classed links missing from self.mentions fall
# through to the LinkFragment below or produce no fragment -- confirm.
+
return
# Fallback reached when no earlier branch returned: record a plain link.
self.fragments.append(
f.LinkFragment(start=start, end=current_end, url=href)
)
+19 -3
tests/util/html_test.py
···
import cross.fragments as f
import pytest
+
@pytest.fixture()
def parser() -> HTMLToFragmentsParser:
    """Build the ``HTMLToFragmentsParser`` instance handed to tests that request ``parser``."""
    return HTMLToFragmentsParser()
+
def test_html(parser: HTMLToFragmentsParser):
    """Check the text and the single link fragment produced for a mixed HTML snippet.

    The snippet combines ``<del>``, ``<em>``, ``<strong>``, a ``<blockquote>``
    containing a link, and a ``<pre><code>`` block with escaped ``<`` characters.
    """
    # Named ``html`` rather than ``input`` so the builtin is not shadowed.
    html = '<p><del>excuse</del> <em>me</em>, <strong>test</strong> post</p><blockquote><p>very testy <a href="https://google.com" target="_blank" rel="nofollow noopener">post</a></p></blockquote><pre><code>cat &lt;&lt; food<br></code></pre>'
    parser.feed(html)
    text, frgs = parser.get_result()

    expected = "~~excuse~~ *me*, **test** post\n\n> very testy post\n\n\n```\ncat << food\n```"
    assert text == expected
    assert len(frgs) == 1

    assert isinstance(frgs[0], f.LinkFragment)
    # NOTE(review): counting characters in ``expected``, the link text "post"
    # occupies offsets 45-49; confirm the 46/50 values below against the
    # parser's actual output.
    assert frgs[0].start == 46 and frgs[0].end == 50
    assert frgs[0].url == "https://google.com"
+
+
+
def test_keep_autolink(parser: HTMLToFragmentsParser):
    """Feed an angle-bracket autolink through the parser.

    No assertion is active yet; the intended check is kept below as a TODO.
    """
    # Named ``html`` rather than ``input`` so the builtin is not shadowed.
    html = "<https://google.com>"
    parser.feed(html)
    text, frgs = parser.get_result()
    # TODO
    # assert text == html
+33 -34
tests/util/markdown_test.py
···
assert text == "https://google.com"
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.LinkFragment)
-
assert frg.start == 0 and frg.end == 18
-
assert frg.url == "https://google.com"
+
assert isinstance(frgs[0], f.LinkFragment)
+
assert frgs[0].start == 0 and frgs[0].end == 18
+
assert frgs[0].url == "https://google.com"
def test_link_emojis(parser: MarkdownParser):
···
assert text == input
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.LinkFragment)
-
assert frg.start == 9 and frg.end == 27
-
assert frg.url == "https://google.com"
+
assert isinstance(frgs[0], f.LinkFragment)
+
assert frgs[0].start == 9 and frgs[0].end == 27
+
assert frgs[0].url == "https://google.com"
def test_label_link(parser: MarkdownParser):
···
assert text == "hello"
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.LinkFragment)
-
assert frg.start == 0 and frg.end == 5
-
assert frg.url == "https://google.com"
+
assert isinstance(frgs[0], f.LinkFragment)
+
assert frgs[0].start == 0 and frgs[0].end == 5
+
assert frgs[0].url == "https://google.com"
def test_label_link_emojis(parser: MarkdownParser):
···
assert text == EMOJI
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.LinkFragment)
-
assert frg.start == 0 and frg.end == 8
-
assert frg.url == "https://google.com"
+
assert isinstance(frgs[0], f.LinkFragment)
+
assert frgs[0].start == 0 and frgs[0].end == 8
+
assert frgs[0].url == "https://google.com"
def test_tag(parser: MarkdownParser):
···
assert text == input
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.TagFragment)
-
assert frg.start == 0 and frg.end == 8
-
assert frg.tag == "testing"
+
assert isinstance(frgs[0], f.TagFragment)
+
assert frgs[0].start == 0 and frgs[0].end == 8
+
assert frgs[0].tag == "testing"
+
def test_tag_emojis(parser: MarkdownParser):
input = f"{EMOJI} #testing"
···
assert text == input
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.TagFragment)
-
assert frg.start == 9 and frg.end == 17
-
assert frg.tag == "testing"
+
assert isinstance(frgs[0], f.TagFragment)
+
assert frgs[0].start == 9 and frgs[0].end == 17
+
assert frgs[0].tag == "testing"
+
def test_mention(parser: MarkdownParser):
input = "@zen@merping.synth.download"
···
assert text == input
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.MentionFragment)
-
assert frg.start == 0 and frg.end == 27
-
assert frg.uri == "zen@merping.synth.download"
+
assert isinstance(frgs[0], f.MentionFragment)
+
assert frgs[0].start == 0 and frgs[0].end == 27
+
assert frgs[0].uri == "zen@merping.synth.download"
+
def test_mention_emojis(parser: MarkdownParser):
input = f"{EMOJI} @zen@merping.synth.download"
···
assert text == input
assert len(frgs) == 1
-
frg = frgs[0]
-
assert isinstance(frg, f.MentionFragment)
-
assert frg.start == 9 and frg.end == 36
-
assert frg.uri == "zen@merping.synth.download"
+
assert isinstance(frgs[0], f.MentionFragment)
+
assert frgs[0].start == 9 and frgs[0].end == 36
+
assert frgs[0].uri == "zen@merping.synth.download"
+
def test_mixed(parser: MarkdownParser):
input = "#testing_tag @zen@merping.synth.download [hello](https://zenfyr.dev/) hii! https://example.com"
text, frgs = parser.parse(input)
-
expected_text = "#testing_tag @zen@merping.synth.download hello hii! https://example.com"
+
expected_text = (
+
"#testing_tag @zen@merping.synth.download hello hii! https://example.com"
+
)
assert text == expected_text
assert len(frgs) == 4
···
assert frgs[3].start == 52 and frgs[3].end == 71
assert frgs[3].url == "https://example.com"
+
def test_mixed_html(parser: MarkdownParser):
-
input = f"<p>#testing_tag @zen@merping.synth.download</p> {EMOJI} <a href=\"https://zenfyr.dev/\"><b>hello</b></a> hii! https://example.com"
+
input = f'<p>#testing_tag @zen@merping.synth.download</p> {EMOJI} <a href="https://zenfyr.dev/"><b>hello</b></a> hii! https://example.com'
text, frgs = parser.parse(input)
expected_text = f"#testing_tag @zen@merping.synth.download\n\n {EMOJI} **hello** hii! https://example.com"
+20 -12
util/html.py
···
f.LinkFragment(start=start, end=current_end, url=href)
)
+
def append_newline(self) -> None:
    """Start a new output line unless the buffer is empty or already ends with one.

    Appends a single ``b"\\n"`` to ``self.builder`` only when the buffer is
    non-empty and its last byte is not already a newline, so repeated calls
    never stack up blank lines.
    """
    if self.builder and not self.builder.endswith(b"\n"):
        self.builder.extend(b"\n")
+
@override
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
_attr = dict(attrs)
-
-
def append_newline():
-
if self.builder and not self.builder.endswith(b"\n"):
-
self.builder.extend(b"\n")
if self.invisible:
return
···
self.builder.extend(b"`")
self.in_code = True
case "pre":
-
append_newline()
+
self.append_newline()
self.builder.extend(b"```\n")
self.in_pre = True
case "blockquote":
-
append_newline()
+
self.append_newline()
self.builder.extend(b"> ")
case "strong" | "b":
self.builder.extend(b"**")
···
self.builder.extend(b"~~")
case "br":
self.builder.extend(b"\n")
+
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
+
level = int(tag[1])
+
self.builder.extend(("\n" + "#" * level + " ").encode('utf-8'))
case _:
-
if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
-
level = int(tag[1])
-
self.builder.extend(("\n" + "#" * level + " ").encode('utf-8'))
+
#self.builder.extend(f"<{tag}>".encode("utf-8"))
+
pass
+
@override
def handle_endtag(self, tag: str) -> None:
···
self.builder.extend(b"`")
self.in_code = False
case "pre":
-
self.builder.extend(b"\n```\n")
+
self.append_newline()
+
self.builder.extend(b"```\n")
self.in_pre = False
case "blockquote":
self.builder.extend(b"\n")
···
self.builder.extend(b"~~")
case "p":
self.builder.extend(b"\n\n")
+
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
+
self.builder.extend(b'\n')
case _:
-
if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
-
self.builder.extend(b'\n')
+
#self.builder.extend(f"</{tag}>".encode("utf-8"))
+
pass
@override
def handle_data(self, data: str) -> None:
···
def get_result(self) -> tuple[str, list[f.Fragment]]:
    """Return the accumulated text, decoded as UTF-8, together with the fragments.

    Trailing line breaks are trimmed conservatively: a trailing pair of
    newlines is removed if present, otherwise a single trailing newline is
    removed. Never more than two bytes are dropped, so any further newlines
    in front of them are kept.

    Returns:
        A ``(text, fragments)`` tuple; ``fragments`` is ``self.fragments``
        itself, not a copy.
    """
    raw = self.builder
    # Try the longer suffix first and stop at the first match so that at most
    # one of the two trims is ever applied.
    for suffix in (b"\n\n", b"\n"):
        if raw.endswith(suffix):
            raw = raw[: -len(suffix)]
            break
    return raw.decode("utf-8"), self.fragments