social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

fix md parser, add grapheme, start on fragment splitter

zenfyr.dev 7e647c4b 9f45c8ac

verified
+1
pyproject.toml
···
requires-python = ">=3.12"
dependencies = [
"dnspython>=2.8.0",
"python-magic>=0.4.27",
"requests>=2.32.5",
"websockets>=15.0.1",
···
requires-python = ">=3.12"
dependencies = [
"dnspython>=2.8.0",
+
"grapheme>=0.6.0",
"python-magic>=0.4.27",
"requests>=2.32.5",
"websockets>=15.0.1",
+33 -48
util/markdown.py
···
import re
import cross.fragments as f
from util.html import HTMLToFragmentsParser
···
total: int = len(markdown)
# no match == processed fragments
-
events: list[tuple[int, int, re.Match[str] | None, str]] = []
-
events.extend([(fg.start, fg.end, None, "html") for fg in fragments])
while index < total:
ch = markdown[index]
rmatch = None
···
)
last_end = end
-
def update_fragments(start: int, s, offset: int):
-
nonlocal fragments
-
for fg in fragments:
-
if fg != s and fg.start >= start:
-
fg.start += offset
-
fg.end += offset
-
new_text = ""
-
last_pos = 0
for start, end, rmatch, event in events:
-
if start > last_pos:
-
new_text += markdown[last_pos:start]
-
if not rmatch:
-
new_text += markdown[start:end]
-
last_pos = end
continue
match event:
case "inline_link":
label = rmatch.group(1)
href = rmatch.group(2)
-
fg = f.LinkFragment(start=start, end=start + len(label), url=href)
-
fragments.append(fg)
-
update_fragments(start, fg, -(end - (start + len(label))))
-
new_text += label
-
# case "autolink":
-
# url = rmatch.group(0)
-
# fg = f.LinkFragment(start=start, end=end - 2, url=url)
-
# fragments.append(fg)
-
# update_fragments(start, fg, -2)
-
# new_text += url
case "hashtag":
-
tag = rmatch.group(0)
-
fragments.append(
-
f.TagFragment(
-
start=start,
-
end=end,
-
tag=tag[1:] if tag.startswith("#") else tag,
-
)
-
)
-
new_text += markdown[start:end]
case "mention":
mention = rmatch.group(0)
-
fragments.append(
-
f.MentionFragment(
-
start=start,
-
end=end,
-
uri=mention[1:] if mention.startswith("@") else mention,
-
)
-
)
-
new_text += markdown[start:end]
case "url":
url = rmatch.group(0)
-
fragments.append(f.LinkFragment(start=start, end=end, url=url))
-
new_text += markdown[start:end]
case _:
pass
-
last_pos = end
-
if last_pos < len(markdown):
-
new_text += markdown[last_pos:]
-
return new_text, fragments
···
+
from dataclasses import replace
import re
import cross.fragments as f
from util.html import HTMLToFragmentsParser
···
total: int = len(markdown)
# no match == processed fragments
+
events: list[tuple[int, int, re.Match[str] | f.Fragment, str]] = []
+
events.extend([(fg.start, fg.end, fg, "html") for fg in fragments])
while index < total:
ch = markdown[index]
rmatch = None
···
)
last_end = end
+
ntext: list[str] = []
+
nfragments: list[f.Fragment] = []
+
offset: int = 0
+
last_index: int = 0
+
events.sort(key=lambda x: x[0])
for start, end, rmatch, event in events:
+
ntext.append(markdown[last_index:start])
+
if isinstance(rmatch, f.Fragment):
+
ntext.append(markdown[start:end])
+
nfg = replace(rmatch, start=start + offset, end=end + offset)
+
nfragments.append(nfg)
+
last_index = end
continue
+
nstart = start + offset
+
nend = end + offset
match event:
case "inline_link":
label = rmatch.group(1)
href = rmatch.group(2)
+
ntext.append(label)
+
+
delta = len(label) - (end - start)
+
offset += delta
+
+
nfragments.append(f.LinkFragment(start=nstart, end=nstart + len(label), url=href))
case "hashtag":
+
tag = rmatch.group(1)
+
ntext.append(markdown[start:end])
+
nfragments.append(f.TagFragment(start=nstart, end=nend, tag=tag))
case "mention":
mention = rmatch.group(0)
+
ntext.append(markdown[start:end])
+
mention = mention[1:] if mention.startswith("@") else mention
+
nfragments.append(f.MentionFragment(start=nstart, end=nend, uri=mention))
case "url":
url = rmatch.group(0)
+
ntext.append(markdown[start:end])
+
nfragments.append(f.LinkFragment(start=nstart, end=nend, url=url))
case _:
pass
+
last_index = end
+
ntext.append(markdown[last_index:])
+
return ''.join(ntext), nfragments
+72
util/splitter.py
···
···
+
import grapheme
+
from cross.fragments import Fragment, LinkFragment
+
from dataclasses import replace
+
+
+
def canonical_label(label: str | None, href: str):
+
if not label or label == href:
+
return True
+
+
split = href.split("://", 1)
+
if len(split) > 1:
+
if split[1] == label:
+
return True
+
+
return False
+
+
+
class FragmentSplitter:
    """Splits fragment-annotated text into pieces that fit a character limit.

    NOTE(review): work in progress — `split` only implements the
    under-limit case and implicitly returns None when the text is longer
    than `climit`; callers must handle that until splitting is finished.
    """

    def __init__(self, climit: int, urllen: int):
        # climit: maximum text length, measured in grapheme clusters.
        # urllen: target display length for URLs; -1 disables normalization.
        self.climit: int = climit
        self.urllen: int = urllen

    def normalize_link(self, label: str, url: str) -> str:
        """Return the display label to use for a link.

        Currently a no-op: the truncation strategy below is stubbed out
        (it references a not-yet-existing `self.urltrunc` option).
        """
        #if canonical_label(label, url):
        #    if self.urltrunc == "dotted":
        #        nlabel = url.split("://", 1)[1]
        #        if len(nlabel) <= self.urllen:
        #            return nlabel
        #        return nlabel[: self.urllen - 1] + "…"
        return label

    def url_normalize(
        self, text: str, fragments: list[Fragment]
    ) -> tuple[str, list[Fragment]]:
        """Rewrite link labels in *text* and shift fragment offsets to match.

        Returns the rebuilt text and a new fragment list; the caller's
        inputs are not mutated.
        """
        # urllen == -1 means "no URL shortening" — nothing to do.
        if self.urllen == -1:
            return text, fragments

        ntext: list[str] = []
        nfragments: list[Fragment] = []

        # Running difference between positions in the new and old text.
        offset: int = 0
        last_index: int = 0

        # Copy before sorting so the caller's list order is untouched.
        fragments = [fg for fg in fragments]
        fragments.sort(key=lambda x: x.start)

        for fg in fragments:
            # Unannotated text between the previous fragment and this one.
            ntext.append(text[last_index:fg.start])
            label = text[fg.start:fg.end]
            nlabel = label
            if isinstance(fg, LinkFragment):
                nlabel = self.normalize_link(nlabel, fg.url)
            ntext.append(nlabel)

            # start is shifted by the offset *before* this fragment's label
            # changes length; end by the offset *after* — the length change
            # happens inside the fragment's own span.
            nfg = replace(fg, start=fg.start + offset)
            change = len(nlabel) - len(label)
            offset += change
            nfg = replace(nfg, end=fg.end + offset)

            nfragments.append(nfg)
            last_index = fg.end

        # Trailing text after the last fragment.
        ntext.append(text[last_index:])

        return ''.join(ntext), nfragments

    def split(
        self, text: str, fragments: list[Fragment]
    ) -> list[tuple[str, list[Fragment]]]:
        """Split *text* into limit-sized (text, fragments) parts.

        NOTE(review): only the fits-in-one-part case is implemented; a
        longer text falls off the end of the function and yields None.
        """
        text, fragments = self.url_normalize(text, fragments)
        # Count grapheme clusters, not code points — platform limits
        # (e.g. emoji, combining marks) are grapheme-based.
        if grapheme.length(text) <= self.climit:
            return [(text, fragments)]
+8
uv.lock
···
]
[[package]]
name = "idna"
version = "3.11"
source = { registry = "https://pypi.org/simple" }
···
source = { virtual = "." }
dependencies = [
{ name = "dnspython" },
{ name = "python-magic" },
{ name = "requests" },
{ name = "websockets" },
···
[package.metadata]
requires-dist = [
{ name = "dnspython", specifier = ">=2.8.0" },
{ name = "python-magic", specifier = ">=0.4.27" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "websockets", specifier = ">=15.0.1" },
···
]
[[package]]
+
name = "grapheme"
+
version = "0.6.0"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/ce/e7/bbaab0d2a33e07c8278910c1d0d8d4f3781293dfbc70b5c38197159046bf/grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca", size = 207306, upload-time = "2020-03-07T17:13:55.492Z" }
+
+
[[package]]
name = "idna"
version = "3.11"
source = { registry = "https://pypi.org/simple" }
···
source = { virtual = "." }
dependencies = [
{ name = "dnspython" },
+
{ name = "grapheme" },
{ name = "python-magic" },
{ name = "requests" },
{ name = "websockets" },
···
[package.metadata]
requires-dist = [
{ name = "dnspython", specifier = ">=2.8.0" },
+
{ name = "grapheme", specifier = ">=0.6.0" },
{ name = "python-magic", specifier = ">=0.4.27" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "websockets", specifier = ">=15.0.1" },