social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
1import re
2from dataclasses import replace
3
4import grapheme
5
6from cross.tokens import LinkToken, TagToken, TextToken, Token
7
8
9def canonical_label(label: str | None, href: str):
10 if not label or label == href:
11 return True
12
13 split = href.split("://", 1)
14 if len(split) > 1:
15 if split[1] == label:
16 return True
17
18 return False
19
20
# Splits text into alternating pieces: each match is either a maximal run of
# non-whitespace characters or a maximal run of whitespace, so concatenating
# every match reproduces the input string.
ALTERNATE = re.compile(r"\S+|\s+")
22
23
def split_tokens(
    tokens: list[Token],
    max_chars: int,
    max_link_len: int = 35,
) -> list[list[Token]]:
    """Partition *tokens* into consecutive blocks of at most *max_chars*.

    Lengths are measured in grapheme clusters. Tokens are consumed in order
    and packed greedily; a new block is opened whenever the next piece would
    not fit into the current one.

    * ``TagToken``: costs ``1 + length(tag)``. A tag is never split, so a tag
      longer than *max_chars* is emitted whole in a block of its own start.
    * ``LinkToken``: costs the length of its label (an absent label costs
      nothing). When ``canonical_label`` reports the label as canonical the
      cost is capped at *max_link_len*. A link that does not fit even in an
      empty block has its label cut into several link tokens; every piece
      but the last ends in ``"-"``.
    * ``TextToken``: split into runs of whitespace / non-whitespace. A
      non-whitespace run is only accepted while one cell of the block stays
      free; runs too long for an empty block are cut and hyphenated.
      Whitespace runs are cut without a hyphen. Adjacent text inside one
      block is merged into a single ``TextToken``.
    * Any other token is appended unchanged and counted as zero length.

    Args:
        tokens: Tokens to distribute, in display order.
        max_chars: Maximum block length; must be at least 2 so that a
            hyphenated piece can carry one grapheme plus the hyphen.
        max_link_len: Upper bound on the cost of a canonical link.

    Returns:
        The non-empty blocks, in order. An empty input yields ``[]``.

    Raises:
        ValueError: If *max_chars* is smaller than 2. Such a limit leaves no
            room to make progress when hyphenating, and previously made the
            splitting loops run forever.
    """
    if max_chars < 2:
        raise ValueError("max_chars must be at least 2")

    blocks: list[list[Token]] = []
    block: list[Token] = []
    length: int = 0

    def new_block() -> None:
        """Close the current block (if it holds anything) and start afresh."""
        nonlocal block, length
        if block:
            blocks.append(block)
        block, length = [], 0

    def append_text(text: str) -> None:
        """Add *text* to the current block, merging with a trailing TextToken."""
        last = block[-1] if block else None
        if isinstance(last, TextToken):
            block[-1] = replace(last, text=last.text + text)
        else:
            block.append(TextToken(text=text))

    for tk in tokens:
        if isinstance(tk, TagToken):
            # The extra 1 is presumably the tag's leading marker character.
            tag_len = 1 + grapheme.length(tk.tag)
            if length + tag_len > max_chars:
                new_block()
            block.append(tk)
            length += tag_len
            continue

        if isinstance(tk, LinkToken):
            label_text = tk.label or ""
            link_len = grapheme.length(label_text)
            if canonical_label(tk.label, tk.href):
                link_len = min(link_len, max_link_len)

            if length and length + link_len > max_chars:
                new_block()

            # Re-test after the flush: a link that fits an empty block must
            # be kept whole (and a label-less link must not be lost), no
            # matter what preceded it.
            if length + link_len <= max_chars:
                block.append(tk)
                length += link_len
                continue

            # The link cannot fit even in an empty block (length is 0 here):
            # cut the label into hyphenated pieces, one block per piece.
            remaining = label_text
            while remaining:
                remaining_len = grapheme.length(remaining)
                if remaining_len <= max_chars:
                    block.append(replace(tk, label=remaining))
                    length += remaining_len
                    break
                room = max_chars - 1  # keep one cell for the hyphen
                head = grapheme.slice(remaining, 0, room) + "-"
                block.append(replace(tk, label=head))
                new_block()
                remaining = grapheme.slice(remaining, room, remaining_len)
            continue

        if isinstance(tk, TextToken):
            for seg in ALTERNATE.findall(tk.text):
                is_space = seg.isspace()
                seg_len = grapheme.length(seg)
                # Non-whitespace runs always leave one cell of the block
                # unused; whitespace may fill the block completely.
                limit = max_chars if is_space else max_chars - 1

                if length + seg_len <= limit:
                    append_text(seg)
                    length += seg_len
                    continue

                if length:
                    new_block()

                # Too long for an empty block: emit full pieces, each closing
                # its block. Only non-whitespace pieces get a hyphen.
                suffix = "" if is_space else "-"
                while seg_len > limit:
                    append_text(grapheme.slice(seg, 0, limit) + suffix)
                    new_block()
                    seg = grapheme.slice(seg, limit, seg_len)
                    seg_len = grapheme.length(seg)

                if seg:
                    append_text(seg)
                    length = seg_len
            continue

        # Unknown token kinds pass through and do not count towards length.
        block.append(tk)

    if block:
        blocks.append(block)

    return blocks