social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
1import re
2import cross
3import logging, sys, os
4
# Send log records of level INFO and above to standard output.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
LOGGER = logging.getLogger("XPost")

# Matches either a run of non-whitespace or a run of whitespace, so findall()
# yields the text as alternating "word" and "whitespace" segments.
ALTERNATE = re.compile(r'\S+|\s+')
9
10def canonical_label(label: str | None, href: str):
11 if not label or label == href:
12 return True
13
14 split = href.split('://', 1)
15 if len(split) > 1:
16 if split[1] == label:
17 return True
18
19 return False
20
def split_tokens(tokens: list[cross.Token], max_chars: int, max_link_len: int = 35) -> list[list[cross.Token]]:
    """Split a token stream into blocks of at most `max_chars` counted characters.

    Counting rules:
      * text counts one per character;
      * a link counts the length of its label, capped at `max_link_len` when
        `canonical_label` accepts the label for the link's href;
      * a tag counts the length of its name plus one for the leading '#';
      * any other token type is appended without affecting the count.

    Text is broken between words where possible. Whitespace is never dropped:
    a whitespace run that does not fit is divided across blocks so the exact
    spacing is preserved. A single word longer than `max_chars` is cut into
    pieces that each fill a whole block and end in '-' (no hyphen is added
    when `max_chars` is 1, because there is no room for one). Link and tag
    tokens are never split, so one whose counted length exceeds `max_chars`
    produces a block that is over the limit.

    Text is emitted as newly created `cross.TextToken` objects; adjacent text
    inside one block is merged into a single token.

    Args:
        tokens: the tokens to distribute, in order.
        max_chars: maximum counted length of one block; must be at least 1.
        max_link_len: cap applied to the counted length of canonical links.

    Returns:
        The non-empty blocks, in order.

    Raises:
        ValueError: if `max_chars` is smaller than 1.
    """
    if max_chars < 1:
        # With no capacity no block can ever accept a character, and the
        # splitting loops below would never terminate.
        raise ValueError("max_chars must be at least 1")

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    def start_new_block():
        """Flush the current block (if it holds anything) and reset the count."""
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text_to_block(text_segment):
        """Add text to the current block, merging with a preceding text token."""
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text_segment
        else:
            current_block.append(cross.TextToken(text_segment))

    def append_atomic(token, length):
        """Place an unsplittable token, opening a new block if it does not fit."""
        nonlocal current_length
        if current_length + length > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += length

    # Over-long words are cut into chunks that fill a block including the
    # trailing hyphen. A one-character block has no room for a hyphen.
    hyphen = '-' if max_chars > 1 else ''
    chunk_size = max_chars - len(hyphen)

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # Alternating "words" (\S+) and "whitespace" (\s+), so every
            # space/newline is handled as its own segment.
            for seg in ALTERNATE.findall(token.text):
                if seg.isspace():
                    # Whitespace is counted and, when it does not fully fit,
                    # divided across blocks.
                    while seg:
                        space_left = max_chars - current_length
                        # `<=` rather than `==`: an oversized link or tag can
                        # leave the block above the limit, and a negative
                        # slice bound here would make no progress.
                        if space_left <= 0:
                            start_new_block()
                            continue

                        part = seg[:space_left]
                        append_text_to_block(part)
                        current_length += len(part)
                        seg = seg[space_left:]

                        if current_length == max_chars:
                            start_new_block()

                elif len(seg) > max_chars:
                    # The word cannot fit in any single block. Start it on a
                    # fresh block unless nothing has been counted yet.
                    if current_length > 0:
                        start_new_block()

                    remaining = seg
                    while len(remaining) > chunk_size:
                        append_text_to_block(remaining[:chunk_size] + hyphen)
                        # That chunk fills the block completely.
                        start_new_block()
                        remaining = remaining[chunk_size:]

                    # The loop always leaves between 1 and chunk_size
                    # characters, which open the next block.
                    append_text_to_block(remaining)
                    current_length = len(remaining)

                else:
                    # The word fits in a block; open a new one if the current
                    # block lacks the room.
                    if current_length + len(seg) > max_chars:
                        start_new_block()
                    append_text_to_block(seg)
                    current_length += len(seg)

        elif isinstance(token, cross.LinkToken):
            # NOTE(review): canonical_label accepts a None label, but len()
            # would raise on one; confirm labels are always strings here.
            link_len = len(token.label)
            if canonical_label(token.label, token.href):
                link_len = min(link_len, max_link_len)
            append_atomic(token, link_len)

        elif isinstance(token, cross.TagToken):
            # A hashtag is counted as "#tagname".
            append_atomic(token, 1 + len(token.tag))

        else:
            # Other token types are kept in place and do not affect the count.
            current_block.append(token)

    # Whatever is still pending becomes the final block.
    if current_block:
        blocks.append(current_block)

    return blocks
138
def safe_get(obj: dict, key: str, default):
    """Look up `key` in `obj`, substituting `default` for absent or falsy values.

    A stored value that is falsy (None, 0, False, an empty string or
    container) is treated the same as a missing key.
    """
    return obj.get(key) or default
142
143def as_envvar(text: str | None) -> str | None:
144 if not text:
145 return None
146
147 if text.startswith('env:'):
148 return os.environ.get(text[4:], '')
149
150 return text