social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
1import re
2from markdownify import markdownify as md
3import cross
4import logging, sys, os
5
# Route log records to stdout at INFO level and expose the shared module logger.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")

# Splits a string into alternating runs of non-whitespace ("words") and
# whitespace, so that every space/newline run is its own segment.
ALTERNATE = re.compile(r'\S+|\s+')

# Built-in option values, keyed by service name.
DEFAULT_SETTINGS: dict = {
    'bluesky': {
        'quote_gate': False,
        'thread_gate': ['everybody'],
    },
}
19
def tokenize_html(content: str):
    """Tokenize an HTML fragment.

    The HTML is first converted to Markdown with markdownify (``autolinks``
    disabled) and the resulting text is handed to ``tokenize_markdown``.

    Args:
        content: HTML source text.

    Returns:
        Whatever ``tokenize_markdown`` returns for the converted text.
    """
    markdown_text = md(content, autolinks=False)
    return tokenize_markdown(markdown_text)
22
def _read_delimited(text: str, pos: int, closer: str) -> tuple[str, int]:
    """Return ``text[pos:]`` up to the first *closer* and the index just past it.

    When *closer* does not occur, the rest of *text* is returned and the index
    is ``len(text) + 1``, exactly as if a character scan had run off the end
    and then skipped one more position.
    """
    end = text.find(closer, pos)
    if end == -1:
        end = len(text)
    return text[pos:end], end + 1


def tokenize_markdown(md: str) -> list[cross.Token]:
    """Split Markdown text into tokens.

    Recognised constructs:

    * ``![alt](url)`` - media embed; consumed, but no token is produced.
    * ``[#tag](url)`` - ``cross.TagToken`` with the leading ``#`` removed.
    * ``[@name](url)`` - ``cross.MentionToken`` with the leading ``@`` removed.
    * ``[label](url)`` - ``cross.LinkToken(url, label)``.
    * anything else - ``cross.TextToken`` holding the raw text.

    A ``[...]`` or ``![...]`` that is not immediately followed by ``(`` is
    kept verbatim as a ``cross.TextToken``. The first ``]`` / ``)`` found
    closes a construct; nested brackets or parentheses are not balanced.

    Args:
        md: Markdown source text. Note that inside this function the name
            shadows the module-level ``md`` import.

    Returns:
        The tokens in document order.
    """
    tokens: list[cross.Token] = []
    i = 0
    length = len(md)

    while i < length:
        if md.startswith('![', i):
            # media: parse past the alt text and, if present, the URL.
            start = i
            _alt_text, i = _read_delimited(md, i + 2, ']')
            if i < length and md[i] == '(':
                # Media embeds currently produce no token; just skip the URL.
                _url, i = _read_delimited(md, i + 1, ')')
            else:
                tokens.append(cross.TextToken(md[start:i]))
        elif md[i] == '[':
            # link or special (hashtag / mention)
            start = i
            link_text, i = _read_delimited(md, i + 1, ']')
            if i < length and md[i] == '(':
                url, i = _read_delimited(md, i + 1, ')')
                if link_text.startswith('#'):
                    tokens.append(cross.TagToken(link_text[1:]))
                elif link_text.startswith('@'):
                    tokens.append(cross.MentionToken(link_text[1:], url))
                else:
                    # Labels that are themselves URLs and ordinary labels
                    # are tokenized the same way.
                    tokens.append(cross.LinkToken(url, link_text))
            else:
                tokens.append(cross.TextToken(md[start:i]))
        else:
            # plain text: runs until the next '[' or '![' (or end of input).
            stop = md.find('[', i)
            if stop == -1:
                stop = length
            elif md[stop - 1] == '!':
                # The '!' belongs to the upcoming media construct. It cannot
                # sit at index i here, because that case is handled by the
                # first branch, so the text run stays non-empty.
                stop -= 1
            tokens.append(cross.TextToken(md[i:stop]))
            i = stop

    return tokens
81
82
def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
    """Partition *tokens* into consecutive blocks of at most *max_chars* characters.

    How each token type is counted:

    * ``cross.TextToken`` - its text is cut into alternating word/whitespace
      runs (``ALTERNATE``). Whitespace may be divided across blocks; a word is
      only divided when it is longer than ``max_chars``, in which case it is
      emitted as hyphen-terminated chunks that each fill a block. Text is
      re-emitted as new ``cross.TextToken`` objects, merging adjacent runs.
    * ``cross.LinkToken`` - the length of its label, counted as at most 35.
    * ``cross.TagToken`` - the tag length plus one for the ``#``.
    * any other token - appended without affecting the count.

    A link or tag whose counted size alone exceeds ``max_chars`` is placed at
    the start of a fresh block, and that block is closed before any further
    counted content is added.

    Args:
        tokens: Tokens to distribute, in order.
        max_chars: Maximum counted characters per block; must be >= 1.

    Returns:
        The non-empty blocks, in order.

    Raises:
        ValueError: If ``max_chars`` is smaller than 1 (no text could ever
            fit, so splitting would never terminate).
    """
    if max_chars < 1:
        raise ValueError(f'max_chars must be at least 1, got {max_chars}')

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    def start_new_block():
        """Flush the current block (if non-empty) and reset the counters."""
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text_to_block(text_segment):
        """Add text, extending a trailing text token instead of adding a new one."""
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text_segment
        else:
            current_block.append(cross.TextToken(text_segment))

    def append_counted_token(token, size):
        """Add an indivisible token of counted length *size*, flushing first if needed."""
        nonlocal current_length
        if current_length + size > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += size

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # Alternating "words" (\S+) and "whitespace" (\s+) so every
            # space/newline run is handled as its own segment.
            for seg in ALTERNATE.findall(token.text):
                if seg.isspace():
                    # Whitespace is counted and may be divided across blocks
                    # so that exact spacing is preserved.
                    while seg:
                        space_left = max_chars - current_length
                        # "<= 0", not "== 0": an oversized link/tag can leave
                        # the block over the limit, and a negative slice size
                        # would otherwise never consume the segment.
                        if space_left <= 0:
                            start_new_block()
                            continue

                        part = seg[:space_left]
                        seg = seg[space_left:]
                        append_text_to_block(part)
                        current_length += len(part)

                        if current_length == max_chars:
                            start_new_block()

                elif len(seg) > max_chars:
                    # The word cannot fit in any block: close a partially
                    # filled block, then carve the word into full-block chunks.
                    if current_length > 0:
                        start_new_block()

                    # Each chunk is (max_chars - 1) characters plus '-'. With
                    # max_chars == 1 there is no room for a hyphen, so single
                    # bare characters are emitted to guarantee progress.
                    body_len = max(max_chars - 1, 1)
                    hyphen = '-' if max_chars > 1 else ''

                    remaining = seg
                    while len(remaining) > body_len:
                        append_text_to_block(remaining[:body_len] + hyphen)
                        start_new_block()
                        remaining = remaining[body_len:]

                    # The loop always leaves a non-empty tail of at most
                    # body_len characters, which opens the next block.
                    append_text_to_block(remaining)
                    current_length = len(remaining)

                else:
                    # The word fits in a block; move to a new one if the
                    # current block has no room left for it.
                    if current_length + len(seg) > max_chars:
                        start_new_block()
                    append_text_to_block(seg)
                    current_length += len(seg)

        elif isinstance(token, cross.LinkToken):
            # NOTE(review): labels count as at most 35 characters - presumably
            # because long link labels are shortened when posted; confirm.
            append_counted_token(token, min(len(token.label), 35))

        elif isinstance(token, cross.TagToken):
            # A hashtag is counted as "#tagname".
            append_counted_token(token, 1 + len(token.tag))

        else:
            # Other token types are appended without affecting the length.
            current_block.append(token)

    # Whatever is left forms the final block.
    if current_block:
        blocks.append(current_block)

    return blocks
198
def safe_get(obj: dict, key: str, default):
    """Look up *key* in *obj*, falling back to *default*.

    The fallback applies both when the key is missing and when the stored
    value is falsy (``None``, ``0``, ``False``, ``''``, empty containers).

    Args:
        obj: Mapping to read from.
        key: Key to look up.
        default: Value returned when no truthy value is stored.

    Returns:
        The stored value if it is truthy, otherwise *default*.
    """
    return obj.get(key, default) or default
202
def value_or_envvar(text: str) -> str:
    """Resolve a setting that may point at an environment variable.

    Args:
        text: Either a literal value, or ``env:NAME`` to read the
            environment variable ``NAME``.

    Returns:
        *text* unchanged when it has no ``env:`` prefix; otherwise the value
        of the named environment variable, or ``''`` when it is not set.
    """
    if not text.startswith('env:'):
        return text
    variable_name = text[4:]
    return os.environ.get(variable_name, '')
207
def get_or_envvar(obj: dict, key: str):
    """Read *key* from *obj* and resolve it through ``value_or_envvar``.

    A missing key is treated as the empty string. The stored value is
    expected to be a string, since it is passed straight to
    ``value_or_envvar``.

    Args:
        obj: Mapping to read from.
        key: Key to look up.

    Returns:
        The result of ``value_or_envvar`` for the stored value.
    """
    raw_value = obj.get(key, '')
    return value_or_envvar(raw_value)