social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
1import re
2from markdownify import markdownify as md
3import cross
4import logging, sys, os
5
# root logging setup: INFO and above, written to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)
LOGGER = logging.getLogger("XPost")

# matches either a run of non-whitespace or a run of whitespace, so findall()
# returns the text as alternating word / whitespace segments with nothing dropped.
ALTERNATE = re.compile(r'\S+|\s+')
10
def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
    """Partition ``tokens`` into consecutive blocks of at most ``max_chars`` counted characters.

    Counting rules:
      * ``cross.TextToken``: the text is split into alternating runs of
        non-whitespace ("words") and whitespace. A word that fits in a block
        is kept whole; a word longer than ``max_chars`` is cut into chunks of
        ``max_chars - 1`` characters plus a trailing "-" (no hyphen when
        ``max_chars`` is 1). Whitespace is never dropped: a run that does not
        fit is cut at the block boundary and continues in the next block.
      * ``cross.LinkToken``: counted as ``len(token.label)``, capped at 35.
      * ``cross.TagToken``: counted as ``len(token.tag) + 1`` (for the "#").
      * any other token is appended to the current block without being counted.

    Link and tag tokens are never split. One whose counted length is larger
    than ``max_chars`` is still emitted, and the block holding it exceeds
    the limit.

    Text is re-emitted as newly created ``cross.TextToken`` objects (adjacent
    text inside one block is merged into a single token); all other tokens
    are placed in the output as the same objects that were passed in.

    Args:
        tokens: the tokens to distribute, in order.
        max_chars: maximum counted length of one block; must be >= 1.

    Returns:
        The list of blocks, in order. Empty blocks are never emitted.

    Raises:
        ValueError: if ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        # a non-positive limit can never hold a character; the splitting loops
        # below would otherwise spin forever on any non-empty text.
        raise ValueError(f"max_chars must be at least 1, got {max_chars}")

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    def start_new_block():
        # flush the current block (if it holds anything) and reset the counter.
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text_to_block(text_segment):
        # if the last element in the current block is also text, just append to it.
        # only tokens created right here are ever extended, never input tokens.
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text_segment
        else:
            current_block.append(cross.TextToken(text_segment))

    def append_whole_token(item, counted_len):
        # place an unsplittable token, opening a new block first if it doesn't fit.
        nonlocal current_length
        if current_length + counted_len > max_chars:
            start_new_block()
        current_block.append(item)
        current_length += counted_len

    # a hyphenated chunk is (max_chars - 1) characters plus "-"; with a limit
    # of 1 there is no room for the hyphen, so fall back to one bare character.
    hyphenate = max_chars > 1
    chunk_size = max_chars - 1 if hyphenate else 1

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # split content into alternating "words" (\S+) and "whitespace" (\s+).
            segments: list[str] = ALTERNATE.findall(token.text)

            for seg in segments:
                if seg.isspace():
                    # whitespace segment: we count it, and if it doesn't fully fit,
                    # split the whitespace across blocks to preserve exact spacing.
                    seg_len: int = len(seg)
                    while seg_len > 0:
                        space_left = max_chars - current_length
                        # "<=" rather than "==": an oversized link/tag can leave
                        # current_length above max_chars. a negative space_left
                        # would turn into a negative slice bound that consumes
                        # nothing, and the loop would never terminate.
                        if space_left <= 0:
                            start_new_block()
                            continue

                        take = min(space_left, seg_len)
                        append_text_to_block(seg[:take])
                        current_length += take
                        seg = seg[take:]
                        seg_len -= take

                        if current_length == max_chars:
                            start_new_block()
                    continue

                # seg is a "word" (no whitespace inside).
                word: str = seg
                wlen: int = len(word)

                if wlen > max_chars:
                    # the word can't fit in any single block, so it must be split.
                    # first, if we're in the middle of a block, close it & start fresh.
                    if current_length > 0:
                        start_new_block()

                    remaining = word
                    while len(remaining) > chunk_size:
                        chunk = remaining[:chunk_size] + ('-' if hyphenate else '')
                        append_text_to_block(chunk)
                        # that chunk fills the current block
                        start_new_block()
                        remaining = remaining[chunk_size:]

                    # whatever remains fits and stays open for following segments
                    if remaining:
                        append_text_to_block(remaining)
                        current_length = len(remaining)
                else:
                    # word fits fully within a block; open a new one if the
                    # current block doesn't have enough room left.
                    if current_length + wlen > max_chars:
                        start_new_block()
                    append_text_to_block(word)
                    current_length += wlen

        elif isinstance(token, cross.LinkToken):
            # links are counted by label length, capped at 35.
            # NOTE(review): the cap presumably reflects how target platforms
            # shorten displayed links; confirm against the posting code.
            append_whole_token(token, min(len(token.label), 35))

        elif isinstance(token, cross.TagToken):
            # we treat a hashtag like "#tagname" for counting.
            append_whole_token(token, 1 + len(token.tag))

        else:
            # other token types are appended without affecting length.
            current_block.append(token)

    # append any remaining tokens as the final block
    if current_block:
        blocks.append(current_block)

    return blocks
126
def safe_get(obj: dict, key: str, default):
    """Return ``obj[key]`` if it is present and truthy, otherwise ``default``.

    A stored value that is falsy (``None``, ``''``, ``0``, an empty
    container, ...) is treated the same as a missing key.
    """
    return obj.get(key, default) or default
130
def value_or_envvar(text: str) -> str:
    """Resolve an ``env:NAME`` reference to the value of environment variable ``NAME``.

    Text that does not start with ``env:`` is returned unchanged. A reference
    to a variable that is not set resolves to the empty string.
    """
    prefix = 'env:'
    if not text.startswith(prefix):
        return text
    variable_name = text[len(prefix):]
    return os.environ.get(variable_name, '')
135
def get_or_envvar(obj: dict, key: str):
    """Fetch ``obj[key]`` and resolve it through ``value_or_envvar``.

    A missing key is treated as the empty string.
    """
    raw_value = obj.get(key, '')
    return value_or_envvar(raw_value)