social media crossposting tool. 3rd time's the charm
mastodon
misskey
crossposting
bluesky
1import re
2from markdownify import markdownify as md
3import cross
4import logging, sys, os
5
# Configure the root logger to write to stdout at INFO level and above.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Module-level logger registered under the name "XPost".
LOGGER = logging.getLogger("XPost")

# Matches either a run of non-whitespace or a run of whitespace. Used with
# findall() to cut text into alternating word / whitespace segments; every
# character of the input lands in exactly one segment.
ALTERNATE = re.compile(r'\S+|\s+')
10
11def canonical_label(label: str | None, href: str):
12 if not label or label == href:
13 return True
14
15 split = href.split('://', 1)
16 if len(split) > 1:
17 if split[1] == label:
18 return True
19
20 return False
21
def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
    """Partition ``tokens`` into consecutive blocks that fit a character budget.

    How each token type counts against ``max_chars``:

    * ``cross.TextToken``: one per character. Text is cut into alternating
      word / whitespace segments. Whitespace may be divided across blocks so
      spacing is preserved exactly; a word that does not fit is moved to a
      fresh block; a word longer than ``max_chars`` is broken into
      hyphen-terminated chunks, each filling a block of its own.
    * ``cross.LinkToken``: ``len(label)``, capped at 35 when
      ``canonical_label(label, href)`` is true.
    * ``cross.TagToken``: ``1 + len(tag)`` (the tag plus a leading ``#``).
    * Any other token: appended to the current block without counting.

    Link and tag tokens are never divided, so a single one that is longer
    than ``max_chars`` still yields a block that exceeds the budget.

    Text placed in the result is held in newly created ``cross.TextToken``
    objects; the input text tokens are not modified.

    Args:
        tokens: The token sequence to split, in order.
        max_chars: Maximum counted length of a block. Must be at least 1.

    Returns:
        The list of blocks, each a non-empty list of tokens. Empty input
        produces an empty list.

    Raises:
        ValueError: If ``max_chars`` is less than 1.
    """
    if max_chars < 1:
        # No segment can ever fit a non-positive budget; without this check
        # the splitting loops below would never terminate.
        raise ValueError(f"max_chars must be at least 1, got {max_chars}")

    def start_new_block():
        # Flush the block being built (if it holds anything) and reset state.
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text_to_block(text_segment):
        # if the last element in the current block is also text, just append to it.
        # Only TextTokens created here are ever placed in a block, so this
        # never mutates a caller-owned token.
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text_segment
        else:
            current_block.append(cross.TextToken(text_segment))

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # split content into alternating "words" (\S+) and "whitespace" (\s+).
            # this ensures every space/newline is treated as its own segment.
            segments: list[str] = ALTERNATE.findall(token.text)

            for seg in segments:
                if seg.isspace():
                    # whitespace segment: we count it, and if it doesn't fully fit,
                    # split the whitespace across blocks to preserve exact spacing.
                    seg_len: int = len(seg)
                    while seg_len > 0:
                        space_left = max_chars - current_length
                        # `<=` rather than `==`: an oversized link/tag token can
                        # leave current_length above max_chars. A negative
                        # space_left would make `take` negative, slice off
                        # nothing, and grow seg_len forever.
                        if space_left <= 0:
                            start_new_block()
                            continue

                        take = min(space_left, seg_len)
                        part = seg[:take]
                        append_text_to_block(part)

                        current_length += len(part)
                        seg = seg[take:]
                        seg_len -= take

                        if current_length == max_chars:
                            start_new_block()

                else:
                    # seg is a "word" (no whitespace inside).
                    word: str = seg
                    wlen: int = len(word)

                    # if the word itself is longer than the budget, we must split it.
                    if wlen > max_chars:
                        # first, if we're in the middle of a block, close it & start fresh.
                        if current_length > 0:
                            start_new_block()

                        # Each chunk is (max_chars - 1) characters plus "-", so it
                        # fills a block exactly. With a budget of 1 there is no
                        # room for a hyphen; split into bare characters instead
                        # of looping forever on zero-width chunks.
                        hyphen = '-' if max_chars > 1 else ''
                        chunk_len = max_chars - len(hyphen)

                        remaining = word
                        while len(remaining) > chunk_len:
                            append_text_to_block(remaining[:chunk_len] + hyphen)
                            # that chunk fills the current block
                            start_new_block()
                            remaining = remaining[chunk_len:]

                        # whatever remains is shorter than a full block
                        if remaining:
                            append_text_to_block(remaining)
                            current_length = len(remaining)

                    else:
                        # word fits fully within a block.
                        if current_length + wlen <= max_chars:
                            append_text_to_block(word)
                            current_length += wlen
                        else:
                            # not enough space in current block -> start a new one
                            start_new_block()
                            append_text_to_block(word)
                            current_length = wlen

        elif isinstance(token, cross.LinkToken):
            # NOTE(review): canonical_label() accepts a None label, but
            # len(None) would raise here - confirm LinkToken.label is always a str.
            link_len = len(token.label)
            if canonical_label(token.label, token.href):
                # NOTE(review): 35 is presumably the length a bare URL is
                # displayed/counted as on the target platform - confirm.
                link_len = min(link_len, 35)

            if current_length + link_len <= max_chars:
                current_block.append(token)
                current_length += link_len
            else:
                start_new_block()
                current_block.append(token)
                current_length = link_len

        elif isinstance(token, cross.TagToken):
            # we treat a hashtag like "#tagname" for counting.
            hashtag_len = 1 + len(token.tag)
            if current_length + hashtag_len <= max_chars:
                current_block.append(token)
                current_length += hashtag_len
            else:
                start_new_block()
                current_block.append(token)
                current_length = hashtag_len

        else:
            # other token types are appended without affecting the length.
            current_block.append(token)

    # append any remaining tokens as the final block
    if current_block:
        blocks.append(current_block)

    return blocks
139
def safe_get(obj: dict, key: str, default):
    """Look up ``key`` in ``obj``, falling back to ``default``.

    The fallback applies both when the key is absent and when the stored
    value is falsy (``None``, ``0``, ``''``, an empty container, ...).

    Args:
        obj: Mapping to read from.
        key: Key to look up.
        default: Value returned when no truthy value is stored under ``key``.

    Returns:
        The stored value if it is truthy, otherwise ``default``.
    """
    found = obj.get(key)
    if found:
        return found
    return default
143
def value_or_envvar(text: str) -> str:
    """Resolve a value that may be an ``env:``-prefixed environment reference.

    Args:
        text: Either a literal value, or ``env:NAME`` to read the environment
            variable ``NAME``.

    Returns:
        ``text`` unchanged when it does not start with ``env:``; otherwise the
        value of the named environment variable, or ``''`` if it is not set.
    """
    prefix = 'env:'
    if not text.startswith(prefix):
        return text

    variable_name = text[len(prefix):]
    return os.environ.get(variable_name, '')
148
def get_or_envvar(obj: dict, key: str):
    """Read ``key`` from ``obj`` and resolve it with ``value_or_envvar``.

    Args:
        obj: Mapping to read from.
        key: Key to look up; an absent key is treated as the empty string.

    Returns:
        Whatever ``value_or_envvar`` returns for the stored value.
    """
    raw_value = obj.get(key, '')
    return value_or_envvar(raw_value)