social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
import logging
import os
import re
import sys

from markdownify import markdownify as md

import cross

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")

# Alternating runs of non-whitespace ("words") and whitespace.
ALTERNATE = re.compile(r'\S+|\s+')

DEFAULT_SETTINGS: dict = {
    'bluesky': {
        'quote_gate': False,
        'thread_gate': [
            'everybody'
        ]
    }
}


def tokenize_html(content: str) -> list[cross.Token]:
    """Convert HTML to markdown (autolinks disabled) and tokenize the result."""
    return tokenize_markdown(md(content, autolinks=False))


def _scan_delimited(text: str, start: int, closer: str) -> tuple[str, int]:
    """Read ``text`` from ``start`` up to, but not including, ``closer``.

    Returns ``(content, next_index)`` where ``next_index`` is the position
    just past ``closer``. When ``closer`` is missing, ``content`` runs to the
    end of ``text`` and ``next_index`` is ``len(text) + 1``, which callers
    treat as "past the end".
    """
    end = text.find(closer, start)
    if end == -1:
        end = len(text)
    return text[start:end], end + 1


def tokenize_markdown(md) -> list[cross.Token]:
    """Split a markdown string into text, link, hashtag and mention tokens.

    Recognized constructs:

    * ``![alt](url)`` -- consumed; no token is emitted for it.
    * ``[#tag](url)`` -- ``cross.TagToken(tag)``.
    * ``[@name](url)`` -- ``cross.MentionToken(name, url)``.
    * ``[label](url)`` -- ``cross.LinkToken(url, label)``.

    A bracketed span that is not followed by ``(`` is kept verbatim as a
    ``cross.TextToken``, as is everything between constructs.
    """
    # The parameter name shadows the imported ``markdownify`` helper inside
    # this function; it is kept for interface compatibility and aliased here.
    text = md
    tokens: list[cross.Token] = []
    length = len(text)
    i = 0

    while i < length:
        if text.startswith('![', i):
            # media
            start = i
            _alt_text, i = _scan_delimited(text, i + 2, ']')
            if i < length and text[i] == '(':
                # Skip the URL as well; media embeds produce no token.
                _url, i = _scan_delimited(text, i + 1, ')')
            else:
                tokens.append(cross.TextToken(text[start:i]))
        elif text[i] == '[':
            # link or special
            start = i
            link_text, i = _scan_delimited(text, i + 1, ']')
            if i < length and text[i] == '(':
                url, i = _scan_delimited(text, i + 1, ')')
                if link_text.startswith('#'):
                    tokens.append(cross.TagToken(link_text[1:]))
                elif link_text.startswith('@'):
                    tokens.append(cross.MentionToken(link_text[1:], url))
                else:
                    tokens.append(cross.LinkToken(url, link_text))
            else:
                tokens.append(cross.TextToken(text[start:i]))
        else:
            # plain text: runs up to the next '[' or '![' (or end of input).
            start = i
            bracket = text.find('[', i)
            if bracket == -1:
                i = length
            elif text[bracket - 1] == '!':
                # text[i] is neither '[' nor the start of '![' in this
                # branch, so bracket - 1 > i and the scan always advances.
                i = bracket - 1
            else:
                i = bracket
            tokens.append(cross.TextToken(text[start:i]))

    return tokens


def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
    """Partition ``tokens`` into blocks of at most ``max_chars`` counted characters.

    Counting rules:

    * Text is split on word/whitespace boundaries. Whitespace is preserved
      exactly and may be divided across blocks. A word longer than
      ``max_chars`` starts a fresh block and is cut into hyphen-terminated
      chunks.
    * A link counts as ``min(len(label), 35)`` characters and a hashtag as
      ``1 + len(tag)``. Both are atomic: if one does not fit it moves to a new
      block, even when it is longer than ``max_chars`` on its own.
    * Any other token type is appended without affecting the count.

    The input tokens are not modified; text in the result is carried by newly
    created ``cross.TextToken`` objects.

    Raises:
        ValueError: if ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError(f"max_chars must be at least 1, got {max_chars}")

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    def start_new_block() -> None:
        """Flush the current block (if non-empty) and reset the counter."""
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text(segment: str) -> None:
        """Add ``segment`` to the current block, merging with trailing text."""
        nonlocal current_length
        last = current_block[-1] if current_block else None
        if isinstance(last, cross.TextToken):
            last.text += segment
        else:
            current_block.append(cross.TextToken(segment))
        current_length += len(segment)

    def add_whitespace(run: str) -> None:
        """Add a whitespace run, spilling into new blocks as needed."""
        while run:
            space_left = max_chars - current_length
            # "<= 0" rather than "== 0": an oversized link/hashtag can push
            # the counter past max_chars, and a negative budget must not be
            # used as a slice length.
            if space_left <= 0:
                start_new_block()
                continue
            part, run = run[:space_left], run[space_left:]
            append_text(part)
            if current_length == max_chars:
                start_new_block()

    def add_word(word: str) -> None:
        """Add a whitespace-free word, hyphenating it if it exceeds a block."""
        if len(word) <= max_chars:
            if current_length + len(word) > max_chars:
                # not enough space in the current block -> start a new one
                start_new_block()
            append_text(word)
            return

        # Oversized word: begin on a fresh block if anything has been counted.
        if current_length > 0:
            start_new_block()
        # With max_chars == 1 there is no room for a hyphen, so hard-split.
        step = max(max_chars - 1, 1)
        hyphen = '-' if max_chars > 1 else ''
        # Stop as soon as the tail fits in one block, so a tail of exactly
        # max_chars characters is not hyphenated needlessly.
        while len(word) > max_chars:
            append_text(word[:step] + hyphen)
            start_new_block()
            word = word[step:]
        append_text(word)

    def add_atomic(token: cross.Token, counted: int) -> None:
        """Add an unsplittable token that counts as ``counted`` characters."""
        nonlocal current_length
        if current_length + counted > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += counted

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # Alternating "words" (\S+) and "whitespace" (\s+) segments, so
            # every space/newline is handled as its own segment.
            for seg in ALTERNATE.findall(token.text):
                if seg.isspace():
                    add_whitespace(seg)
                else:
                    add_word(seg)
        elif isinstance(token, cross.LinkToken):
            add_atomic(token, min(len(token.label), 35))
        elif isinstance(token, cross.TagToken):
            # a hashtag is counted as "#tagname"
            add_atomic(token, 1 + len(token.tag))
        else:
            # other token types are appended without affecting the length
            current_block.append(token)

    # append any remaining tokens as the final block
    if current_block:
        blocks.append(current_block)

    return blocks


def safe_get(obj: dict, key: str, default):
    """Return ``obj[key]``, or ``default`` when it is missing or falsy.

    Note that falsy stored values (``None``, ``''``, ``0``, ``False``, empty
    containers) are replaced by ``default`` too.
    """
    return obj.get(key, default) or default


def value_or_envvar(text: str) -> str:
    """Resolve ``env:NAME`` to that environment variable, else return ``text``.

    An unset variable resolves to the empty string.
    """
    if text.startswith('env:'):
        return os.environ.get(text[4:], '')
    return text


def get_or_envvar(obj: dict, key: str) -> str:
    """Look up ``key`` in ``obj`` (default ``''``) and resolve ``env:`` values."""
    return value_or_envvar(obj.get(key, ''))