social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
import logging
import os
import re
import sys

from markdownify import markdownify as md

import cross

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")

# matches alternating runs of non-whitespace ("words") and whitespace
ALTERNATE = re.compile(r'\S+|\s+')

# a link never counts for more than this many characters, however long its label is.
# NOTE(review): presumably mirrors how the target platforms count links - confirm.
_MAX_LINK_LENGTH = 35


def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
    """Split a token stream into blocks whose counted length fits ``max_chars``.

    Counting rules:
      * text counts one per character; it is re-emitted as new
        ``cross.TextToken`` objects, merged with a preceding text token in the
        same block when there is one;
      * a ``cross.LinkToken`` counts ``min(len(label), 35)``;
      * a ``cross.TagToken`` counts ``1 + len(tag)`` (the leading ``#``);
      * any other token is appended without affecting the length.

    Words are never broken unless a single word is longer than ``max_chars``;
    such a word is cut into ``max_chars - 1`` sized pieces followed by ``-``.
    Whitespace is preserved exactly and may be split across blocks.  A link or
    tag that is longer than ``max_chars`` on its own is still emitted whole,
    alone at the start of a block.

    Args:
        tokens: tokens to distribute, in order.
        max_chars: maximum counted length of one block; must be at least 1.

    Returns:
        The list of blocks, each a non-empty list of tokens.

    Raises:
        ValueError: if ``max_chars`` is smaller than 1.
    """
    if max_chars < 1:
        # with no room at all the splitting loops below could never make progress
        raise ValueError(f"max_chars must be at least 1, got {max_chars}")

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    def start_new_block() -> None:
        """Close the current block (if it holds anything) and reset the counters."""
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text_to_block(text_segment: str) -> None:
        """Add text to the current block, extending a trailing text token if present."""
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text_segment
        else:
            current_block.append(cross.TextToken(text_segment))

    def place_whole(token: cross.Token, length: int) -> None:
        """Add an unsplittable token, moving to a new block when it does not fit."""
        nonlocal current_length
        if current_length + length > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += length

    def add_whitespace(space: str) -> None:
        """Add a whitespace run, spilling over into new blocks as needed."""
        nonlocal current_length
        while space:
            space_left = max_chars - current_length
            # "<=" rather than "==": an oversized link/tag can leave the block
            # over the limit, and a negative budget would otherwise loop forever.
            if space_left <= 0:
                start_new_block()
                continue

            part = space[:space_left]
            append_text_to_block(part)
            current_length += len(part)
            space = space[len(part):]

            if current_length == max_chars:
                start_new_block()

    def add_word(word: str) -> None:
        """Add a whitespace-free word, hyphenating it only if it cannot fit a block."""
        nonlocal current_length
        word_length = len(word)

        if word_length <= max_chars:
            if current_length + word_length > max_chars:
                # not enough space in the current block -> start a new one
                start_new_block()
            append_text_to_block(word)
            current_length += word_length
            return

        # the word is longer than a whole block: start from a fresh block
        if current_length > 0:
            start_new_block()

        if max_chars > 1:
            # (max_chars - 1) characters + "-" fill a block exactly
            step, joiner = max_chars - 1, '-'
        else:
            # no room for a hyphen in a one-character block; hard split instead
            step, joiner = 1, ''

        remaining = word
        while len(remaining) > step:
            append_text_to_block(remaining[:step] + joiner)
            start_new_block()
            remaining = remaining[step:]

        # the loop always leaves between 1 and `step` characters behind
        append_text_to_block(remaining)
        current_length = len(remaining)

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # every run of whitespace and every word is handled as its own segment
            for segment in ALTERNATE.findall(token.text):
                if segment.isspace():
                    add_whitespace(segment)
                else:
                    add_word(segment)
        elif isinstance(token, cross.LinkToken):
            place_whole(token, min(len(token.label), _MAX_LINK_LENGTH))
        elif isinstance(token, cross.TagToken):
            # a hashtag is counted like "#tagname"
            place_whole(token, 1 + len(token.tag))
        else:
            # other token types are kept but do not affect the length
            current_block.append(token)

    # whatever is left forms the final block
    if current_block:
        blocks.append(current_block)

    return blocks


def safe_get(obj: dict, key: str, default):
    """Return ``obj[key]``, or ``default`` when the key is missing or its value is falsy."""
    return obj.get(key) or default


def value_or_envvar(text: str) -> str:
    """Resolve ``env:NAME`` to the value of environment variable ``NAME``.

    Text without the ``env:`` prefix is returned unchanged; an unset variable
    resolves to an empty string.
    """
    prefix = 'env:'
    if text.startswith(prefix):
        return os.environ.get(text[len(prefix):], '')
    return text


def get_or_envvar(obj: dict, key: str) -> str:
    """Look up ``key`` in ``obj`` and resolve it with :func:`value_or_envvar`.

    A missing key or an explicit ``None`` value yields an empty string.
    """
    value = obj.get(key)
    if value is None:
        # treat an explicit null like a missing key instead of failing on .startswith
        return ''
    return value_or_envvar(value)