social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
import logging
import os
import re
import sys

from markdownify import markdownify as md

import cross

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")

# Alternating runs of non-whitespace ("words") and whitespace. findall() with
# this pattern partitions a string without dropping any character.
ALTERNATE = re.compile(r'\S+|\s+')


def canonical_label(label: str | None, href: str) -> bool:
    """Return True when *label* adds nothing beyond the link target itself.

    A label is considered canonical when it is missing/empty, equal to
    *href*, or equal to *href* with its ``scheme://`` prefix removed.
    """
    if not label or label == href:
        return True

    # Compare against the part after the first "://", if there is one.
    _, separator, remainder = href.partition('://')
    return bool(separator) and remainder == label


def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
    """Split *tokens* into consecutive blocks of at most *max_chars* characters.

    Length accounting per token type:

    * ``cross.TextToken`` - split into word and whitespace segments. Words are
      kept whole when they fit; a word longer than *max_chars* is broken into
      chunks ending in ``-``. Whitespace is preserved exactly and may be
      divided across two blocks.
    * ``cross.LinkToken`` - counted as the length of its label, capped at 35
      when the label is canonical (see ``canonical_label``).
      NOTE(review): a missing/empty label counts as zero length - confirm
      whether the href length should be used instead.
    * ``cross.TagToken`` - counted as ``len(tag) + 1`` for the leading ``#``.
    * anything else - appended without affecting the length.

    Link and tag tokens are never split; one that is longer than *max_chars*
    ends up in a block of its own that exceeds the limit.

    Text in the returned blocks is carried by newly created ``TextToken``
    objects; the other tokens are the same objects that were passed in.

    Raises:
        ValueError: if *max_chars* is smaller than 2. Hyphenated splitting
            needs room for at least one character plus the hyphen, and
            smaller limits would otherwise never make progress.
    """
    if max_chars < 2:
        raise ValueError(f"max_chars must be at least 2, got {max_chars}")

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length = 0

    def start_new_block() -> None:
        """Flush the current block (if non-empty) and reset the counters."""
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text(text: str) -> None:
        """Add text to the current block, merging with a trailing TextToken."""
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text
        else:
            current_block.append(cross.TextToken(text))

    def place_atomic(token: cross.Token, length: int) -> None:
        """Place an unsplittable token, opening a new block if it won't fit."""
        nonlocal current_length
        if current_length + length > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += length

    def add_whitespace(space: str) -> None:
        """Append a whitespace run, dividing it across blocks when needed."""
        nonlocal current_length
        while space:
            room = max_chars - current_length
            # `<=` rather than `==`: an oversized link/tag can leave the block
            # over the limit, and a negative room must not be sliced with.
            if room <= 0:
                start_new_block()
                continue

            part, space = space[:room], space[room:]
            append_text(part)
            current_length += len(part)

            if current_length == max_chars:
                start_new_block()

    def add_word(word: str) -> None:
        """Append a whitespace-free word, hyphenating it if it is too long."""
        nonlocal current_length
        word_len = len(word)

        if word_len <= max_chars:
            if current_length + word_len > max_chars:
                start_new_block()
            append_text(word)
            current_length += word_len
            return

        # The word cannot fit in any single block: start from a clean block
        # and carve off (max_chars - 1) characters plus a hyphen at a time,
        # so every carved chunk fills a block exactly.
        if current_length > 0:
            start_new_block()

        chunk_size = max_chars - 1
        remaining = word
        while len(remaining) > chunk_size:
            append_text(remaining[:chunk_size] + '-')
            start_new_block()
            remaining = remaining[chunk_size:]

        # The tail is shorter than a full block and stays open for more text.
        if remaining:
            append_text(remaining)
            current_length = len(remaining)

    for token in tokens:
        if isinstance(token, cross.TextToken):
            for segment in ALTERNATE.findall(token.text):
                if segment.isspace():
                    add_whitespace(segment)
                else:
                    add_word(segment)

        elif isinstance(token, cross.LinkToken):
            # canonical_label() accepts a None label, so the length
            # calculation has to tolerate one as well.
            link_len = len(token.label or '')
            if canonical_label(token.label, token.href):
                link_len = min(link_len, 35)
            place_atomic(token, link_len)

        elif isinstance(token, cross.TagToken):
            # A hashtag is counted as "#" + tag name.
            place_atomic(token, 1 + len(token.tag))

        else:
            # Unknown token types ride along without consuming any budget.
            current_block.append(token)

    # Whatever is still open becomes the final block.
    if current_block:
        blocks.append(current_block)

    return blocks


def safe_get(obj: dict, key: str, default):
    """Return ``obj[key]``, or *default* when the key is missing or falsy.

    Note that any falsy stored value (``None``, ``''``, ``0``, ``False``,
    empty containers) is replaced by *default*.
    """
    val = obj.get(key, default)
    return val if val else default


def value_or_envvar(text: str) -> str:
    """Resolve an ``env:NAME`` reference to the value of that variable.

    Strings starting with ``env:`` are looked up in ``os.environ`` (an unset
    variable yields ``''``); any other string is returned unchanged.
    """
    if text.startswith('env:'):
        return os.environ.get(text[4:], '')
    return text


def get_or_envvar(obj: dict, key: str) -> str:
    """Read ``obj[key]`` and resolve it through ``value_or_envvar``.

    A missing key or a falsy value (such as ``None``) is treated as ``''``
    rather than being passed on to the string handling.
    """
    return value_or_envvar(safe_get(obj, key, ''))