social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
import logging
import os
import re
import sys

import cross

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")

# Matches alternating runs of non-whitespace ("words") and whitespace, so that
# findall() returns every character of the input in order.
ALTERNATE = re.compile(r'\S+|\s+')


def canonical_label(label: str | None, href: str) -> bool:
    """Return True when *label* adds nothing beyond the link target itself.

    A label counts as canonical when it is empty/None, equal to *href*, or
    equal to *href* with everything up to and including the first '://'
    removed.
    """
    if not label or label == href:
        return True

    # Compare against the part after the scheme separator, if there is one.
    _, separator, remainder = href.partition('://')
    return bool(separator) and remainder == label


def split_tokens(
    tokens: list[cross.Token], max_chars: int, max_link_len: int = 35
) -> list[list[cross.Token]]:
    """Partition *tokens* into consecutive blocks of limited counted length.

    Counting rules:
      * text counts one per character; whitespace runs are split exactly at
        the block boundary, words are moved whole to the next block when they
        do not fit, and words longer than *max_chars* are cut into chunks that
        end with '-' (no hyphen is added when *max_chars* is 1);
      * a link counts as the length of its label, capped at *max_link_len*
        when ``canonical_label`` says the label is just the URL;
      * a hashtag counts as ``1 + len(tag)`` (the leading '#');
      * any other token type is appended without affecting the count.

    A single link or hashtag that is longer than *max_chars* is not split; it
    is placed in a block where it is the only counted item, and that block
    therefore exceeds the limit.

    Text is always emitted as freshly created ``cross.TextToken`` objects;
    link, tag and other tokens are passed through by reference.

    Args:
        tokens: the token sequence to split.
        max_chars: maximum counted length of a block; must be at least 1.
        max_link_len: counted-length cap applied to canonical links.

    Returns:
        The list of non-empty blocks, in input order.

    Raises:
        ValueError: if *max_chars* is smaller than 1 (no text could ever be
            placed, which previously made the function loop forever).
    """
    if max_chars < 1:
        raise ValueError(f"max_chars must be at least 1, got {max_chars}")

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    def start_new_block() -> None:
        """Close the current block (if it holds anything) and reset the count."""
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text(text_segment: str) -> None:
        """Add text to the current block, merging with a trailing text token."""
        nonlocal current_length
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text_segment
        else:
            current_block.append(cross.TextToken(text_segment))
        current_length += len(text_segment)

    def place_atomic(token: cross.Token, length: int) -> None:
        """Add an unsplittable token, opening a new block first if it won't fit."""
        nonlocal current_length
        if current_length + length > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += length

    for token in tokens:
        if isinstance(token, cross.TextToken):
            for segment in ALTERNATE.findall(token.text):
                if segment.isspace():
                    # Whitespace is split across blocks so spacing is preserved
                    # exactly.
                    pending = segment
                    while pending:
                        space_left = max_chars - current_length
                        # "<= 0", not "== 0": an oversized link/hashtag can
                        # leave the block over the limit, and a negative
                        # space_left would otherwise never make progress.
                        if space_left <= 0:
                            start_new_block()
                            continue

                        append_text(pending[:space_left])
                        pending = pending[space_left:]

                        if current_length == max_chars:
                            start_new_block()

                elif len(segment) > max_chars:
                    # The word cannot fit in any block: start from a fresh
                    # count and hyphenate it across blocks.
                    if current_length > 0:
                        start_new_block()

                    if max_chars > 1:
                        # (max_chars - 1) characters plus '-' fill one block.
                        step, suffix = max_chars - 1, '-'
                    else:
                        # No room for a hyphen; one character per block. This
                        # also guarantees the loop below consumes input.
                        step, suffix = 1, ''

                    remaining = segment
                    while len(remaining) > step:
                        append_text(remaining[:step] + suffix)
                        start_new_block()
                        remaining = remaining[step:]

                    if remaining:
                        append_text(remaining)

                else:
                    # The word fits in a block; move it to a new one if the
                    # current block has too little room left.
                    if current_length + len(segment) > max_chars:
                        start_new_block()
                    append_text(segment)

        elif isinstance(token, cross.LinkToken):
            # NOTE(review): assumes token.label is a str here; canonical_label
            # tolerates None but len() would not -- confirm against cross.LinkToken.
            link_len = len(token.label)
            if canonical_label(token.label, token.href):
                link_len = min(link_len, max_link_len)
            place_atomic(token, link_len)

        elif isinstance(token, cross.TagToken):
            # A hashtag is counted as "#tagname".
            place_atomic(token, 1 + len(token.tag))

        else:
            # Unknown token types ride along without affecting the count.
            current_block.append(token)

    # Whatever is left forms the final block.
    if current_block:
        blocks.append(current_block)

    return blocks


def safe_get(obj: dict, key: str, default):
    """Return ``obj[key]``, or *default* when the key is missing or falsy.

    Note that any falsy stored value (None, '', 0, False, empty container) is
    replaced by *default*, not only a missing key.
    """
    return obj.get(key, default) or default


def as_envvar(text: str | None) -> str | None:
    """Resolve an ``env:NAME`` reference to the value of that environment variable.

    Returns None for empty/None input, the variable's value (or '' when it is
    not set) for text starting with 'env:', and *text* unchanged otherwise.
    """
    if not text:
        return None

    prefix = 'env:'
    if text.startswith(prefix):
        return os.environ.get(text[len(prefix):], '')

    return text