social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
import re
from dataclasses import replace

import grapheme

from cross.tokens import LinkToken, TagToken, TextToken, Token


def canonical_label(label: str | None, href: str) -> bool:
    """Return True when *label* carries no information beyond *href*.

    A label counts as canonical when it is missing or empty, when it is
    equal to ``href``, or when it is equal to ``href`` with everything up
    to and including the first ``"://"`` removed.
    """
    if not label or label == href:
        return True

    _scheme, separator, rest = href.partition("://")
    return bool(separator) and rest == label


# Splits text into alternating runs of non-whitespace and whitespace so that
# words and the gaps between them can be measured and placed separately.
ALTERNATE = re.compile(r"\S+|\s+")


def split_tokens(
    tokens: list[Token],
    max_chars: int,
    max_link_len: int = 35,
) -> list[list[Token]]:
    """Partition *tokens* into consecutive blocks of limited length.

    Lengths are measured in grapheme clusters (via ``grapheme.length``):

    * ``TagToken`` counts as ``1 + length(tag)``.
      NOTE(review): the extra 1 is presumably the leading ``#`` -- confirm.
    * ``LinkToken`` counts as the length of its label (0 when the label is
      missing). When the label is canonical (see ``canonical_label``) the
      counted length is capped at ``max_link_len``. A link that does not
      fit in an empty block is split into several ``LinkToken`` copies
      whose labels are hyphen-terminated pieces of the original label.
    * ``TextToken`` text is split into runs of non-whitespace and
      whitespace. A non-whitespace run is placed on the current block only
      if one spare character remains after it; runs longer than a block
      are hard-split, non-whitespace pieces ending in ``"-"``. Adjacent
      text placed in the same block is merged into one ``TextToken``.
    * Any other token is appended to the current block and counts as 0.

    Args:
        tokens: Tokens to distribute, in order.
        max_chars: Maximum counted length of one block; must be >= 2.
        max_link_len: Cap applied to the counted length of canonical links.

    Returns:
        The non-empty blocks, in order.

    Raises:
        ValueError: If ``max_chars`` is smaller than 2. Hard-splitting
            consumes ``max_chars - 1`` graphemes per piece, so a smaller
            limit could never make progress and the function would loop
            forever.
    """
    if max_chars < 2:
        raise ValueError(f"max_chars must be at least 2, got {max_chars}")

    blocks: list[list[Token]] = []
    block: list[Token] = []
    length = 0

    def new_block() -> None:
        # Flush the current block (if it holds anything) and start afresh.
        nonlocal block, length
        if block:
            blocks.append(block)
        block, length = [], 0

    def append_text(text: str) -> None:
        # Extend a trailing TextToken instead of emitting a second one.
        last = block[-1] if block else None
        if isinstance(last, TextToken):
            block[-1] = replace(last, text=last.text + text)
        else:
            block.append(TextToken(text=text))

    for tk in tokens:
        if isinstance(tk, TagToken):
            tag_len = 1 + grapheme.length(tk.tag)
            if length + tag_len > max_chars:
                new_block()
            # Tags are never split, even when longer than max_chars.
            block.append(tk)
            length += tag_len
            continue

        if isinstance(tk, LinkToken):
            label_text = tk.label or ""
            link_len = grapheme.length(label_text)

            if canonical_label(tk.label, tk.href):
                link_len = min(link_len, max_link_len)

            if length + link_len <= max_chars:
                block.append(tk)
                length += link_len
                continue

            if length:
                new_block()

            # The link does not fit: spread its label over several blocks.
            remaining = label_text
            while remaining:
                remaining_len = grapheme.length(remaining)
                # Keep one slot free for the "-" unless the rest fits whole.
                reserve = 0 if remaining_len <= max_chars else 1
                room = max_chars - length - reserve

                chunk = grapheme.slice(remaining, 0, room)
                if remaining_len > room:
                    chunk += "-"

                block.append(replace(tk, label=chunk))
                length += grapheme.length(chunk)

                remaining = grapheme.slice(remaining, room, remaining_len)
                if remaining:
                    new_block()
            continue

        if isinstance(tk, TextToken):
            for seg in ALTERNATE.findall(tk.text):
                is_space = seg.isspace()
                seg_len = grapheme.length(seg)

                # Words leave one character spare; whitespace may fill up.
                limit = max_chars if is_space else max_chars - 1
                if length + seg_len <= limit:
                    append_text(seg)
                    length += seg_len
                    continue

                if length:
                    new_block()

                # Hard-split runs that exceed a block on their own. Word
                # pieces get a trailing hyphen, whitespace pieces do not.
                step = max_chars if is_space else max_chars - 1
                suffix = "" if is_space else "-"
                while grapheme.length(seg) > step:
                    append_text(grapheme.slice(seg, 0, step) + suffix)
                    new_block()
                    seg = grapheme.slice(seg, step, grapheme.length(seg))

                if seg:
                    append_text(seg)
                    length = grapheme.length(seg)
            continue

        # Unknown token kinds pass through without affecting the count.
        block.append(tk)

    if block:
        blocks.append(block)

    return blocks