import logging
import os
import re
import sys

import cross

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
LOGGER = logging.getLogger("XPost")

# Matches alternating runs of non-whitespace ("words") and whitespace.
ALTERNATE = re.compile(r'\S+|\s+')


def canonical_label(label: str | None, href: str) -> bool:
    """Return True when *label* adds nothing beyond *href* itself.

    A label is canonical when it is missing/empty, equal to *href*, or equal
    to *href* with everything up to and including the first ``://`` removed.
    """
    if not label or label == href:
        return True

    _scheme, separator, remainder = href.partition('://')
    return bool(separator) and remainder == label


def _hyphenate(word: str, max_chars: int) -> list[str]:
    """Cut *word* into pieces of at most *max_chars* characters each.

    Every piece except the last ends with ``-``. With ``max_chars == 1`` there
    is no room for a hyphen, so the word is split into single characters.
    """
    if max_chars < 2:
        return list(word)

    step = max_chars - 1
    pieces: list[str] = []
    # stop as soon as the tail fits, so a tail of exactly max_chars characters
    # is not hyphenated needlessly.
    while len(word) > max_chars:
        pieces.append(word[:step] + '-')
        word = word[step:]
    if word:
        pieces.append(word)
    return pieces


def split_tokens(
    tokens: list[cross.Token],
    max_chars: int,
    max_link_len: int = 35,
) -> list[list[cross.Token]]:
    """Pack *tokens* into consecutive blocks of at most *max_chars* characters.

    Counting rules:
      * text tokens count one per character; they are split at word/whitespace
        boundaries, whitespace is preserved exactly (and may be divided across
        blocks), and words longer than *max_chars* are hyphenated;
      * link tokens count ``len(label)`` (a missing label counts as 0), capped
        at *max_link_len* when the label is canonical for the href;
      * tag tokens count ``1 + len(tag)`` (the tag plus a leading ``#``);
      * any other token is appended without affecting the count.

    Link and tag tokens are never split; one that is longer than *max_chars*
    is placed in a block of its own start and may exceed the limit.

    Text is emitted as new ``cross.TextToken`` objects; adjacent text within a
    block is merged into a single token.

    Raises:
        ValueError: if *max_chars* is smaller than 1 (no text could ever fit).
    """
    if max_chars < 1:
        raise ValueError(f"max_chars must be at least 1, got {max_chars}")

    blocks: list[list[cross.Token]] = []
    current_block: list[cross.Token] = []
    current_length: int = 0

    def start_new_block() -> None:
        """Close the current block (if it holds anything) and reset the count."""
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text(text: str) -> None:
        """Add *text* to the current block, merging with a trailing text token."""
        nonlocal current_length
        if current_block and isinstance(current_block[-1], cross.TextToken):
            current_block[-1].text += text
        else:
            current_block.append(cross.TextToken(text))
        current_length += len(text)

    def append_atomic(token: cross.Token, length: int) -> None:
        """Add an unsplittable token, opening a new block when it does not fit."""
        nonlocal current_length
        if current_length + length > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += length

    for token in tokens:
        if isinstance(token, cross.TextToken):
            # every whitespace run and every word is handled as its own segment
            for segment in ALTERNATE.findall(token.text):
                if segment.isspace():
                    # whitespace may be divided across blocks to keep spacing exact
                    while segment:
                        space_left = max_chars - current_length
                        # <= 0, not == 0: an oversized link/tag can push the
                        # count past max_chars, which must not stall this loop.
                        if space_left <= 0:
                            start_new_block()
                            continue

                        append_text(segment[:space_left])
                        segment = segment[space_left:]

                        if current_length == max_chars:
                            start_new_block()

                elif len(segment) > max_chars:
                    # the word can never fit in one block: start from a clean
                    # block and spread hyphenated pieces over as many as needed.
                    if current_length > 0:
                        start_new_block()

                    *full_pieces, tail = _hyphenate(segment, max_chars)
                    for piece in full_pieces:
                        append_text(piece)
                        start_new_block()
                    append_text(tail)

                else:
                    # the word fits in a block; move to a fresh one if needed
                    if current_length + len(segment) > max_chars:
                        start_new_block()
                    append_text(segment)

        elif isinstance(token, cross.LinkToken):
            # `or ''` guards against a None label, which canonical_label accepts
            link_len = len(token.label or '')
            if canonical_label(token.label, token.href):
                link_len = min(link_len, max_link_len)
            append_atomic(token, link_len)

        elif isinstance(token, cross.TagToken):
            # a hashtag is counted as "#tagname"
            append_atomic(token, 1 + len(token.tag))

        else:
            # unknown token types are carried along without affecting the count
            current_block.append(token)

    # whatever is left forms the final block
    if current_block:
        blocks.append(current_block)

    return blocks


def safe_get(obj: dict, key: str, default):
    """Return ``obj[key]``, or *default* when the key is missing or its value is falsy.

    Note that stored values such as ``0``, ``False``, ``''`` or ``[]`` are
    treated like a missing key and replaced by *default*.
    """
    return obj.get(key, default) or default


def as_envvar(text: str | None) -> str | None:
    """Resolve an ``env:NAME`` reference against the process environment.

    Returns None for a missing/empty *text*. Text starting with ``env:`` is
    replaced by the value of the named environment variable (``''`` when the
    variable is not set); any other text is returned unchanged.
    """
    if not text:
        return None

    if text.startswith('env:'):
        return os.environ.get(text[4:], '')
    return text