util.py at 899ae340f589a4db0326c204bd5b7b0e9a25c1ff · zenfyr.dev/xpost

zenfyr.dev / xpost
social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
xpost / util.py
at 899ae340f589a4db0326c204bd5b7b0e9a25c1ff 5.4 kB view raw
  1import re
  2from markdownify import markdownify as md
  3import cross
  4import logging, sys, os
  5
# Configure the root logger to write records of level INFO and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Logger registered under the name "XPost".
LOGGER = logging.getLogger("XPost")

# Matches either a maximal run of non-whitespace (\S+) or a maximal run of
# whitespace (\s+); findall() with this pattern partitions a string into
# alternating "word" and "whitespace" segments without dropping characters.
ALTERNATE = re.compile(r'\S+|\s+')
 10
 11def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
 12    def start_new_block():
 13        nonlocal current_block, blocks, current_length
 14        if current_block:
 15            blocks.append(current_block)
 16        current_block = []
 17        current_length = 0
 18
 19    def append_text_to_block(text_segment):
 20        nonlocal current_block
 21        # if the last element in the current block is also text, just append to it
 22        if current_block and isinstance(current_block[-1], cross.TextToken):
 23            current_block[-1].text += text_segment
 24        else:
 25            current_block.append(cross.TextToken(text_segment))
 26    
 27    blocks: list[list[cross.Token]] = []
 28    current_block: list[cross.Token] = []
 29    current_length: int = 0
 30
 31    for token in tokens:
 32        if isinstance(token, cross.TextToken):
 33            # split content into alternating “words” (\S+) and “whitespace” (\s+).
 34            # this ensures every space/newline is treated as its own segment.
 35            segments: list[str] = ALTERNATE.findall(token.text)
 36
 37            for seg in segments:
 38                if seg.isspace():
 39                    # whitespace segment: we count it, and if it doesn't fully fit,
 40                    # split the whitespace across blocks to preserve exact spacing.
 41                    seg_len: int = len(seg)
 42                    while seg_len > 0:
 43                        space_left = max_chars - current_length
 44                        if space_left == 0:
 45                            start_new_block()
 46                            continue
 47
 48                        take = min(space_left, seg_len)
 49                        part = seg[:take]
 50                        append_text_to_block(part)
 51
 52                        current_length += len(part)
 53                        seg = seg[take:]
 54                        seg_len -= take
 55
 56                        if current_length == max_chars:
 57                            start_new_block()
 58
 59                else:
 60                    # seg is a “word” (no whitespace inside).
 61                    word: str = seg
 62                    wlen: int = len(word)
 63
 64                    # if the word itself is longer than n, we must split it with hyphens.
 65                    if wlen > max_chars:
 66                        # first, if we're in the middle of a block, close it & start fresh.
 67                        if current_length > 0:
 68                            start_new_block()
 69
 70                        remaining = word
 71                        # carve off (n-1)-sized chunks + “-” so each chunk is n chars.
 72                        while len(remaining) > (max_chars - 1):
 73                            chunk = remaining[: max_chars - 1] + '-'
 74                            append_text_to_block(chunk)
 75                            # that chunk fills the current block
 76                            start_new_block()
 77                            remaining = remaining[max_chars - 1 :]
 78
 79                        # now whatever remains is ≤ n characters
 80                        if remaining:
 81                            append_text_to_block(remaining)
 82                            current_length = len(remaining)
 83
 84                    else:
 85                        # word fits fully within a block (≤ n).
 86                        if current_length + wlen <= max_chars:
 87                            append_text_to_block(word)
 88                            current_length += wlen
 89                        else:
 90                            # not enough space in current block → start a new one
 91                            start_new_block()
 92                            append_text_to_block(word)
 93                            current_length = wlen
 94
 95        elif isinstance(token, cross.LinkToken):
 96            link_len = min(len(token.label), 35)
 97
 98            if current_length + link_len <= max_chars:
 99                current_block.append(token)
100                current_length += link_len
101            else:
102                start_new_block()
103                current_block.append(token)
104                current_length = link_len
105
106        elif isinstance(token, cross.TagToken):
107            # we treat a hashtag like “#tagname” for counting.
108            hashtag_len = 1 + len(token.tag)
109            if current_length + hashtag_len <= max_chars:
110                current_block.append(token)
111                current_length += hashtag_len
112            else:
113                start_new_block()
114                current_block.append(token)
115                current_length = hashtag_len
116
117        else:
118            # if you happen to have other types, just append them without affecting length.
119            current_block.append(token)
120
121    # append any remaining tokens as the final block
122    if current_block:
123        blocks.append(current_block)
124
125    return blocks
126
def safe_get(obj: dict, key: str, default):
    """Look up ``key`` in ``obj``, substituting ``default`` for falsy results.

    ``default`` is returned both when the key is absent and when the stored
    value is falsy (``None``, ``0``, ``''``, empty containers, ...).
    """
    return obj.get(key, default) or default
130
def value_or_envvar(text: str) -> str:
    """Resolve an ``env:NAME`` reference, or return ``text`` unchanged.

    When ``text`` starts with ``env:``, the remainder is used as an
    environment variable name and that variable's value is returned
    (an empty string if it is not set). Any other text is returned as is.
    """
    prefix = 'env:'
    if not text.startswith(prefix):
        return text

    variable_name = text[len(prefix):]
    return os.environ.get(variable_name, '')
135
def get_or_envvar(obj: dict, key: str):
    """Fetch ``obj[key]`` and resolve it through ``value_or_envvar``.

    A missing key is treated as an empty string before resolution.
    """
    raw_value = obj.get(key, '')
    return value_or_envvar(raw_value)