util.py at e497a89b2b08e18f62e8ff6a8fba8d1fa6faac8e · zenfyr.dev/xpost

zenfyr.dev / xpost
social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
xpost / util.py
at e497a89b2b08e18f62e8ff6a8fba8d1fa6faac8e 5.7 kB view raw
  1import re
  2from markdownify import markdownify as md
  3import cross
  4import logging, sys, os
  5
# configure the root logger at import time: records go to stdout, INFO and above.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# shared logger, registered under the name "XPost".
LOGGER = logging.getLogger("XPost")

# matches either a run of non-whitespace or a run of whitespace; findall() with
# this pattern cuts a string into alternating word/whitespace segments without
# dropping any character.
ALTERNATE = re.compile(r'\S+|\s+')
 10
 11def canonical_label(label: str | None, href: str):
 12    if not label or label == href:
 13        return True
 14    
 15    split = href.split('://', 1)
 16    if len(split) > 1:
 17        if split[1] == label:
 18            return True
 19    
 20    return False
 21
 22def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
 23    def start_new_block():
 24        nonlocal current_block, blocks, current_length
 25        if current_block:
 26            blocks.append(current_block)
 27        current_block = []
 28        current_length = 0
 29
 30    def append_text_to_block(text_segment):
 31        nonlocal current_block
 32        # if the last element in the current block is also text, just append to it
 33        if current_block and isinstance(current_block[-1], cross.TextToken):
 34            current_block[-1].text += text_segment
 35        else:
 36            current_block.append(cross.TextToken(text_segment))
 37    
 38    blocks: list[list[cross.Token]] = []
 39    current_block: list[cross.Token] = []
 40    current_length: int = 0
 41
 42    for token in tokens:
 43        if isinstance(token, cross.TextToken):
 44            # split content into alternating “words” (\S+) and “whitespace” (\s+).
 45            # this ensures every space/newline is treated as its own segment.
 46            segments: list[str] = ALTERNATE.findall(token.text)
 47
 48            for seg in segments:
 49                if seg.isspace():
 50                    # whitespace segment: we count it, and if it doesn't fully fit,
 51                    # split the whitespace across blocks to preserve exact spacing.
 52                    seg_len: int = len(seg)
 53                    while seg_len > 0:
 54                        space_left = max_chars - current_length
 55                        if space_left == 0:
 56                            start_new_block()
 57                            continue
 58
 59                        take = min(space_left, seg_len)
 60                        part = seg[:take]
 61                        append_text_to_block(part)
 62
 63                        current_length += len(part)
 64                        seg = seg[take:]
 65                        seg_len -= take
 66
 67                        if current_length == max_chars:
 68                            start_new_block()
 69
 70                else:
 71                    # seg is a “word” (no whitespace inside).
 72                    word: str = seg
 73                    wlen: int = len(word)
 74
 75                    # if the word itself is longer than n, we must split it with hyphens.
 76                    if wlen > max_chars:
 77                        # first, if we're in the middle of a block, close it & start fresh.
 78                        if current_length > 0:
 79                            start_new_block()
 80
 81                        remaining = word
 82                        # carve off (n-1)-sized chunks + “-” so each chunk is n chars.
 83                        while len(remaining) > (max_chars - 1):
 84                            chunk = remaining[: max_chars - 1] + '-'
 85                            append_text_to_block(chunk)
 86                            # that chunk fills the current block
 87                            start_new_block()
 88                            remaining = remaining[max_chars - 1 :]
 89
 90                        # now whatever remains is ≤ n characters
 91                        if remaining:
 92                            append_text_to_block(remaining)
 93                            current_length = len(remaining)
 94
 95                    else:
 96                        # word fits fully within a block (≤ n).
 97                        if current_length + wlen <= max_chars:
 98                            append_text_to_block(word)
 99                            current_length += wlen
100                        else:
101                            # not enough space in current block → start a new one
102                            start_new_block()
103                            append_text_to_block(word)
104                            current_length = wlen
105
106        elif isinstance(token, cross.LinkToken):
107            link_len = len(token.label)
108            if canonical_label(token.label, token.href):
109                link_len = min(link_len, 35)
110
111            if current_length + link_len <= max_chars:
112                current_block.append(token)
113                current_length += link_len
114            else:
115                start_new_block()
116                current_block.append(token)
117                current_length = link_len
118
119        elif isinstance(token, cross.TagToken):
120            # we treat a hashtag like “#tagname” for counting.
121            hashtag_len = 1 + len(token.tag)
122            if current_length + hashtag_len <= max_chars:
123                current_block.append(token)
124                current_length += hashtag_len
125            else:
126                start_new_block()
127                current_block.append(token)
128                current_length = hashtag_len
129
130        else:
131            # if you happen to have other types, just append them without affecting length.
132            current_block.append(token)
133
134    # append any remaining tokens as the final block
135    if current_block:
136        blocks.append(current_block)
137
138    return blocks
139
def safe_get(obj: dict, key: str, default):
    """Look up ``key`` in ``obj``, substituting ``default`` for falsy results.

    ``default`` is returned both when the key is absent and when the stored
    value is falsy (``None``, ``0``, ``""``, empty containers, ...).
    """
    found = obj.get(key, default)
    if found:
        return found
    return default
143
def value_or_envvar(text: str) -> str:
    """Resolve ``text``, reading it from the environment when prefixed with ``env:``.

    A value of the form ``env:NAME`` is replaced by the environment variable
    ``NAME``, or by an empty string when that variable is not set. Any other
    value is returned unchanged.
    """
    marker = 'env:'
    if not text.startswith(marker):
        return text

    variable_name = text[len(marker):]
    return os.environ.get(variable_name, '')
148
def get_or_envvar(obj: dict, key: str):
    """Fetch ``obj[key]`` (empty string if absent) and resolve it via ``value_or_envvar``."""
    raw_value = obj.get(key, '')
    return value_or_envvar(raw_value)