social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

I just vibe-coded this (fix formatting)

zenfyr.dev 6c33c20a 996650db

verified
+3 -3
bluesky.py
···
token_type = token['type']
if token_type == 'text':
-
builder.text(token['value'])
elif token_type == 'hashtag':
-
builder.tag(token['value'], token['value'][1:])
elif token_type == 'link':
-
builder.link(token['label'], token['value'])
else:
# Fail on mention!
return None
···
token_type = token['type']
if token_type == 'text':
+
builder.text(token['content'])
elif token_type == 'hashtag':
+
builder.tag('#' + token['tag'], token['tag'])
elif token_type == 'link':
+
builder.link(token['text'], token['url'])
else:
# Fail on mention!
return None
+1 -1
main.py
···
label_text: set[str] = set()
status_spoiler = status['spoiler_text']
if status_spoiler:
-
tokens.insert(0, {"type": "text", "value": "CW: " + status_spoiler + '\n\n'})
label_text.add('graphic-media')
if any(tag in status_spoiler for tag in ADULT_LABEL):
···
label_text: set[str] = set()
status_spoiler = status['spoiler_text']
if status_spoiler:
+
tokens.insert(0, {"type": "text", "content": "CW: " + status_spoiler + '\n\n'})
label_text.add('graphic-media')
if any(tag in status_spoiler for tag in ADULT_LABEL):
+1
pyproject.toml
···
dependencies = [
"atproto>=0.0.61",
"click>=8.2.1",
"requests>=2.32.3",
]
···
dependencies = [
"atproto>=0.0.61",
"click>=8.2.1",
+
"markdownify>=1.1.0",
"requests>=2.32.3",
]
+168 -60
util.py
···
-
import re, html
-
NEWLINE = re.compile(r'</p>|<br\s*/?>', re.IGNORECASE)
-
NON_ANCHORS = re.compile(r'(?i)</?(?!a\b)[a-z][^>]*>')
-
ANCHORS = re.compile(r'<a\s+[^>]*href=["\'](.*?)["\'][^>]*>(.*?)</a>', re.IGNORECASE)
DEFAULT_SETTINGS: dict = {
'bluesky': {
···
}
}
-
def tokenize_html(content: str) -> list[dict]:
-
text = content.replace('<p>', '')
-
text = NEWLINE.sub('\n', text)
-
text = html.unescape(text)
-
text = NON_ANCHORS.sub('', text)
-
text = text.rstrip('\n')
tokens = []
-
pos = 0
-
for anchor in ANCHORS.finditer(text):
-
start, end = anchor.span()
-
-
if start > pos:
-
tokens.append({"type": "text", "value": text[pos:start]})
-
href = anchor.group(1).strip()
-
label = anchor.group(2).strip()
-
if label.startswith("#"):
-
tokens.append({"type": "hashtag", "value": label})
-
elif label.startswith("@"):
-
tokens.append({"type": "mention", "value": label})
else:
-
tokens.append({"type": "link", "value": href, "label": label})
-
pos = end
-
if pos < len(text):
-
tokens.append({"type": "text", "value": text[pos:]})
-
return tokens
-
def split_tokens(tokens: list[dict], max_chars: int) -> list[list[dict]]:
-
chunks = []
-
current_chunk = []
-
current_length = 0
-
for token in tokens:
-
token_type = token["type"]
-
value = token["value"]
-
val_len = len(value)
-
if token_type != "text":
-
if current_length + val_len > max_chars:
-
if current_chunk:
-
chunks.append(current_chunk)
-
current_chunk = [token]
-
current_length = val_len
else:
-
current_chunk.append(token)
-
current_length += val_len
-
else:
-
start = 0
-
while start < val_len:
-
space_left = max_chars - current_length
-
if space_left == 0:
-
chunks.append(current_chunk)
-
current_chunk = []
-
current_length = 0
-
space_left = max_chars
-
end = min(start + space_left, val_len)
-
piece = value[start:end]
-
current_chunk.append({"type": "text", "value": piece})
-
current_length += len(piece)
-
start = end
-
if current_chunk:
-
chunks.append(current_chunk)
-
return chunks
def safe_get(obj: dict, key: str, default):
val = obj.get(key, default)
···
+
import re
+
from markdownify import markdownify as md
+
ALTERNATE = re.compile(r'\S+|\s+')
DEFAULT_SETTINGS: dict = {
'bluesky': {
···
}
}
+
def tokenize_html(content: str) -> list[dict]:
    """Convert an HTML fragment (e.g. a Mastodon status body) to a token list.

    The HTML is first converted to Markdown (``autolinks=False`` so every link
    keeps the explicit ``[text](url)`` form the tokenizer expects), then fed
    through ``tokenize_markdown``.
    """
    return tokenize_markdown(md(content, autolinks=False))
+
def _scan_until(src: str, i: int, stop: str) -> tuple[str, int]:
    """Collect characters of ``src`` from ``i`` up to ``stop``.

    Returns the collected text and the index just past the stop character
    (or ``len(src) + 1`` when ``stop`` was never found — mirroring the
    unconditional skip the caller performs, guarded by an ``i < length``
    check before any further indexing).
    """
    buf = []
    n = len(src)
    while i < n and src[i] != stop:
        buf.append(src[i])
        i += 1
    return ''.join(buf), i + 1


def tokenize_markdown(md) -> list[dict]:
    """Tokenize a small Markdown subset into crossposting tokens.

    Recognized forms:
      ``![alt](url)``      -> {'type': 'media', 'alt', 'url'}
      ``[#tag](url)``      -> {'type': 'hashtag', 'tag' (no '#'), 'url'}
      ``[@user](url)``     -> {'type': 'mention', 'mention' (no '@'), 'url'}
      ``[text](url)``      -> {'type': 'link', 'text', 'url'}
      anything else        -> {'type': 'text', 'content'}

    A bracketed form without a following ``(...)`` is emitted verbatim as a
    text token. NOTE: the parameter name ``md`` shadows the module-level
    ``markdownify`` import; kept for interface compatibility.
    """
    tokens: list[dict] = []
    i = 0
    length = len(md)
    while i < length:
        if md[i] == '!' and i + 1 < length and md[i + 1] == '[':
            # media embed: ![alt](url)
            start = i
            alt_text, i = _scan_until(md, i + 2, ']')
            if i < length and md[i] == '(':
                url, i = _scan_until(md, i + 1, ')')
                tokens.append({'type': 'media', 'alt': alt_text, 'url': url})
            else:
                # no "(url)" part — keep the raw source as plain text
                tokens.append({'type': 'text', 'content': md[start:i]})
        elif md[i] == '[':
            # link, hashtag or mention: [label](url)
            start = i
            link_text, i = _scan_until(md, i + 1, ']')
            if i < length and md[i] == '(':
                url, i = _scan_until(md, i + 1, ')')
                if link_text.startswith('#'):
                    tokens.append({'type': 'hashtag', 'tag': link_text[1:], 'url': url})
                elif link_text.startswith('@'):
                    tokens.append({'type': 'mention', 'mention': link_text[1:], 'url': url})
                else:
                    # plain link; a separate http(s)-prefix branch in an earlier
                    # version emitted the exact same token and was removed
                    tokens.append({'type': 'link', 'text': link_text, 'url': url})
            else:
                tokens.append({'type': 'text', 'content': md[start:i]})
        else:
            # plain text: run until the next '[' or '![' opener
            start = i
            while i < length and md[i] != '[' and not (md[i] == '!' and i + 1 < length and md[i + 1] == '['):
                i += 1
            tokens.append({'type': 'text', 'content': md[start:i]})
    return tokens
+
+
def split_tokens(tokens: list[dict], max_chars: int) -> list[list[dict]]:
    """Split a token stream into blocks of at most ``max_chars`` characters.

    Text is broken on whitespace where possible; a word longer than
    ``max_chars`` is hyphenated into (max_chars - 1)-char pieces plus ``-``.
    Whitespace is counted and carried across block boundaries so spacing is
    preserved exactly. A link counts as ``min(len(url), 35)`` characters
    (Bluesky-style URL truncation), a hashtag as ``len('#' + tag)``. Any
    other token type (media, mention, ...) is passed through without
    affecting the character budget.
    """
    blocks: list[list[dict]] = []
    current_block: list[dict] = []
    current_length: int = 0

    def start_new_block():
        # flush the current block (if non-empty) and reset the budget
        nonlocal current_block, current_length
        if current_block:
            blocks.append(current_block)
        current_block = []
        current_length = 0

    def append_text_to_block(text_segment):
        # merge into a trailing text token instead of creating a new one
        if current_block and current_block[-1]['type'] == 'text':
            current_block[-1]['content'] += text_segment
        else:
            current_block.append({'type': 'text', 'content': text_segment})

    def place_counted(token, token_len):
        # place an indivisible counted token in the current block, or a
        # fresh one when it no longer fits
        nonlocal current_length
        if current_length + token_len > max_chars:
            start_new_block()
        current_block.append(token)
        current_length += token_len

    for token in tokens:
        ttype: str = token['type']
        if ttype == 'text':
            # alternate "word" (\S+) / "whitespace" (\s+) segments so every
            # space/newline is its own segment
            for seg in re.findall(r'\S+|\s+', token['content']):
                if seg.isspace():
                    # whitespace is counted; split it across blocks when it
                    # does not fully fit, to preserve exact spacing
                    seg_len = len(seg)
                    while seg_len > 0:
                        space_left = max_chars - current_length
                        if space_left == 0:
                            start_new_block()
                            continue
                        take = min(space_left, seg_len)
                        append_text_to_block(seg[:take])
                        current_length += take
                        seg = seg[take:]
                        seg_len -= take
                    if current_length == max_chars:
                        start_new_block()
                else:
                    wlen = len(seg)
                    if wlen > max_chars:
                        # word longer than a whole block: hyphenate. Close a
                        # partially-filled block first, then carve off
                        # (max_chars - 1)-char pieces + '-' (each fills a block).
                        if current_length > 0:
                            start_new_block()
                        remaining = seg
                        while len(remaining) > max_chars - 1:
                            append_text_to_block(remaining[: max_chars - 1] + '-')
                            start_new_block()
                            remaining = remaining[max_chars - 1:]
                        if remaining:
                            append_text_to_block(remaining)
                            current_length = len(remaining)
                    elif current_length + wlen <= max_chars:
                        append_text_to_block(seg)
                        current_length += wlen
                    else:
                        # word fits in a block but not in this one
                        start_new_block()
                        append_text_to_block(seg)
                        current_length = wlen
        elif ttype == 'link':
            # Bluesky displays at most ~35 characters of a URL
            place_counted(token, min(len(token['url']), 35))
        elif ttype == 'hashtag':
            # counted as rendered: '#' + tag
            place_counted(token, 1 + len(token['tag']))
        else:
            # other types (media, mention) don't consume characters
            current_block.append(token)

    # flush whatever remains as the final block
    if current_block:
        blocks.append(current_block)
    return blocks
def safe_get(obj: dict, key: str, default):
val = obj.get(key, default)
+47 -1
uv.lock
···
]
[[package]]
name = "certifi"
version = "2025.4.26"
source = { registry = "https://pypi.org/simple" }
···
]
[[package]]
name = "pycparser"
version = "2.22"
source = { registry = "https://pypi.org/simple" }
···
]
[[package]]
name = "sniffio"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
]
[[package]]
···
[[package]]
name = "xpost"
-
version = "0.1.0"
source = { virtual = "." }
dependencies = [
{ name = "atproto" },
{ name = "click" },
{ name = "requests" },
]
···
requires-dist = [
{ name = "atproto", specifier = ">=0.0.61" },
{ name = "click", specifier = ">=8.2.1" },
{ name = "requests", specifier = ">=2.32.3" },
]
···
]
[[package]]
+
name = "beautifulsoup4"
+
version = "4.13.4"
+
source = { registry = "https://pypi.org/simple" }
+
dependencies = [
+
{ name = "soupsieve" },
+
{ name = "typing-extensions" },
+
]
+
sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067, upload-time = "2025-04-15T17:05:13.836Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" },
+
]
+
+
[[package]]
name = "certifi"
version = "2025.4.26"
source = { registry = "https://pypi.org/simple" }
···
]
[[package]]
+
name = "markdownify"
+
version = "1.1.0"
+
source = { registry = "https://pypi.org/simple" }
+
dependencies = [
+
{ name = "beautifulsoup4" },
+
{ name = "six" },
+
]
+
sdist = { url = "https://files.pythonhosted.org/packages/2f/78/c48fed23c7aebc2c16049062e72de1da3220c274de59d28c942acdc9ffb2/markdownify-1.1.0.tar.gz", hash = "sha256:449c0bbbf1401c5112379619524f33b63490a8fa479456d41de9dc9e37560ebd", size = 17127, upload-time = "2025-03-05T11:54:40.574Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/64/11/b751af7ad41b254a802cf52f7bc1fca7cabe2388132f2ce60a1a6b9b9622/markdownify-1.1.0-py3-none-any.whl", hash = "sha256:32a5a08e9af02c8a6528942224c91b933b4bd2c7d078f9012943776fc313eeef", size = 13901, upload-time = "2025-03-05T11:54:39.454Z" },
+
]
+
+
[[package]]
name = "pycparser"
version = "2.22"
source = { registry = "https://pypi.org/simple" }
···
]
[[package]]
+
name = "six"
+
version = "1.17.0"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" },
+
]
+
+
[[package]]
name = "sniffio"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
+
]
+
+
[[package]]
+
name = "soupsieve"
+
version = "2.7"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/3f/f4/4a80cd6ef364b2e8b65b15816a843c0980f7a5a2b4dc701fc574952aa19f/soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a", size = 103418, upload-time = "2025-04-20T18:50:08.518Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677, upload-time = "2025-04-20T18:50:07.196Z" },
]
[[package]]
···
[[package]]
name = "xpost"
+
version = "0.0.1"
source = { virtual = "." }
dependencies = [
{ name = "atproto" },
{ name = "click" },
+
{ name = "markdownify" },
{ name = "requests" },
]
···
requires-dist = [
{ name = "atproto", specifier = ">=0.0.61" },
{ name = "click", specifier = ">=8.2.1" },
+
{ name = "markdownify", specifier = ">=1.1.0" },
{ name = "requests", specifier = ">=2.32.3" },
]