social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

fix: facet parsing sliced the text by character index instead of by UTF-8 byte offsets

zenfyr.dev d1960e33 383c6d2b

verified
Changed files
+14 -10
bluesky
+14 -10
bluesky/common.py
···
text: str = post.get('text', '')
if not text:
return []
-
text = text.encode(encoding='utf-8').decode(encoding='utf-8')
facets: list[dict] = post.get('facets', [])
if not facets:
-
return [cross.TextToken(text)]
slices: list[tuple[int, int, str, str]] = []
···
slices.append((index['byteStart'], index['byteEnd'], 'mention', feature['did']))
if not slices:
-
return [cross.TextToken(text)]
slices.sort(key=lambda s: s[0])
unique: list[tuple[int, int, str, str]] = []
···
current_end = end
if not unique:
-
return [cross.TextToken(text)]
tokens: list[cross.Token] = []
prev = 0
···
for start, end, ttype, val in unique:
if start > prev:
# text between facets
-
tokens.append(cross.TextToken(text[prev:start]))
# facet token
match ttype:
case 'link':
-
label = text[start:end]
# try to unflatten links
split = val.split('://')
···
else:
tokens.append(cross.LinkToken(val, label))
case 'tag':
-
tokens.append(cross.TagToken(text[start:end]))
case 'mention':
-
tokens.append(cross.MentionToken(text[start:end], val))
prev = end
-
if prev < len(text):
-
tokens.append(cross.TextToken(text[prev:]))
return tokens
···
text: str = post.get('text', '')
if not text:
return []
+
ut8_text = text.encode(encoding='utf-8')
+
+
def decode(ut8: bytes) -> str:
+
return ut8.decode(encoding='utf-8')
facets: list[dict] = post.get('facets', [])
if not facets:
+
return [cross.TextToken(decode(ut8_text))]
slices: list[tuple[int, int, str, str]] = []
···
slices.append((index['byteStart'], index['byteEnd'], 'mention', feature['did']))
if not slices:
+
return [cross.TextToken(decode(ut8_text))]
slices.sort(key=lambda s: s[0])
unique: list[tuple[int, int, str, str]] = []
···
current_end = end
if not unique:
+
return [cross.TextToken(decode(ut8_text))]
tokens: list[cross.Token] = []
prev = 0
···
for start, end, ttype, val in unique:
if start > prev:
# text between facets
+
tokens.append(cross.TextToken(decode(ut8_text[prev:start])))
# facet token
match ttype:
case 'link':
+
label = decode(ut8_text[start:end])
+
print(label)
# try to unflatten links
split = val.split('://')
···
else:
tokens.append(cross.LinkToken(val, label))
case 'tag':
+
tokens.append(cross.TagToken(decode(ut8_text[start:end])))
case 'mention':
+
tokens.append(cross.MentionToken(decode(ut8_text[start:end]), val))
prev = end
+
if prev < len(ut8_text):
+
tokens.append(cross.TextToken(decode(ut8_text[prev:])))
return tokens