Social media crossposting tool. Third time's the charm.
mastodon misskey crossposting bluesky
at next 3.2 kB view raw
1from cross.tokens import LinkToken, MentionToken, TagToken, TextToken, Token 2 3 4def tokenize_post(text: str, facets: list[dict]) -> list[Token]: 5 def decode(ut8: bytes) -> str: 6 return ut8.decode(encoding="utf-8") 7 8 if not text: 9 return [] 10 ut8_text = text.encode(encoding="utf-8") 11 if not facets: 12 return [TextToken(text=decode(ut8_text))] 13 14 slices: list[tuple[int, int, str, str]] = [] 15 16 for facet in facets: 17 features: list[dict] = facet.get("features", []) 18 if not features: 19 continue 20 21 # we don't support overlapping facets/features 22 feature = features[0] 23 feature_type = feature["$type"] 24 index = facet["index"] 25 match feature_type: 26 case "app.bsky.richtext.facet#tag": 27 slices.append( 28 (index["byteStart"], index["byteEnd"], "tag", feature["tag"]) 29 ) 30 case "app.bsky.richtext.facet#link": 31 slices.append( 32 (index["byteStart"], index["byteEnd"], "link", feature["uri"]) 33 ) 34 case "app.bsky.richtext.facet#mention": 35 slices.append( 36 (index["byteStart"], index["byteEnd"], "mention", feature["did"]) 37 ) 38 39 if not slices: 40 return [TextToken(text=decode(ut8_text))] 41 42 slices.sort(key=lambda s: s[0]) 43 unique: list[tuple[int, int, str, str]] = [] 44 current_end = 0 45 for start, end, ttype, val in slices: 46 if start >= current_end: 47 unique.append((start, end, ttype, val)) 48 current_end = end 49 50 if not unique: 51 return [TextToken(text=decode(ut8_text))] 52 53 tokens: list[Token] = [] 54 prev = 0 55 56 for start, end, ttype, val in unique: 57 if start > prev: 58 # text between facets 59 tokens.append(TextToken(text=decode(ut8_text[prev:start]))) 60 # facet token 61 match ttype: 62 case "link": 63 label = decode(ut8_text[start:end]) 64 65 # try to unflatten links 66 split = val.split("://", 1) 67 if len(split) > 1: 68 if split[1].startswith(label): 69 tokens.append(LinkToken(href=val)) 70 prev = end 71 continue 72 73 if label.endswith("...") and split[1].startswith(label[:-3]): 74 
tokens.append(LinkToken(href=val)) 75 prev = end 76 continue 77 78 tokens.append(LinkToken(href=val, label=label)) 79 case "tag": 80 tag = decode(ut8_text[start:end]) 81 tokens.append(TagToken(tag=tag[1:] if tag.startswith("#") else tag)) 82 case "mention": 83 mention = decode(ut8_text[start:end]) 84 tokens.append( 85 MentionToken( 86 username=mention[1:] if mention.startswith("@") else mention, 87 uri=val, 88 ) 89 ) 90 prev = end 91 92 if prev < len(ut8_text): 93 tokens.append(TextToken(text=decode(ut8_text[prev:]))) 94 95 return tokens