social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky
at master 18 kB view raw
from atproto import Request, client_utils
from atproto_client import models
from httpx import Timeout

import cross
import misskey.mfm_util as mfm_util
import util.database as database
from bluesky.atproto2 import Client2, resolve_identity
from bluesky.common import ADULT_PATTERN, PORN_PATTERN, SERVICE, tokens_to_richtext
from util.database import DataBaseWorker
from util.media import (
    MediaInfo,
    compress_image,
    convert_to_mp4,
    get_filename_from_url,
    get_media_meta,
)
from util.util import LOGGER, as_envvar

# Values accepted inside the "thread_gate" option list.
ALLOWED_GATES = ["mentioned", "following", "followers", "everybody"]

# Limits applied while building posts. The numbers are the literals this
# module has always used; they presumably mirror Bluesky's own limits
# (TODO confirm against the current service limits).
MAX_POST_LENGTH = 300  # length budget handed to cross.split_tokens
MAX_IMAGES_PER_POST = 4
MAX_IMAGE_INPUT_BYTES = 2_000_000  # larger source images make the post be skipped
IMAGE_COMPRESS_THRESHOLD_BYTES = 1_000_000  # larger images get a second compression pass
MAX_VIDEO_BYTES = 100_000_000
MAX_VIDEO_DURATION = 180  # same unit as get_media_meta()["duration"], presumably seconds


class BlueskyOutputOptions:
    """Parsed ``options`` section of a Bluesky output's settings.

    Recognised keys (all optional):

    * ``quote_gate``    - coerced with ``bool()``; defaults to ``False``.
    * ``thread_gate``   - list whose items must all be in ``ALLOWED_GATES``
      (an empty list is accepted); defaults to ``["everybody"]``.
    * ``encode_videos`` - coerced with ``bool()``; defaults to ``True``.

    Raises:
        ValueError: if ``thread_gate`` contains a value outside ``ALLOWED_GATES``.
    """

    def __init__(self, o: dict) -> None:
        self.quote_gate: bool = False
        self.thread_gate: list[str] = ["everybody"]
        self.encode_videos: bool = True

        quote_gate = o.get("quote_gate")
        if quote_gate is not None:
            self.quote_gate = bool(quote_gate)

        thread_gate = o.get("thread_gate")
        if thread_gate is not None:
            if any(v not in ALLOWED_GATES for v in thread_gate):
                raise ValueError(
                    f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {thread_gate}"
                )
            self.thread_gate = thread_gate

        encode_videos = o.get("encode_videos")
        if encode_videos is not None:
            self.encode_videos = bool(encode_videos)


class BlueskyOutput(cross.Output):
    """Output that mirrors posts, deletions and reposts onto a Bluesky account."""

    def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
        """Resolve the account identity and log the client in.

        Reads ``options``, ``app-password``, ``handle``, ``did``, ``pds`` and
        ``bsky_appview`` from *settings*; every scalar goes through
        ``as_envvar`` first.

        Raises:
            Exception: if no app password is configured.
            ValueError: propagated from ``BlueskyOutputOptions``.
        """
        super().__init__(input, settings, db)
        self.options = BlueskyOutputOptions(settings.get("options") or {})

        app_password = as_envvar(settings.get("app-password"))
        if not app_password:
            raise Exception("Account app password not provided!")

        did, pds = resolve_identity(
            handle=as_envvar(settings.get("handle")),
            did=as_envvar(settings.get("did")),
            pds=as_envvar(settings.get("pds")),
        )

        # httpx: no read/write/pool timeout, only connecting is bounded (30 s).
        reqs = Request(timeout=Timeout(None, connect=30.0))

        self.bsky = Client2(pds, request=reqs)
        self.bsky.configure_proxy_header(
            service_type="bsky_appview",
            did=as_envvar(settings.get("bsky_appview")) or "did:web:api.bsky.app",
        )
        self.bsky.login(did, app_password)

    def __check_login(self):
        """Return ``self.bsky.me``; raise ``Exception`` if it is falsy."""
        login = self.bsky.me
        if not login:
            raise Exception("Client not logged in!")
        return login

    def _find_parent(self, parent_id: str):
        """Look up the Bluesky thread that mirrors the source post *parent_id*.

        Returns:
            ``None`` (after logging an error) when the database has no mapped
            thread, otherwise a 4-tuple ``(root_ref, reply_ref, x, y)`` where
            the refs are strong refs to the thread root and to the post being
            replied to, and ``x``/``y`` are elements 2 and 3 of the tuple
            returned by ``database.find_mapped_thread`` (the caller uses them
            as root/parent ids for database insertion).
        """
        login = self.__check_login()

        thread_tuple = database.find_mapped_thread(
            self.db,
            parent_id,
            self.input.user_id,
            self.input.service,
            login.did,
            SERVICE,
        )

        if not thread_tuple:
            LOGGER.error("Failed to find thread tuple in the database!")
            return None

        root_uri: str = thread_tuple[0]
        reply_uri: str = thread_tuple[1]

        root_cid = database.fetch_data(self.db, root_uri, login.did, SERVICE)["cid"]
        # The parent ref needs the CID stored for the reply URI itself; this
        # lookup previously used root_uri, pairing reply_uri with the root's CID.
        reply_cid = database.fetch_data(self.db, reply_uri, login.did, SERVICE)["cid"]

        root_record = models.AppBskyFeedPost.CreateRecordResponse(
            uri=root_uri, cid=root_cid
        )
        reply_record = models.AppBskyFeedPost.CreateRecordResponse(
            uri=reply_uri, cid=reply_cid
        )

        return (
            models.create_strong_ref(root_record),
            models.create_strong_ref(reply_record),
            thread_tuple[2],
            thread_tuple[3],
        )

    def _split_attachments(self, attachments: list[MediaInfo]):
        """Partition *attachments* into ``(supported, unsupported)`` lists.

        Supported means the MIME type starts with ``image/`` or ``video/``.
        """
        sup_media: list[MediaInfo] = []
        unsup_media: list[MediaInfo] = []

        for a in attachments:
            # TODO convert gifs to videos
            if a.mime.startswith(("image/", "video/")):
                sup_media.append(a)
            else:
                unsup_media.append(a)

        return (sup_media, unsup_media)

    def _split_media_per_post(
        self, tokens: list[client_utils.TextBuilder], media: list[MediaInfo]
    ):
        """Distribute *media* over the text chunks in *tokens*.

        Each video takes a post of its own; consecutive images share a post
        until it holds ``MAX_IMAGES_PER_POST``. Text posts are claimed in
        order, and once they run out extra posts with empty text are
        appended.

        Returns:
            A list of ``(text, attachments)`` tuples, one per post to send.
        """
        posts: list[dict] = [{"tokens": text, "attachments": []} for text in tokens]
        # Indices of the original text posts that have not received media yet.
        free_slots = iter(range(len(posts)))

        def claim_post() -> int:
            """Return the next media-free post index, appending a blank post if needed."""
            idx = next(free_slots, None)
            if idx is None:
                # Store a bare TextBuilder, the same shape as every other
                # entry (this used to be wrapped in a list).
                posts.append(
                    {"tokens": client_utils.TextBuilder().text(""), "attachments": []}
                )
                idx = len(posts) - 1
            return idx

        current_image_post_idx: int | None = None

        for att in media:
            if att.mime.startswith("video/"):
                # A video ends the current image group.
                current_image_post_idx = None
                posts[claim_post()]["attachments"].append(att)
            elif att.mime.startswith("image/"):
                if (
                    current_image_post_idx is None
                    or len(posts[current_image_post_idx]["attachments"])
                    >= MAX_IMAGES_PER_POST
                ):
                    current_image_post_idx = claim_post()
                posts[current_image_post_idx]["attachments"].append(att)

        result: list[tuple[client_utils.TextBuilder, list[MediaInfo]]] = [
            (p["tokens"], p["attachments"]) for p in posts
        ]
        return result

    def _media_within_limits(self, post: cross.Post, media: list[MediaInfo]) -> bool:
        """Return ``False`` (after logging why) if any attachment rules the post out."""
        for m in media:
            if m.mime.startswith("image/"):
                if len(m.io) > MAX_IMAGE_INPUT_BYTES:
                    LOGGER.error(
                        "Skipping post_id '%s', failed to download attachment! File too large.",
                        post.get_id(),
                    )
                    return False

            if m.mime.startswith("video/"):
                if m.mime != "video/mp4" and not self.options.encode_videos:
                    LOGGER.info(
                        "Video is not mp4, but encoding is disabled. Skipping '%s'...",
                        post.get_id(),
                    )
                    return False

                if len(m.io) > MAX_VIDEO_BYTES:
                    LOGGER.error(
                        "Skipping post_id '%s', failed to download attachment! File too large?",
                        post.get_id(),
                    )
                    return False
        return True

    def _send_images(self, text, attachments: list[MediaInfo], reply_to, labels, time_iso):
        """Compress *attachments* and publish them as one image post; return the response."""
        images: list[bytes] = []
        image_alts: list[str] = []
        image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []

        for attachment in attachments:
            image_io = compress_image(attachment.io, quality=100)
            metadata = get_media_meta(image_io)

            if len(image_io) > IMAGE_COMPRESS_THRESHOLD_BYTES:
                LOGGER.info("Compressing %s...", attachment.name)
                image_io = compress_image(image_io)

            images.append(image_io)
            image_alts.append(attachment.alt)
            image_aspect_ratios.append(
                models.AppBskyEmbedDefs.AspectRatio(
                    width=metadata["width"], height=metadata["height"]
                )
            )

        return self.bsky.send_images(
            text=text,
            images=images,
            image_alts=image_alts,
            image_aspect_ratios=image_aspect_ratios,
            reply_to=reply_to,
            labels=labels,
            time_iso=time_iso,
        )

    def _send_video(
        self, post_id, text, attachment: MediaInfo, reply_to, labels, time_iso
    ):
        """Publish *attachment* as a video post; return ``None`` if it is too long."""
        metadata = get_media_meta(attachment.io)
        if metadata["duration"] > MAX_VIDEO_DURATION:
            LOGGER.info(
                "Skipping post_id '%s', video attachment too long!",
                post_id,
            )
            return None

        video_io = attachment.io
        if attachment.mime != "video/mp4":
            LOGGER.info("Converting %s to mp4...", attachment.name)
            video_io = convert_to_mp4(video_io)

        # Dimensions come from the metadata read before any conversion.
        aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
            width=metadata["width"], height=metadata["height"]
        )

        return self.bsky.send_video(
            text=text,
            video=video_io,
            video_aspect_ratio=aspect_ratio,
            video_alt=attachment.alt,
            reply_to=reply_to,
            labels=labels,
            time_iso=time_iso,
        )

    def _record_created_posts(
        self, post: cross.Post, login, created_records, new_root_id, new_parent_id
    ) -> None:
        """Persist *created_records* and map each one to the source *post*."""
        db_post = database.find_post(
            self.db, post.get_id(), self.input.user_id, self.input.service
        )
        assert db_post, "ghghghhhhh"

        if new_root_id is None or new_parent_id is None:
            # Not a reply to a known thread: the first record starts a thread.
            first = created_records[0]
            new_root_id = database.insert_post(self.db, first.uri, login.did, SERVICE)
            database.store_data(
                self.db, first.uri, login.did, SERVICE, {"cid": first.cid}
            )

            new_parent_id = new_root_id
            database.insert_mapping(self.db, db_post["id"], new_parent_id)
            created_records = created_records[1:]

        for record in created_records:
            new_parent_id = database.insert_reply(
                self.db, record.uri, login.did, SERVICE, new_parent_id, new_root_id
            )
            database.store_data(
                self.db, record.uri, login.did, SERVICE, {"cid": record.cid}
            )
            database.insert_mapping(self.db, db_post["id"], new_parent_id)

    def accept_post(self, post: cross.Post):
        """Publish *post* as one or more chained Bluesky posts.

        The post is silently dropped (with a log line) when its parent has no
        mapped thread, when its tokens cannot be converted to rich text, or
        when an attachment exceeds the limits checked by
        ``_media_within_limits``. Created records are stored in the database
        and mapped to the source post.
        """
        login = self.__check_login()

        parent_id = post.get_parent_id()

        # used for db insertion
        new_root_id = None
        new_parent_id = None

        root_ref = None
        reply_ref = None
        if parent_id:
            parents = self._find_parent(parent_id)
            if not parents:
                return
            root_ref, reply_ref, new_root_id, new_parent_id = parents

        tokens = post.get_tokens().copy()

        unique_labels: set[str] = set()
        cw = post.get_spoiler()
        if cw:
            tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
            unique_labels.add("graphic-media")

            # from bsky.app, a post can only have one of those labels
            if PORN_PATTERN.search(cw):
                unique_labels.add("porn")
            elif ADULT_PATTERN.search(cw):
                unique_labels.add("sexual")

        if post.is_sensitive():
            unique_labels.add("graphic-media")

        labels = (
            models.ComAtprotoLabelDefs.SelfLabels(
                values=[
                    models.ComAtprotoLabelDefs.SelfLabel(val=label)
                    for label in unique_labels
                ]
            )
            if unique_labels
            else None
        )

        sup_media, unsup_media = self._split_attachments(post.get_attachments())

        # Unsupported attachments are appended to the text as links.
        if unsup_media:
            if tokens:
                tokens.append(cross.TextToken("\n"))
            for attachment in unsup_media:
                tokens.append(
                    cross.LinkToken(
                        attachment.url, f"[{get_filename_from_url(attachment.url)}]"
                    )
                )
                tokens.append(cross.TextToken(" "))

        if post.get_text_type() == "text/x.misskeymarkdown":
            tokens, status = mfm_util.strip_mfm(tokens)
            post_url = post.get_post_url()
            if status and post_url:
                tokens.append(cross.TextToken("\n"))
                tokens.append(
                    cross.LinkToken(post_url, "[Post contains MFM, see original]")
                )

        split_tokens: list[list[cross.Token]] = cross.split_tokens(
            tokens, MAX_POST_LENGTH
        )
        post_text: list[client_utils.TextBuilder] = []

        # convert tokens into rich text. skip post if contains unsupported tokens
        for block in split_tokens:
            rich_text = tokens_to_richtext(block)

            if not rich_text:
                LOGGER.error(
                    "Skipping '%s' as it contains invalid rich text types!",
                    post.get_id(),
                )
                return
            post_text.append(rich_text)

        if not post_text:
            post_text = [client_utils.TextBuilder().text("")]

        if not self._media_within_limits(post, sup_media):
            return

        time_iso = post.get_timestamp()
        created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
        baked_media = self._split_media_per_post(post_text, sup_media)

        for text, attachments in baked_media:
            reply_to = (
                models.AppBskyFeedPost.ReplyRef(parent=reply_ref, root=root_ref)
                if root_ref and reply_ref
                else None
            )

            if not attachments:
                if reply_to is not None:
                    new_post = self.bsky.send_post(
                        text, reply_to=reply_to, labels=labels, time_iso=time_iso
                    )
                else:
                    new_post = self.bsky.send_post(
                        text, labels=labels, time_iso=time_iso
                    )
            elif attachments[0].mime.startswith("image/"):
                # if a single post is an image - everything else is an image
                # Each media post carries its own chunk of text; this used to
                # send post_text[0] for every media post.
                new_post = self._send_images(
                    text, attachments, reply_to, labels, time_iso
                )
            else:
                # video is guaranteed to be the only attachment
                new_post = self._send_video(
                    post.get_id(), text, attachments[0], reply_to, labels, time_iso
                )
                if new_post is None:
                    # NOTE(review): chunks already published in earlier
                    # iterations are neither deleted nor recorded in the
                    # database when we bail out here.
                    return

            if not root_ref:
                root_ref = models.create_strong_ref(new_post)

            self.bsky.create_gates(
                self.options.thread_gate,
                self.options.quote_gate,
                new_post.uri,
                time_iso=time_iso,
            )
            # The next chunk replies to the one just created.
            reply_ref = models.create_strong_ref(new_post)
            created_records.append(new_post)

        self._record_created_posts(
            post, login, created_records, new_root_id, new_parent_id
        )

    def delete_post(self, identifier: str):
        """Delete every Bluesky post mapped to the source post *identifier*.

        Does nothing when the source post is unknown. Mappings are processed
        in reverse order.
        """
        login = self.__check_login()

        post = database.find_post(
            self.db, identifier, self.input.user_id, self.input.service
        )
        if not post:
            return

        mappings = database.find_mappings(self.db, post["id"], SERVICE, login.did)
        for mapping in reversed(mappings):
            LOGGER.info("Deleting '%s'...", mapping[0])
            self.bsky.delete_post(mapping[0])
            # NOTE(review): argument order here is (SERVICE, did) while
            # __delete_repost passes (did, SERVICE) - confirm against
            # database.delete_post's signature; one of the two is likely wrong.
            database.delete_post(self.db, mapping[0], SERVICE, login.did)

    def accept_repost(self, repost_id: str, reposted_id: str):
        """Repost the Bluesky mirror of *reposted_id* on behalf of *repost_id*.

        Any existing mirror of the same repost is removed first. Returns
        without doing anything when either source post is unknown or the
        reposted post has no Bluesky mapping.
        """
        login, repost = self.__delete_repost(repost_id)
        if not (login and repost):
            return

        reposted = database.find_post(
            self.db, reposted_id, self.input.user_id, self.input.service
        )
        if not reposted:
            return

        # mappings of the reposted post
        mappings = database.find_mappings(self.db, reposted["id"], SERVICE, login.did)
        if mappings:
            target_uri = mappings[0][0]
            cid = database.fetch_data(self.db, target_uri, login.did, SERVICE)["cid"]
            rsp = self.bsky.repost(target_uri, cid)

            internal_id = database.insert_repost(
                self.db, rsp.uri, reposted["id"], login.did, SERVICE
            )
            database.store_data(self.db, rsp.uri, login.did, SERVICE, {"cid": rsp.cid})
            database.insert_mapping(self.db, repost["id"], internal_id)

    def __delete_repost(
        self, repost_id: str
    ) -> tuple[models.AppBskyActorDefs.ProfileViewDetailed | None, dict | None]:
        """Undo the Bluesky mirror of the source repost *repost_id*, if any.

        Returns:
            ``(None, None)`` when the source repost is unknown, otherwise
            ``(login, repost_row)``.
        """
        login = self.__check_login()

        repost = database.find_post(
            self.db, repost_id, self.input.user_id, self.input.service
        )
        if not repost:
            return None, None

        mappings = database.find_mappings(self.db, repost["id"], SERVICE, login.did)
        if mappings:
            LOGGER.info("Deleting '%s'...", mappings[0][0])
            self.bsky.unrepost(mappings[0][0])
            # NOTE(review): argument order here is (did, SERVICE) while
            # delete_post passes (SERVICE, did) - confirm against
            # database.delete_post's signature; one of the two is likely wrong.
            database.delete_post(self.db, mappings[0][0], login.did, SERVICE)
        return login, repost

    def delete_repost(self, repost_id: str):
        """Remove the Bluesky mirror of the source repost *repost_id*."""
        self.__delete_repost(repost_id)