social media crossposting tool. 3rd time's the charm

add pds input and mastodon api output

zenfyr.dev 619099ff 713b411d

+31 -1
atproto2.py
···
from typing import Any
-from atproto import client_utils, Client, AtUri
+from atproto import client_utils, Client, AtUri, IdResolver
from atproto_client import models
+from util import LOGGER
+
+def resolve_identity(
+        handle: str | None = None,
+        did: str | None = None,
+        pds: str | None = None):
+    """helper to try and resolve identity from provided parameters, a valid handle is enough"""
+
+    if did and pds:
+        return did, pds[:-1] if pds.endswith('/') else pds
+
+    resolver = IdResolver()
+    if not did:
+        if not handle:
+            raise Exception("ATP handle not specified!")
+        LOGGER.info("Resolving ATP identity for %s...", handle)
+        did = resolver.handle.resolve(handle)
+        if not did:
+            raise Exception("Failed to resolve DID!")
+
+    if not pds:
+        LOGGER.info("Resolving PDS from DID document...")
+        did_doc = resolver.did.resolve(did)
+        if not did_doc:
+            raise Exception(f"Failed to resolve DID doc for '{did}'")
+        pds = did_doc.get_pds_endpoint()
+        if not pds:
+            raise Exception("Failed to resolve PDS!")
+
+    return did, pds[:-1] if pds.endswith('/') else pds

class Client2(Client):
    def __init__(self, base_url: str | None = None, *args: Any, **kwargs: Any) -> None:
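A minimal sketch of calling the new helper on its own, assuming the `atproto` package and network access; the handle is a placeholder:

```python
from atproto2 import resolve_identity

# a valid handle alone is enough; the DID and PDS are resolved on demand
did, pds = resolve_identity(handle='example.bsky.social')

# passing both skips network resolution; a trailing '/' on the PDS is stripped
did, pds = resolve_identity(did=did, pds=pds + '/')
```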
+344 -44
bluesky.py
···
-from atproto import client_utils, IdResolver, Request
+from atproto import client_utils, Request, AsyncFirehoseSubscribeReposClient, CAR, CID
from atproto_client import models
-from atproto2 import Client2
+from atproto_client.models.utils import get_or_create as get_model_or_create
+from atproto_client.models.blob_ref import BlobRef
+from atproto_firehose import models as firehose_models, parse_subscribe_repos_message as parse_firehose
+from atproto2 import Client2, resolve_identity
from httpx import Timeout
import json
import cross
···
import media_util
from util import LOGGER
import re
+from typing import Callable, Any
# only for lexicon reference
SERVICE = 'https://bsky.app'
···
ADULT_PATTERN = re.compile(r"\b(sexual content|nsfw|erotic|adult only|18\+)\b", re.IGNORECASE)
PORN_PATTERN = re.compile(r"\b(porn|yiff|hentai|pornographic|fetish)\b", re.IGNORECASE)
+def tokenize_post(post: dict) -> list[cross.Token]:
+    text: str = post.get('text', '')
+    if not text:
+        return []
+    text = text.encode(encoding='utf-8').decode(encoding='utf-8')
+
+    facets: list[dict] = post.get('facets', [])
+    if not facets:
+        return [cross.TextToken(text)]
+
+    slices: list[tuple[int, int, str, str]] = []
+
+    for facet in facets:
+        features: list[dict] = facet.get('features', [])
+        if not features:
+            continue
+
+        # we don't support overlapping facets/features
+        feature = features[0]
+        feature_type = feature['$type']
+        index = facet['index']
+        if feature_type == 'app.bsky.richtext.facet#tag':
+            slices.append((index['byteStart'], index['byteEnd'], 'tag', feature['tag']))
+        elif feature_type == 'app.bsky.richtext.facet#link':
+            slices.append((index['byteStart'], index['byteEnd'], 'link', feature['uri']))
+        elif feature_type == 'app.bsky.richtext.facet#mention':
+            slices.append((index['byteStart'], index['byteEnd'], 'mention', feature['did']))
+
+    if not slices:
+        return [cross.TextToken(text)]
+
+    slices.sort(key=lambda s: s[0])
+    unique: list[tuple[int, int, str, str]] = []
+    current_end = 0
+    for start, end, ttype, val in slices:
+        if start >= current_end:
+            unique.append((start, end, ttype, val))
+            current_end = end
+
+    if not unique:
+        return [cross.TextToken(text)]
+
+    tokens: list[cross.Token] = []
+    prev = 0
+
+    for start, end, ttype, val in unique:
+        if start > prev:
+            # text between facets
+            tokens.append(cross.TextToken(text[prev:start]))
+        # facet token
+        if ttype == 'link':
+            label = text[start:end]
+
+            # try to unflatten links
+            split = val.split('://')
+            if len(split) > 1:
+                if split[1].startswith(label):
+                    tokens.append(cross.LinkToken(val, ''))
+                elif label.endswith('...') and split[1].startswith(label[:-3]):
+                    tokens.append(cross.LinkToken(val, ''))
+                else:
+                    tokens.append(cross.LinkToken(val, label))
+            else:
+                # no scheme separator; keep the labeled link instead of dropping it
+                tokens.append(cross.LinkToken(val, label))
+        elif ttype == 'tag':
+            tokens.append(cross.TagToken(val))
+        elif ttype == 'mention':
+            tokens.append(cross.MentionToken(text[start:end], val))
+        prev = end
+
+    if prev < len(text):
+        tokens.append(cross.TextToken(text[prev:]))
+
+    for t in tokens:
+        LOGGER.debug(t.__dict__)
+
+    return tokens
+
+class BlueskyPost(cross.Post):
+    def __init__(self, pds_url: str, did: str, post: dict) -> None:
+        super().__init__()
+        self.post = post
+        self.tokens = tokenize_post(post)
+
+        self.id = json.dumps(self.post['$xpost.strongRef'], sort_keys=True)
+
+        self.parent_id = None
+        if self.post.get('reply'):
+            self.parent_id = json.dumps(self.post['reply']['parent'], sort_keys=True)
+
+        labels = self.post.get('labels', {}).get('values')
+        self.cw = ''
+        if labels:
+            self.cw = ', '.join([str(label['val']).replace('-', ' ') for label in labels])
+
+        def get_blob_url(blob: str):
+            return f'{pds_url}/xrpc/com.atproto.sync.getBlob?did={did}&cid={blob}'
+
+        attachments: list[cross.MediaAttachment] = []
+        embed = self.post.get('embed', {})
+        if embed.get('$type') == 'app.bsky.embed.images':
+            model = get_model_or_create(embed, model=models.AppBskyEmbedImages.Main)
+            assert isinstance(model, models.AppBskyEmbedImages.Main)
+
+            for image in model.images:
+                attachments.append(BlueskyAttachment(
+                    get_blob_url(image.image.cid.encode()),
+                    'image', image.alt
+                ))
+        elif embed.get('$type') == 'app.bsky.embed.video':
+            model = get_model_or_create(embed, model=models.AppBskyEmbedVideo.Main)
+            assert isinstance(model, models.AppBskyEmbedVideo.Main)
+
+            attachments.append(BlueskyAttachment(
+                get_blob_url(model.video.cid.encode()),
+                'video', model.alt if model.alt else ''
+            ))
+        self.attachments = attachments
+
+    def get_tokens(self) -> list[cross.Token]:
+        return self.tokens
+
+    def get_parent_id(self) -> str | None:
+        return self.parent_id
+
+    def get_post_date_iso(self) -> str:
+        return self.post.get('createdAt') or super().get_post_date_iso()
+
+    def get_cw(self) -> str:
+        return self.cw or ''
+
+    def get_id(self) -> str:
+        return self.id
+
+    def get_languages(self) -> list[str]:
+        return self.post.get('langs', []) or []
+
+    def is_sensitive(self) -> bool:
+        return bool(self.post.get('labels', {}).get('values'))
+
+    def get_attachments(self) -> list[cross.MediaAttachment]:
+        return self.attachments or []
+
+class BlueskyAttachment(cross.MediaAttachment):
+    def __init__(self, url: str, type: str, alt: str) -> None:
+        super().__init__()
+        self.url = url
+        self.type = type
+        self.alt = alt
+
+    def get_url(self) -> str:
+        return self.url
+
+    def get_type(self) -> str | None:
+        return self.type
+
+    def create_meta(self, bytes: bytes) -> cross.MediaMeta:
+        o_meta = media_util.get_media_meta(bytes)
+        return cross.MediaMeta(o_meta['width'], o_meta['height'], o_meta.get('duration', -1))
+
+    def get_alt(self) -> str:
+        return self.alt
+
+class BlueskyInput(cross.Input):
+    def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+        self.options = settings.get('options', {})
+        did, pds = resolve_identity(
+            handle=util.as_envvar(settings.get('handle')),
+            did=util.as_envvar(settings.get('did')),
+            pds=util.as_envvar(settings.get('pds'))
+        )
+        self.pds = pds
+
+        # the PDS is not the service; the lexicon and record ids are the same across PDSes
+        super().__init__(SERVICE, did, settings, db)
+
+    def _on_post(self, outputs: list[cross.Output], post: dict[str, Any]):
+        post_ref = json.dumps(post['$xpost.strongRef'], sort_keys=True)
+
+        parent_ref = None
+        if post.get('reply'):
+            parent_ref = json.dumps(post['reply']['parent'], sort_keys=True)
+
+        root_id = None
+        parent_id = None
+        if parent_ref:
+            parent_post = database.find_post(self.db, parent_ref, self.user_id, self.service)
+            if not parent_post:
+                LOGGER.info("Skipping '%s' as parent post was not found in db!", post_ref)
+                return
+
+            root_id = parent_post['id']
+            parent_id = root_id
+            if parent_post['root_id']:
+                root_id = parent_post['root_id']
+
+        LOGGER.info("Crossposting '%s'...", post_ref)
+        if root_id and parent_id:
+            database.insert_reply(
+                self.db,
+                post_ref,
+                self.user_id,
+                self.service,
+                parent_id,
+                root_id
+            )
+        else:
+            database.insert_post(
+                self.db,
+                post_ref,
+                self.user_id,
+                self.service
+            )
+
+        cross_post = BlueskyPost(self.pds, self.user_id, post)
+        for output in outputs:
+            output.accept_post(cross_post)
+
+    def _on_delete_post(self, outputs: list[cross.Output], post_id: dict):
+        identifier = json.dumps(post_id, sort_keys=True)
+        post = database.find_post(self.db, identifier, self.user_id, self.service)
+        if not post:
+            return
+
+        LOGGER.info("Deleting '%s'...", identifier)
+        for output in outputs:
+            output.delete_post(identifier)
+        database.delete_post(self.db, identifier, self.user_id, self.service)
+
+class BlueskyPdsInput(BlueskyInput):
+    def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+        super().__init__(settings, db)
+
+    def __on_commit(self, outputs: list[cross.Output], message: firehose_models.MessageFrame):
+        blocks = message.body.get('blocks')
+        if not blocks:
+            return
+
+        parsed = parse_firehose(message)
+        if not isinstance(parsed, models.ComAtprotoSyncSubscribeRepos.Commit):
+            return
+        blocks = parsed.blocks
+
+        car = None
+        def get_lazy_repo() -> CAR:
+            nonlocal car, blocks
+
+            if isinstance(blocks, str):
+                blocks = blocks.encode()
+            assert blocks
+
+            if car:
+                return car
+            car = CAR.from_bytes(blocks)
+            return car
+
+        for op in parsed.ops:
+            if op.action == 'delete':
+                if not op.prev:
+                    continue
+
+                if not op.path.startswith('app.bsky.feed.post'):
+                    continue
+
+                self._on_delete_post(outputs, {
+                    'cid': op.prev.encode(),
+                    'uri': f'at://{parsed.repo}/{op.path}'
+                })
+                continue
+
+            if op.action != 'create':
+                continue
+
+            if not op.cid:
+                continue
+
+            record_data = get_lazy_repo().blocks.get(op.cid)
+            if not record_data:
+                continue
+
+            record_dict = dict(record_data)
+            record_dict['$xpost.strongRef'] = {
+                'cid': op.cid.encode(),
+                'uri': f'at://{parsed.repo}/{op.path}'
+            }
+            if record_dict.get('$type') == 'app.bsky.feed.post':
+                self._on_post(outputs, record_dict)
+
+
+    async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
+        streaming: str = f"wss://{self.pds.split('://', 1)[1]}/xrpc"
+
+        client = AsyncFirehoseSubscribeReposClient(base_uri=streaming)
+
+        async def on_message(message: firehose_models.MessageFrame):
+            if message.header.t != '#commit':
+                return
+
+            if message.body.get('repo') != self.user_id:
+                return
+
+            if message.body.get('tooBig'):
+                LOGGER.error("#commit message is tooBig!")
+                return
+
+            submit(lambda: self.__on_commit(outputs, message))
+
+        LOGGER.info("Listening to %s...", streaming + '/com.atproto.sync.subscribeRepos')
+        await client.start(on_message)
+
class BlueskyOutput(cross.Output):
    def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
        super().__init__(input, settings, db)
···
        if not util.as_envvar(settings.get('app-password')):
            raise Exception("Account app password not provided!")
-        resolver = IdResolver()
-        did: str | None = util.as_envvar(settings.get('did'))
-        if not did:
-            handle = util.as_envvar(settings.get('handle'))
-            if not handle:
-                raise Exception("ATP handle not specified!")
-            LOGGER.info("Resolving ATP identity for %s...", handle)
-            did = resolver.handle.resolve(handle)
-            if not did:
-                raise Exception("Failed to resolve DID!")
-
-        pds: str | None = util.as_envvar(settings.get('pds'))
-        if not pds:
-            LOGGER.info("Resolving PDS from DID document...")
-            did_doc = resolver.did.resolve(did)
-            if not did_doc:
-                raise Exception("Failed to resolve DID doc for '%s'", did)
-            pds = did_doc.get_pds_endpoint()
-            if not pds:
-                raise Exception("Failed to resolve PDS!")
+        did, pds = resolve_identity(
+            handle=util.as_envvar(settings.get('handle')),
+            did=util.as_envvar(settings.get('did')),
+            pds=util.as_envvar(settings.get('pds'))
+        )

        reqs = Request(timeout=Timeout(None, connect=30.0))
···
        if not login:
            raise Exception("Client not logged in!")
-        reply_data = database.find_post(self.db, parent_id, self.input.user_id, self.input.service)
-        assert reply_data, "reply_data requested, but doesn't exist in db (should've been skipped bt firehose)"
-
-        reply_mappings = [json.loads(data[0]) for data in database.find_mappings(self.db, reply_data['id'], SERVICE, login.did)]
-        if not reply_mappings:
-            LOGGER.error("Failed to find mappings for a post in the db!")
+        thread_tuple = database.find_mapped_thread(
+            self.db,
+            parent_id,
+            self.input.user_id,
+            self.input.service,
+            login.did,
+            SERVICE
+        )
+
+        if not thread_tuple:
+            LOGGER.error("Failed to find thread tuple in the database!")
            return None
-
-        reply_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_mappings[-1]['uri']), cid=str(reply_mappings[-1]['cid']))
-        root_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_mappings[0]['uri']), cid=str(reply_mappings[0]['cid']))
-        if reply_data['root_id']:
-            root_data = database.find_post_by_id(self.db, reply_data['root_id'])
-            assert root_data, "root_data requested but doesn't exist in db"
-
-            root_mappings = [json.loads(data[0]) for data in database.find_mappings(self.db, reply_data['root_id'], SERVICE, login.did)]
-            if not root_mappings:
-                LOGGER.error("Failed to find mappings for a post in the db!")
-                return None
-            root_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(root_mappings[0]['uri']), cid=str(root_mappings[0]['cid']))
+
+        root_ref = json.loads(thread_tuple[0])
+        reply_ref = json.loads(thread_tuple[1])
+
+        root_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(root_ref['uri']), cid=str(root_ref['cid']))
+        reply_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_ref['uri']), cid=str(reply_ref['cid']))
        return (
            models.create_strong_ref(root_record),
            models.create_strong_ref(reply_record),
-            reply_data['root_id'],
-            reply_data['id']
+            thread_tuple[2],
+            thread_tuple[3]
        )

    def _split_attachments(self, attachments: list[cross.MediaAttachment]):
···
            return
        root_ref, reply_ref, new_root_id, new_parent_id = parents
-        tokens = post.get_tokens()
+        tokens = post.get_tokens().copy()
        unique_labels: set[str] = set()
        cw = post.get_cw()
···
            tokens.append(cross.TextToken(' '))
-        split_tokens: list[list[cross.Token]] = util.split_tokens(post.get_tokens(), 300)
+        split_tokens: list[list[cross.Token]] = util.split_tokens(tokens, 300)
        post_text: list[client_utils.TextBuilder] = []
        # convert tokens into rich text. skip post if contains unsupported tokens
···
        mappings = database.find_mappings(self.db, post['id'], SERVICE, login.did)
        for mapping in mappings[::-1]:
+            LOGGER.info("Deleting '%s'...", mapping[0])
            self.bsky.delete_post(json.loads(mapping[0])['uri'])
            database.delete_post(self.db, mapping[0], SERVICE, login.did)
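A rough illustration of what `tokenize_post` yields for a hand-built record in the `app.bsky.feed.post` shape (facet indices count UTF-8 bytes; the values here are assumed for the example):

```python
from bluesky import tokenize_post

post = {
    'text': 'hello #bsky',
    'facets': [{
        'index': {'byteStart': 6, 'byteEnd': 11},
        'features': [{'$type': 'app.bsky.richtext.facet#tag', 'tag': 'bsky'}],
    }],
}

# roughly [TextToken('hello '), TagToken('bsky')]
tokens = tokenize_post(post)
```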
+1 -1
cross.py
···
        self.settings = settings
        self.db = db

-    async def listen(self, handler: Callable[[Post], Any]):
+    async def listen(self, outputs: list, handler: Callable[[Post], Any]):
        pass

class Output():
+36
database.py
···
        with self.lock:
            self.conn.close()

+def find_mapped_thread(
+        db: DataBaseWorker,
+        parent_id: str,
+        input_user: str,
+        input_service: str,
+        output_user: str,
+        output_service: str):
+
+    reply_data: dict | None = find_post(db, parent_id, input_user, input_service)
+    if not reply_data:
+        return None
+
+    reply_mappings = find_mappings(db, reply_data['id'], output_service, output_user)
+    if not reply_mappings:
+        return None
+
+    reply_identifier = reply_mappings[-1]
+    root_identifier = reply_mappings[0]
+    if reply_data['root_id']:
+        root_data = find_post_by_id(db, reply_data['root_id'])
+        if not root_data:
+            return None
+
+        root_mappings = find_mappings(db, reply_data['root_id'], output_service, output_user)
+        if not root_mappings:
+            return None
+        root_identifier = root_mappings[0]
+
+    return (
+        root_identifier[0],  # real ids
+        reply_identifier[0],
+        reply_data['root_id'],  # db ids
+        reply_data['id']
+    )


def insert_post(db: DataBaseWorker, identifier: str, user_id: str, serivce: str) -> int:
    db.execute(
        """
+10 -4
main.py
···
INPUTS = {
    "mastodon-wss": lambda settings, db: mastodon.MastodonInput(settings, db),
-    "misskey-wss": lambda settigs, db: misskey.MisskeyInput(settigs, db)
+    "misskey-wss": lambda settings, db: misskey.MisskeyInput(settings, db),
+    "bluesky-pds-wss": lambda settings, db: bluesky.BlueskyPdsInput(settings, db)
}

OUTPUTS = {
-    "bluesky": lambda input, settings, db: bluesky.BlueskyOutput(input, settings, db)
+    "bluesky": lambda input, settings, db: bluesky.BlueskyOutput(input, settings, db),
+    "mastodon": lambda input, settings, db: mastodon.MastodonOutput(input, settings, db)
}

def execute(data_dir):
···
    input = INPUTS[input_settings['type']](input_settings, db_worker)
+    if not outputs_settings:
+        LOGGER.warning("No outputs specified! Check your config!")
+
    outputs: list[cross.Output] = []
    for output_settings in outputs_settings:
        outputs.append(OUTPUTS[output_settings['type']](input, output_settings, db_worker))
···
        try:
            task()
-            queue.task_done()
        except Exception as e:
            LOGGER.error(f"Exception in worker thread!\n{e}")
            traceback.print_exc()
+        finally:
+            queue.task_done()

    task_queue = queue.Queue()
-    thread = threading.Thread(target=worker, args=(task_queue,))
+    thread = threading.Thread(target=worker, args=(task_queue,), daemon=True)
    thread.start()

    LOGGER.info('Connecting to %s...', input.service)
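A hypothetical config entry wiring the new input and output together; the field names are inferred from the `settings.get(...)` lookups above, not from a documented schema:

```python
pipeline = {
    "input": {"type": "bluesky-pds-wss", "handle": "example.bsky.social"},
    "outputs": [
        {"type": "mastodon", "instance": "https://example.social", "token": "$MASTODON_TOKEN"},
    ],
}
```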
+333 -1
mastodon.py
···
import database
from database import DataBaseWorker
from typing import Callable, Any
-import asyncio
+import asyncio, time
+import magic
from bs4 import BeautifulSoup, Tag
from bs4.element import NavigableString
···
                LOGGER.error(e, stack_info=True, exc_info=True)
                LOGGER.info("Reconnecting to %s...", self.streaming)
                continue
+
+class MastodonOutput(cross.Output):
+    def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
+        super().__init__(input, settings, db)
+        self.options = settings.get('options') or {}
+        self.token = util.as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))
+        instance: str = util.as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))
+
+        self.service = instance[:-1] if instance.endswith('/') else instance
+
+        LOGGER.info("Verifying %s credentials...", self.service)
+        response = requests.get(f"{self.service}/api/v1/accounts/verify_credentials", headers={
+            'Authorization': f'Bearer {self.token}'
+        })
+        if response.status_code != 200:
+            LOGGER.error("Failed to validate user credentials!")
+            response.raise_for_status()
+            return
+        self.user_id: str = response.json()["id"]
+
+        LOGGER.info("Getting %s configuration...", self.service)
+        response = requests.get(f"{self.service}/api/v1/instance", headers={
+            'Authorization': f'Bearer {self.token}'
+        })
+        if response.status_code != 200:
+            LOGGER.error("Failed to get instance info!")
+            response.raise_for_status()
+            return
+
+        instance_info: dict = response.json()
+        configuration: dict = instance_info['configuration']
+
+        statuses_config: dict = configuration.get('statuses', {})
+        self.max_characters: int = statuses_config.get('max_characters', 500)
+        self.max_media_attachments: int = statuses_config.get('max_media_attachments', 4)
+        self.characters_reserved_per_url: int = statuses_config.get('characters_reserved_per_url', 23)
+
+        media_config: dict = configuration.get('media_attachments', {})
+        self.image_size_limit: int = media_config.get('image_size_limit', 16777216)
+        self.video_size_limit: int = media_config.get('video_size_limit', 103809024)
+        self.supported_mime_types: list[str] = media_config.get('supported_mime_types', [
+            'audio/ogg',
+            'image/jpeg',
+            'image/png',
+            'video/mp4'
+        ])
+
+        # *oma max post chars
+        max_toot_chars = instance_info.get('max_toot_chars')
+        if max_toot_chars:
+            self.max_characters = max_toot_chars
+
+        # *oma max upload limit
+        upload_limit = instance_info.get('upload_limit')
+        if upload_limit:
+            self.image_size_limit = upload_limit
+            self.video_size_limit = upload_limit
+
+        self.text_format = 'text/plain'
+        pleroma = instance_info.get('pleroma')
+        if pleroma:
+            post_formats: list[str] = pleroma.get('metadata', {}).get('post_formats', [])
+            if 'text/x.misskeymarkdown' in post_formats:
+                self.text_format = 'text/x.misskeymarkdown'
+            elif 'text/markdown' in post_formats:
+                self.text_format = 'text/markdown'
+
+    def upload_media(self, attachments: list[cross.MediaAttachment]) -> list[str] | None:
+        prepare: list[tuple[str, str, bytes]] = []
+
+        for attachment in attachments:
+            alt = attachment.get_alt()
+            mbytes: bytes | None
+
+            if attachment.get_type() == 'image':
+                mbytes = media_util.download_blob(attachment.get_url(), self.image_size_limit)
+            elif attachment.get_type() in {'video', 'gif'}:
+                mbytes = media_util.download_blob(attachment.get_url(), self.video_size_limit)
+            else:
+                mbytes = media_util.download_blob(attachment.get_url(), 7_000_000)
+
+            if not mbytes:
+                return None
+
+            filename = media_util.get_filename_from_url(attachment.get_url())
+            LOGGER.info("Downloaded %s", filename)
+            prepare.append((filename, alt, mbytes))
+
+        uploads: list[dict] = []
+
+        for name, desc, bbytes in prepare:
+            mime_type = magic.Magic(mime=True).from_buffer(bbytes)
+            if not mime_type:
+                mime_type = 'application/octet-stream'
+
+            files = {
+                'file': (name, bbytes, mime_type)
+            }
+            data = {}
+            if desc:
+                data['description'] = desc
+
+            req = requests.post(f"{self.service}/api/v2/media", headers={
+                'Authorization': f'Bearer {self.token}'
+            }, files=files, data=data)
+
+            if req.status_code == 200:
+                LOGGER.info("Uploaded %s! (%s)", name, req.json()['id'])
+                uploads.append({
+                    'done': True,
+                    'id': req.json()['id']
+                })
+            elif req.status_code == 202:
+                LOGGER.info("Waiting for %s to process!", name)
+                uploads.append({
+                    'done': False,
+                    'id': req.json()['id']
+                })
+            else:
+                LOGGER.error("Failed to upload %s! %s", name, req.text)
+                req.raise_for_status()
+
+        while any([not val['done'] for val in uploads]):
+            LOGGER.info("Waiting for media to process...")
+            time.sleep(3)
+            for media in uploads:
+                if media['done']:
+                    continue
+
+                reqs = requests.get(f"{self.service}/api/v1/media/{media['id']}", headers={
+                    'Authorization': f'Bearer {self.token}'
+                })
+
+                if reqs.status_code == 206:
+                    continue
+
+                if reqs.status_code == 200:
+                    media['done'] = True
+                    continue
+                reqs.raise_for_status()
+
+        return [val['id'] for val in uploads]
+
+    def token_to_string(self, tokens: list[cross.Token]) -> str | None:
+        p_text: str = ''
+
+        for token in tokens:
+            if isinstance(token, cross.TextToken):
+                p_text += token.text
+            elif isinstance(token, cross.TagToken):
+                p_text += '#' + token.tag
+            elif isinstance(token, cross.LinkToken):
+                if util.canonical_label(token.label, token.href):
+                    p_text += token.href
+                else:
+                    if self.text_format == 'text/plain':
+                        p_text += f'{token.label}: {token.href}'
+                    elif self.text_format in {'text/x.misskeymarkdown', 'text/markdown'}:
+                        p_text += f'[{token.label}]({token.href})'
+                    else:
+                        return None
+
+        return p_text
+
+    def split_tokens_media(self, tokens: list[cross.Token], media: list[cross.MediaAttachment]):
+        split_tokens = util.split_tokens(tokens, self.max_characters, self.characters_reserved_per_url)
+        post_text: list[str] = []
+
+        for block in split_tokens:
+            baked_text = self.token_to_string(block)
+
+            if baked_text is None:
+                return None
+            post_text.append(baked_text)
+
+        if not post_text:
+            post_text = ['']
+
+        posts: list[dict] = [{"text": text, "attachments": []} for text in post_text]
+        available_indices: list[int] = list(range(len(posts)))
+
+        current_image_post_idx: int | None = None
+
+        def make_blank_post() -> dict:
+            return {
+                "text": '',
+                "attachments": []
+            }
+
+        def pop_next_empty_index() -> int:
+            if available_indices:
+                return available_indices.pop(0)
+            else:
+                new_idx = len(posts)
+                posts.append(make_blank_post())
+                return new_idx
+
+        for att in media:
+            if (
+                current_image_post_idx is not None
+                and len(posts[current_image_post_idx]["attachments"]) < self.max_media_attachments
+            ):
+                posts[current_image_post_idx]["attachments"].append(att)
+            else:
+                idx = pop_next_empty_index()
+                posts[idx]["attachments"].append(att)
+                current_image_post_idx = idx
+
+        result: list[tuple[str, list[cross.MediaAttachment]]] = []
+
+        for p in posts:
+            result.append((p['text'], p["attachments"]))
+
+        return result
+
+    def accept_post(self, post: cross.Post):
+        parent_id = post.get_parent_id()
+
+        new_root_id: int | None = None
+        new_parent_id: int | None = None
+
+        reply_ref: str | None = None
+        if parent_id:
+            thread_tuple = database.find_mapped_thread(
+                self.db,
+                parent_id,
+                self.input.user_id,
+                self.input.service,
+                self.user_id,
+                self.service
+            )
+
+            if not thread_tuple:
+                LOGGER.error("Failed to find thread tuple in the database!")
+                return None
+
+            _, reply_ref, new_root_id, new_parent_id = thread_tuple
+
+        lang: str
+        if post.get_languages():
+            lang = post.get_languages()[0]
+        else:
+            lang = 'en'
+
+        raw_statuses = self.split_tokens_media(post.get_tokens(), post.get_attachments())
+        if not raw_statuses:
+            LOGGER.error("Failed to split post into statuses?")
+            return None
+        baked_statuses = []
+
+        for status, raw_media in raw_statuses:
+            media: list[str] | None = None
+            if raw_media:
+                media = self.upload_media(raw_media)
+                if not media:
+                    LOGGER.error("Failed to upload attachments!")
+                    return None
+                baked_statuses.append((status, media))
+                continue
+            baked_statuses.append((status, []))
+
+        created_statuses: list[str] = []
+
+        for status, media in baked_statuses:
+            payload = {
+                'status': status,
+                'media_ids': media or [],
+                'spoiler_text': post.get_cw(),
+                'visibility': 'unlisted',
+                'content_type': self.text_format,
+                'language': lang
+            }
+
+            if media:
+                payload['sensitive'] = post.is_sensitive()
+
+            if reply_ref:
+                payload['in_reply_to_id'] = reply_ref
+
+            reqs = requests.post(f'{self.service}/api/v1/statuses', headers={
+                'Authorization': f'Bearer {self.token}',
+                'Content-Type': 'application/json'
+            }, json=payload)
+
+            if reqs.status_code != 200:
+                LOGGER.error("Failed to post status! %s - %s", reqs.status_code, reqs.text)
+                reqs.raise_for_status()
+
+            reply_ref = reqs.json()['id']
+            LOGGER.info("Created new status %s!", reply_ref)
+
+            created_statuses.append(reqs.json()['id'])
+
+        db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)
+        assert db_post, "input post missing from the db"
+
+        if new_root_id is None or new_parent_id is None:
+            new_root_id = database.insert_post(
+                self.db,
+                created_statuses[0],
+                self.user_id,
+                self.service
+            )
+            new_parent_id = new_root_id
+            database.insert_mapping(self.db, db_post['id'], new_parent_id)
+            created_statuses = created_statuses[1:]
+
+        for db_id in created_statuses:
+            new_parent_id = database.insert_reply(
+                self.db,
+                db_id,
+                self.user_id,
+                self.service,
+                new_parent_id,
+                new_root_id
+            )
+            database.insert_mapping(self.db, db_post['id'], new_parent_id)
+
+    def delete_post(self, identifier: str):
+        post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)
+        if not post:
+            return
+
+        mappings = database.find_mappings(self.db, post['id'], self.service, self.user_id)
+        for mapping in mappings[::-1]:
+            LOGGER.info("Deleting '%s'...", mapping[0])
+            requests.delete(f'{self.service}/api/v1/statuses/{mapping[0]}', headers={
+                'Authorization': f'Bearer {self.token}'
+            })
+            database.delete_post(self.db, mapping[0], self.service, self.user_id)
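A standalone sketch of the upload-then-poll flow `upload_media` relies on, using the documented Mastodon media endpoints; the instance, token, and filename are placeholders:

```python
import time
import requests

INSTANCE, TOKEN = 'https://example.social', 'app-token'  # placeholders
headers = {'Authorization': f'Bearer {TOKEN}'}

with open('photo.png', 'rb') as fh:
    resp = requests.post(f'{INSTANCE}/api/v2/media', headers=headers,
                         files={'file': ('photo.png', fh, 'image/png')})
media_id = resp.json()['id']

# 202 means the attachment is still processing; polling GET /api/v1/media/:id
# answers 206 until processing finishes, then 200
status = resp.status_code
while status in (202, 206):
    time.sleep(3)
    status = requests.get(f'{INSTANCE}/api/v1/media/{media_id}',
                          headers=headers).status_code
```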
+1 -1
media_util.py
···
def download_blob(url: str, max_bytes: int = 5_000_000) -> bytes | None:
    response = requests.get(url, stream=True, timeout=20)
    if response.status_code != 200:
-        LOGGER.info("Failed to download %s! %s", url, response)
+        LOGGER.info("Failed to download %s! %s", url, response.text)
        return None

    downloaded_bytes = b""
pyproject.toml
···
"bs4>=0.0.2",
"click>=8.2.1",
"html-to-markdown>=1.3.3",
+
"python-magic>=0.4.27",
"requests>=2.32.3",
"websockets>=13.1",
]
+2 -2
util.py
···
    return False

-def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
+def split_tokens(tokens: list[cross.Token], max_chars: int, max_link_len: int = 35) -> list[list[cross.Token]]:
    def start_new_block():
        nonlocal current_block, blocks, current_length
        if current_block:
···
        elif isinstance(token, cross.LinkToken):
            link_len = len(token.label)
            if canonical_label(token.label, token.href):
-                link_len = min(link_len, 35)
+                link_len = min(link_len, max_link_len)
            if current_length + link_len <= max_chars:
                current_block.append(token)
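Illustrative call with assumed values: Mastodon counts every URL as a fixed number of characters, so `MastodonOutput` passes its instance's reserved URL length instead of the previous hardcoded 35:

```python
blocks = util.split_tokens(tokens, max_chars=500, max_link_len=23)
```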
+11
uv.lock
···
]

[[package]]
+name = "python-magic"
+version = "0.4.27"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/da/db/0b3e28ac047452d079d375ec6798bf76a036a08182dbb39ed38116a49130/python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", size = 14677, upload-time = "2022-06-07T20:16:59.508Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" },
+]
+
+[[package]]
name = "requests"
version = "2.32.3"
source = { registry = "https://pypi.org/simple" }
···
    { name = "bs4" },
    { name = "click" },
    { name = "html-to-markdown" },
+    { name = "python-magic" },
    { name = "requests" },
    { name = "websockets" },
]
···
    { name = "bs4", specifier = ">=0.0.2" },
    { name = "click", specifier = ">=8.2.1" },
    { name = "html-to-markdown", specifier = ">=1.3.3" },
+    { name = "python-magic", specifier = ">=0.4.27" },
    { name = "requests", specifier = ">=2.32.3" },
    { name = "websockets", specifier = ">=13.1" },
]