commit 30c2c81a5d20b7889c9d8a1af47aaae6ee92d809 · zenfyr.dev/xpost

+1 -1

atproto2.py bluesky/atproto2.py

···

       1
       1
        
       from typing import Any

     

       2
       2
        
       from atproto import client_utils, Client, AtUri, IdResolver

     

       3
       3
        
       from atproto_client import models

     

       4
       4
       -
       from util import LOGGER

     

       4
       4
       +
       from util.util import LOGGER

     

       5
       5
        
       

     

       6
       6
        
       def resolve_identity(

     

       7
       7
        
           handle: str | None = None,

-693

bluesky.py

···

       1
       1
       -
       from atproto import client_utils, Request, AsyncFirehoseSubscribeReposClient, CAR, CID

     

       2
       2
       -
       from atproto_client import models

     

       3
       3
       -
       from atproto_client.models.utils import get_or_create as get_model_or_create

     

       4
       4
       -
       from atproto_client.models.blob_ref import BlobRef

     

       5
       5
       -
       from atproto_firehose import models as firehose_models, parse_subscribe_repos_message as parse_firehose

     

       6
       6
       -
       from atproto2 import Client2, resolve_identity

     

       7
       7
       -
       from httpx import Timeout

     

       8
       8
       -
       import json

     

       9
       9
       -
       import cross

     

       10
       10
       -
       import database

     

       11
       11
       -
       from database import DataBaseWorker

     

       12
       12
       -
       import util

     

       13
       13
       -
       import media_util

     

       14
       14
       -
       from util import LOGGER

     

       15
       15
       -
       import re

     

       16
       16
       -
       from typing import Callable, Any

     

       17
       17
       -
       

     

       18
       18
       -
       # Service identifier used for Bluesky posts. It is only a lexicon
       # reference, not the address of the account's PDS.
       SERVICE = 'https://bsky.app'

       # TODO: keyword matching is a very crude heuristic for adult content.
       # The closing \b is attached to each word alternative individually: one
       # shared \b after the group can never match right after "18+" unless a
       # word character follows, because "+" is not a word character.
       ADULT_PATTERN = re.compile(
           r"\b(sexual content\b|nsfw\b|erotic\b|adult only\b|18\+)",
           re.IGNORECASE,
       )
       PORN_PATTERN = re.compile(r"\b(porn|yiff|hentai|pornographic|fetish)\b", re.IGNORECASE)

     

       24
       24
       -
       

     

       25
       25
       -
       def tokenize_post(post: dict) -> list[cross.Token]:
           """Split a Bluesky post record into text, link, tag and mention tokens.

           Facet positions (``byteStart``/``byteEnd``) are offsets into the UTF-8
           encoding of the post text, so all slicing is done on the encoded bytes
           and every piece is decoded afterwards.

           Only the first feature of each facet is used, and a facet that starts
           before the previously accepted facet ends is dropped.

           Args:
               post: Post record as a dict; the ``text`` and ``facets`` keys are read.

           Returns:
               The tokens in text order, or an empty list when the post has no text.
           """
           text: str = post.get('text', '')
           if not text:
               return []

           facets: list[dict] = post.get('facets', [])
           if not facets:
               return [cross.TextToken(text)]

           # facet feature type -> (token kind, key that holds the feature's value)
           feature_kinds = {
               'app.bsky.richtext.facet#tag': ('tag', 'tag'),
               'app.bsky.richtext.facet#link': ('link', 'uri'),
               'app.bsky.richtext.facet#mention': ('mention', 'did'),
           }

           slices: list[tuple[int, int, str, str]] = []
           for facet in facets:
               features: list[dict] = facet.get('features', [])
               if not features:
                   continue

               # we don't support overlapping facets/features
               feature = features[0]
               kind = feature_kinds.get(feature['$type'])
               if kind is None:
                   continue
               index = facet['index']
               slices.append((index['byteStart'], index['byteEnd'], kind[0], feature[kind[1]]))

           if not slices:
               return [cross.TextToken(text)]

           slices.sort(key=lambda s: s[0])
           raw = text.encode('utf-8')

           def piece(start: int, end: int | None = None) -> str:
               # A malformed facet may cut through a multi-byte character;
               # replace the broken bytes instead of raising.
               return raw[start:end].decode('utf-8', errors='replace')

           tokens: list[cross.Token] = []
           prev = 0
           for start, end, ttype, val in slices:
               if start < prev:
                   # overlaps the previously accepted facet
                   continue
               if start > prev:
                   # text between facets
                   tokens.append(cross.TextToken(piece(prev, start)))

               if ttype == 'link':
                   label = piece(start, end)
                   # Try to unflatten links: when the visible label is just the
                   # URL without its scheme (possibly cut off with '...'), emit
                   # the link without a label. Any other label is kept as-is so
                   # custom-labelled links are not lost.
                   scheme_split = val.split('://', 1)
                   flattened = len(scheme_split) > 1 and (
                       scheme_split[1].startswith(label)
                       or (label.endswith('...') and scheme_split[1].startswith(label[:-3]))
                   )
                   tokens.append(cross.LinkToken(val, '' if flattened else label))
               elif ttype == 'tag':
                   tokens.append(cross.TagToken(val))
               elif ttype == 'mention':
                   tokens.append(cross.MentionToken(piece(start, end), val))
               prev = end

           if prev < len(raw):
               tokens.append(cross.TextToken(piece(prev)))

           return tokens

     

       100
       100
       -
       

     

       101
       101
       -
       class BlueskyPost(cross.Post):
           """A Bluesky post record together with its tokens and downloaded media.

           The post id and the parent id are the JSON dumps (with sorted keys) of
           the record's ``$xpost.strongRef`` entry and of ``reply.parent``.
           """

           def __init__(self, post: dict, tokens: list[cross.Token], attachments: list[media_util.MediaInfo]) -> None:
               """Store the record and derive its id, parent id and content warning.

               Args:
                   post: Post record; must contain ``$xpost.strongRef``.
                   tokens: Tokens produced from the post text.
                   attachments: Media attached to the post.
               """
               super().__init__()
               self.post = post
               self.tokens = tokens
               self.attachments = attachments

               self.id = json.dumps(post['$xpost.strongRef'], sort_keys=True)

               reply = post.get('reply')
               self.parent_id = json.dumps(reply['parent'], sort_keys=True) if reply else None

               # The content warning is the list of label values, with dashes
               # turned into spaces, joined by ", ".
               labels = self._label_values()
               self.cw = ', '.join(str(label['val']).replace('-', ' ') for label in labels) if labels else ''

           def _label_values(self):
               """Return the record's ``labels.values`` entry, or None when absent."""
               # `or {}` also covers a record whose `labels` key holds null
               return (self.post.get('labels') or {}).get('values')

           def get_tokens(self) -> list[cross.Token]:
               return self.tokens

           def get_parent_id(self) -> str | None:
               return self.parent_id

           def get_post_date_iso(self) -> str:
               # fall back to the base class when the record has no createdAt
               return self.post.get('createdAt') or super().get_post_date_iso()

           def get_cw(self) -> str:
               return self.cw or ''

           def get_id(self) -> str:
               return self.id

           def get_languages(self) -> list[str]:
               return self.post.get('langs', []) or []

           def is_sensitive(self) -> bool:
               """Return True when the record carries at least one label value."""
               return bool(self._label_values())

           def get_attachments(self) -> list[media_util.MediaInfo]:
               return self.attachments

     

       142
       142
       -
       

     

       143
       143
       -
       class BlueskyInputOptions:
           """Options of a Bluesky input, read from a settings dict."""

           def __init__(self, o: dict) -> None:
               """Compile every entry of ``o['regex_filters']`` (default: none)."""
               patterns = o.get('regex_filters', [])
               self.filters = list(map(re.compile, patterns))

     

       146
       146
       -
       

     

       147
       147
       -
       class BlueskyInput(cross.Input):
           """Base Bluesky input: turns post records into cross posts for the outputs."""

           def __init__(self, settings: dict, db: DataBaseWorker) -> None:
               """Resolve the account identity and initialise the base input.

               Args:
                   settings: Input settings; reads ``options``, ``handle``, ``did``
                       and ``pds``.
                   db: Database worker passed on to the base class.
               """
               # `or {}` (as BlueskyOutput does) also covers a null `options` entry
               self.options = BlueskyInputOptions(settings.get('options') or {})
               did, pds = resolve_identity(
                   handle=util.as_envvar(settings.get('handle')),
                   did=util.as_envvar(settings.get('did')),
                   pds=util.as_envvar(settings.get('pds'))
               )
               self.pds = pds

               # PDS is Not a service, the lexicon and rids are the same across pds
               super().__init__(SERVICE, did, settings, db)

           def _get_blob_url(self, blob: str) -> str:
               """Build the ``com.atproto.sync.getBlob`` URL of a blob on this PDS."""
               return f'{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.user_id}&cid={blob}'

           def _embed_blobs(self, embed: dict) -> list[tuple[str, str]]:
               """Return ``(blob cid, alt text)`` pairs for an image or video embed.

               Any other embed type yields an empty list.
               """
               embed_type = embed.get('$type')
               if embed_type == 'app.bsky.embed.images':
                   model = get_model_or_create(embed, model=models.AppBskyEmbedImages.Main)
                   assert isinstance(model, models.AppBskyEmbedImages.Main)
                   return [(image.image.cid.encode(), image.alt) for image in model.images]
               if embed_type == 'app.bsky.embed.video':
                   model = get_model_or_create(embed, model=models.AppBskyEmbedVideo.Main)
                   assert isinstance(model, models.AppBskyEmbedVideo.Main)
                   return [(model.video.cid.encode(), model.alt or '')]
               return []

           def _on_post(self, outputs: list[cross.Output], post: dict[str, Any]):
               """Record a new post, download its media and hand it to every output.

               The post is skipped (with a log line) when the database insert is
               refused, when it does not pass the configured filters, or when one
               of its media blobs cannot be downloaded.
               """
               post_ref = json.dumps(post['$xpost.strongRef'], sort_keys=True)

               reply = post.get('reply')
               parent_ref = json.dumps(reply['parent'], sort_keys=True) if reply else None

               success = database.try_insert_post(self.db, post_ref, parent_ref, self.user_id, self.service)
               if not success:
                   LOGGER.info("Skipping '%s' as parent post was not found in db!", post_ref)
                   return

               tokens = tokenize_post(post)
               if not cross.test_filters(tokens, self.options.filters):
                   LOGGER.info("Skipping '%s'. Matched a filter!", post_ref)
                   return

               LOGGER.info("Crossposting '%s'...", post_ref)

               attachments: list[media_util.MediaInfo] = []
               # `or {}` also covers a record whose `embed` key holds null
               for cid, alt in self._embed_blobs(post.get('embed') or {}):
                   url = self._get_blob_url(cid)
                   LOGGER.info("Downloading %s...", url)
                   media = media_util.download_media(url, alt)
                   if not media:
                       # one missing attachment aborts the whole post
                       LOGGER.error("Skipping '%s'. Failed to download media!", post_ref)
                       return
                   attachments.append(media)

               cross_post = BlueskyPost(post, tokens, attachments)
               for output in outputs:
                   output.accept_post(cross_post)

           def _on_delete_post(self, outputs: list[cross.Output], post_id: dict):
               """Delete a known post from every output and then from the database.

               Nothing happens when the post is not found in the database.
               """
               identifier = json.dumps(post_id, sort_keys=True)
               post = database.find_post(self.db, identifier, self.user_id, self.service)
               if not post:
                   return

               LOGGER.info("Deleting '%s'...", identifier)
               for output in outputs:
                   output.delete_post(identifier)
               database.delete_post(self.db, identifier, self.user_id, self.service)

     

       222
       222
       -
       

     

       223
       223
       -
       class BlueskyPdsInput(BlueskyInput):
           """Bluesky input that follows the account's PDS firehose for commits."""

           def __init__(self, settings: dict, db: DataBaseWorker) -> None:
               super().__init__(settings, db)

           def __on_commit(self, outputs: list[cross.Output], message: firehose_models.MessageFrame):
               """Handle one ``#commit`` frame: forward post deletions and creations.

               Only operations on records of the ``app.bsky.feed.post`` collection
               are processed; every other operation is ignored.
               """
               if not message.body.get('blocks'):
                   return

               parsed = parse_firehose(message)
               if not isinstance(parsed, models.ComAtprotoSyncSubscribeRepos.Commit):
                   return
               blocks = parsed.blocks

               # The trailing slash keeps other collections that merely share the
               # prefix (e.g. app.bsky.feed.postgate) from being treated as posts.
               post_prefix = 'app.bsky.feed.post/'

               car: CAR | None = None

               def get_lazy_repo() -> CAR:
                   # decode the CAR at most once, and only if a create op needs it
                   nonlocal car
                   if car is None:
                       data = blocks.encode() if isinstance(blocks, str) else blocks
                       assert data
                       car = CAR.from_bytes(data)
                   return car

               for op in parsed.ops:
                   if op.action not in ('delete', 'create'):
                       continue
                   if not op.path.startswith(post_prefix):
                       continue

                   uri = f'at://{parsed.repo}/{op.path}'

                   if op.action == 'delete':
                       if op.prev:
                           self._on_delete_post(outputs, {
                               'cid': op.prev.encode(),
                               'uri': uri
                           })
                       continue

                   if not op.cid:
                       continue

                   record_data = get_lazy_repo().blocks.get(op.cid)
                   if not record_data:
                       continue

                   record_dict = dict(record_data)
                   record_dict['$xpost.strongRef'] = {
                       'cid': op.cid.encode(),
                       'uri': uri
                   }
                   # .get(): a record without `$type` is skipped instead of raising
                   if record_dict.get('$type') == 'app.bsky.feed.post':
                       self._on_post(outputs, record_dict)

           async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
               """Subscribe to the PDS firehose and submit this account's commits.

               Args:
                   outputs: Outputs that receive the created and deleted posts.
                   submit: Called with a zero-argument callable that processes
                       one commit.
               """
               # Taken out of the f-string: reusing the same quote type inside an
               # f-string expression is a syntax error before Python 3.12.
               host = self.pds.split('://', 1)[1]
               streaming: str = f"wss://{host}/xrpc"

               client = AsyncFirehoseSubscribeReposClient(base_uri=streaming)

               async def on_message(message: firehose_models.MessageFrame):
                   if message.header.t != '#commit':
                       return

                   # the firehose carries every repo on the PDS; keep only ours
                   if message.body.get('repo') != self.user_id:
                       return

                   if message.body.get('tooBig'):
                       LOGGER.error("#commit message is tooBig!")
                       return

                   submit(lambda: self.__on_commit(outputs, message))

               LOGGER.info("Listening to %s...", streaming + '/com.atproto.sync.subscribeRepos')
               await client.start(on_message)

     

       304
       304
       -
       

     

       305
       305
       -
       # Values accepted inside the 'thread_gate' option.
       ALLOWED_GATES = ['mentioned', 'following', 'followers', 'everybody']


       class BlueskyOutputOptions:
           """Options of a Bluesky output, read from a settings dict.

           Defaults: ``quote_gate`` False, ``thread_gate`` ``['everybody']``,
           ``encode_videos`` True. A key that is missing or None keeps its default.
           """

           def __init__(self, o: dict) -> None:
               """Read the options from ``o``.

               Raises:
                   ValueError: if ``thread_gate`` holds a value outside ALLOWED_GATES.
               """
               gate_values = o.get('thread_gate')
               if gate_values is not None and not all(v in ALLOWED_GATES for v in gate_values):
                   raise ValueError(f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {thread_gate_repr(gate_values)}" if False else f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {gate_values}")

               quote_flag = o.get('quote_gate')
               video_flag = o.get('encode_videos')

               self.quote_gate: bool = False if quote_flag is None else bool(quote_flag)
               self.thread_gate: list[str] = ['everybody'] if gate_values is None else gate_values
               self.encode_videos: bool = True if video_flag is None else bool(video_flag)

     

       326
       326
       -
       

     

       327
       327
       -
       class BlueskyOutput(cross.Output):

     

       328
       328
       -
           def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:

     

       329
       329
       -
               super().__init__(input, settings, db)

     

       330
       330
       -
               self.options = BlueskyOutputOptions(settings.get('options') or {})

     

       331
       331
       -
               

     

       332
       332
       -
               if not util.as_envvar(settings.get('app-password')):

     

       333
       333
       -
                   raise Exception("Account app password not provided!")

     

       334
       334
       -
               

     

       335
       335
       -
               did, pds = resolve_identity(

     

       336
       336
       -
                   handle=util.as_envvar(settings.get('handle')),

     

       337
       337
       -
                   did=util.as_envvar(settings.get('did')),

     

       338
       338
       -
                   pds=util.as_envvar(settings.get('pds'))

     

       339
       339
       -
               )

     

       340
       340
       -
               

     

       341
       341
       -
               reqs = Request(timeout=Timeout(None, connect=30.0))

     

       342
       342
       -
               

     

       343
       343
       -
               self.bsky = Client2(pds, request=reqs)

     

       344
       344
       -
               self.bsky.login(did, util.as_envvar(settings.get('app-password')))

     

       345
       345
       -
           

     

       346
       346
       -
           def _find_parent(self, parent_id: str):

     

       347
       347
       -
               login = self.bsky.me

     

       348
       348
       -
               if not login:

     

       349
       349
       -
                   raise Exception("Client not logged in!")

     

       350
       350
       -
               

     

       351
       351
       -
               thread_tuple = database.find_mapped_thread(

     

       352
       352
       -
                   self.db,

     

       353
       353
       -
                   parent_id,

     

       354
       354
       -
                   self.input.user_id,

     

       355
       355
       -
                   self.input.service,

     

       356
       356
       -
                   login.did,

     

       357
       357
       -
                   SERVICE

     

       358
       358
       -
               )

     

       359
       359
       -
               

     

       360
       360
       -
               if not thread_tuple:

     

       361
       361
       -
                   LOGGER.error("Failed to find thread tuple in the database!")

     

       362
       362
       -
                   return None

     

       363
       363
       -
               

     

       364
       364
       -
               root_ref = json.loads(thread_tuple[0])

     

       365
       365
       -
               reply_ref = json.loads(thread_tuple[1])

     

       366
       366
       -
               

     

       367
       367
       -
               root_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(root_ref['uri']), cid=str(root_ref['cid']))

     

       368
       368
       -
               reply_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_ref['uri']), cid=str(reply_ref['cid']))

     

       369
       369
       -
               

     

       370
       370
       -
               return (

     

       371
       371
       -
                   models.create_strong_ref(root_record),

     

       372
       372
       -
                   models.create_strong_ref(reply_record),

     

       373
       373
       -
                   thread_tuple[2],

     

       374
       374
       -
                   thread_tuple[3]

     

       375
       375
       -
               )

     

       376
       376
       -
           

     

       377
       377
       -
           def _split_attachments(self, attachments: list[media_util.MediaInfo]):
               """Partition attachments by whether their MIME type can be embedded.

               Attachments whose ``mime`` starts with ``image/`` or ``video/`` go
               into the first list, everything else into the second. Input order
               is preserved within each list.

               Returns:
                   A ``(supported, unsupported)`` tuple of lists.
               """
               supported: list[media_util.MediaInfo] = []
               unsupported: list[media_util.MediaInfo] = []

               # TODO convert gifs to videos
               for item in attachments:
                   is_embeddable = item.mime.startswith(('image/', 'video/'))
                   bucket = supported if is_embeddable else unsupported
                   bucket.append(item)

               return (supported, unsupported)

     

       388
       388
       -
       

     

       389
       389
       -
           def _split_media_per_post(
               self,
               tokens: list[client_utils.TextBuilder],
               media: list[media_util.MediaInfo]):
               """Distribute media attachments over the text chunks of a thread.

               Every entry of ``tokens`` becomes one post. Attachments are handed
               out in order: a video always claims the next post that has no media
               yet, and images are grouped up to four per post. A video in between
               ends the current image group. When the text chunks run out, extra
               posts with empty text are appended to hold the remaining media.
               Attachments that are neither ``image/*`` nor ``video/*`` are ignored.

               Args:
                   tokens: rich text of each post, in thread order.
                   media: attachments to distribute, in the order they should appear.

               Returns:
                   A list of ``(text, attachments)`` tuples, one per post to send.
               """
               max_images_per_post = 4

               posts: list[tuple[client_utils.TextBuilder, list[media_util.MediaInfo]]] = [
                   (text, []) for text in tokens
               ]
               # Posts are claimed strictly front to back, so a cursor is enough.
               next_free = 0
               # Attachment list of the post that is currently collecting images.
               image_group: list[media_util.MediaInfo] | None = None

               def claim_next_post() -> list[media_util.MediaInfo]:
                   """Return the attachment list of the next media-free post."""
                   nonlocal next_free
                   if next_free == len(posts):
                       # Out of text chunks: add an overflow post. Its text is a
                       # bare TextBuilder, like every other entry (it used to be
                       # wrapped in a list, breaking the declared result type).
                       posts.append((client_utils.TextBuilder().text(''), []))
                   claimed = posts[next_free][1]
                   next_free += 1
                   return claimed

               for att in media:
                   if att.mime.startswith('video/'):
                       # a video gets a post of its own and closes the image group
                       image_group = None
                       claim_next_post().append(att)
                   elif att.mime.startswith('image/'):
                       if image_group is None or len(image_group) >= max_images_per_post:
                           image_group = claim_next_post()
                       image_group.append(att)

               return posts

     

       433
       433
       -
           

     

       434
       434
       -
           def accept_post(self, post: cross.Post):
               """Cross-post ``post`` to Bluesky as a chain of one or more records.

               Steps, in order:

               1. Resolve the parent thread (if the post is a reply) from the database.
               2. Build the token list: prepend the content warning, derive self
                  labels, and append links for attachments that cannot be embedded.
               3. Split the tokens with ``cross.split_tokens(tokens, 300)`` and
                  convert each chunk to rich text.
               4. Validate supported media, distribute it over the chunks and send
                  every chunk, each one replying to the previous one.
               5. Store the created records and map them to the source post.

               Always returns ``None``. The post is skipped with a log message when
               the parent cannot be found, when a chunk contains tokens that cannot
               be converted to rich text, or when an attachment violates one of the
               size, format or duration checks below.

               Raises:
                   Exception: if the Bluesky client is not logged in.
                   AssertionError: if the source post is not found in the database.
               """
               login = self.bsky.me
               if not login:
                   raise Exception("Client not logged in!")

               parent_id = post.get_parent_id()

               # used for db insertion
               new_root_id = None
               new_parent_id = None

               root_ref = None
               reply_ref = None
               if parent_id:
                   parents = self._find_parent(parent_id)
                   if not parents:
                       return
                   root_ref, reply_ref, new_root_id, new_parent_id = parents

               tokens = post.get_tokens().copy()

               unique_labels: set[str] = set()
               cw = post.get_cw()
               if cw:
                   tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
                   unique_labels.add('graphic-media')

                   # from bsky.app, a post can only have one of those labels.
                   # Scanned only when a content warning exists: searching a
                   # missing (None) warning would raise TypeError.
                   if PORN_PATTERN.search(cw):
                       unique_labels.add('porn')
                   elif ADULT_PATTERN.search(cw):
                       unique_labels.add('sexual')

               if post.is_sensitive():
                   unique_labels.add('graphic-media')

               labels = models.ComAtprotoLabelDefs.SelfLabels(
                   values=[models.ComAtprotoLabelDefs.SelfLabel(val=label) for label in unique_labels]
               )

               sup_media, unsup_media = self._split_attachments(post.get_attachments())

               # attachments that cannot be embedded are linked at the end of the text
               if unsup_media:
                   if tokens:
                       tokens.append(cross.TextToken('\n'))
                   for attachment in unsup_media:
                       tokens.append(cross.LinkToken(
                           attachment.url,
                           f"[{media_util.get_filename_from_url(attachment.url)}]"
                       ))
                       tokens.append(cross.TextToken(' '))

               split_tokens: list[list[cross.Token]] = cross.split_tokens(tokens, 300)
               post_text: list[client_utils.TextBuilder] = []

               # convert tokens into rich text. skip post if contains unsupported tokens
               for block in split_tokens:
                   rich_text = tokens_to_richtext(block)

                   if not rich_text:
                       LOGGER.error("Skipping '%s' as it contains invalid rich text types!", post.get_id())
                       return
                   post_text.append(rich_text)

               if not post_text:
                   post_text = [client_utils.TextBuilder().text('')]

               # validate all media before anything is sent
               for m in sup_media:
                   if m.mime.startswith('image/'):
                       if len(m.io) > 2_000_000:
                           LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large.", post.get_id())
                           return

                   if m.mime.startswith('video/'):
                       if m.mime != 'video/mp4' and not self.options.encode_videos:
                           LOGGER.info("Video is not mp4, but encoding is disabled. Skipping '%s'...", post.get_id())
                           return

                       if len(m.io) > 100_000_000:
                           LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large?", post.get_id())
                           return

               created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
               baked_media = self._split_media_per_post(post_text, sup_media)

               for text, attachments in baked_media:
                   # The text of an overflow post may arrive wrapped in a
                   # one-element list; unwrap it so it can be sent like the rest.
                   if isinstance(text, list):
                       text = text[0]

                   # reply to the previous record once a thread root exists
                   reply_to = (
                       models.AppBskyFeedPost.ReplyRef(parent=reply_ref, root=root_ref)
                       if root_ref and reply_ref else None
                   )

                   if not attachments:
                       if reply_to is not None:
                           new_post = self.bsky.send_post(
                               text,
                               reply_to=reply_to,
                               labels=labels,
                               time_iso=post.get_post_date_iso()
                           )
                       else:
                           new_post = self.bsky.send_post(text, labels=labels, time_iso=post.get_post_date_iso())
                   elif attachments[0].mime.startswith('image/'):
                       # if a single post is an image - everything else is an image
                       images: list[bytes] = []
                       image_alts: list[str] = []
                       image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []

                       for attachment in attachments:
                           image_io = media_util.compress_image(attachment.io, quality=100)
                           metadata = media_util.get_media_meta(image_io)

                           if len(image_io) > 1_000_000:
                               LOGGER.info("Compressing %s...", attachment.name)
                               image_io = media_util.compress_image(image_io)

                           images.append(image_io)
                           image_alts.append(attachment.alt)
                           image_aspect_ratios.append(models.AppBskyEmbedDefs.AspectRatio(
                               width=metadata['width'],
                               height=metadata['height']
                           ))

                       # Send this post's own text: using post_text[0] here
                       # repeated the first chunk on every media post.
                       new_post = self.bsky.send_images(
                           text=text,
                           images=images,
                           image_alts=image_alts,
                           image_aspect_ratios=image_aspect_ratios,
                           reply_to=reply_to,
                           labels=labels,
                           time_iso=post.get_post_date_iso()
                       )
                   else:  # video is guaranteed to be the only attachment
                       video = attachments[0]
                       metadata = media_util.get_media_meta(video.io)
                       if metadata['duration'] > 180:
                           # NOTE(review): records already sent for earlier chunks
                           # are not stored in the database when we bail out here.
                           LOGGER.info("Skipping post_id '%s', video attachment too long!", post.get_id())
                           return

                       video_io = video.io
                       if video.mime != 'video/mp4':
                           LOGGER.info("Converting %s to mp4...", video.name)
                           video_io = media_util.convert_to_mp4(video_io)

                       aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
                           width=metadata['width'],
                           height=metadata['height']
                       )

                       new_post = self.bsky.send_video(
                           text=text,
                           video=video_io,
                           video_aspect_ratio=aspect_ratio,
                           video_alt=video.alt,
                           reply_to=reply_to,
                           labels=labels,
                           time_iso=post.get_post_date_iso()
                       )

                   # the first record that gets created becomes the thread root
                   if not root_ref:
                       root_ref = models.create_strong_ref(new_post)

                   self.bsky.create_gates(
                       self.options.thread_gate,
                       self.options.quote_gate,
                       new_post.uri,
                       time_iso=post.get_post_date_iso()
                   )
                   reply_ref = models.create_strong_ref(new_post)
                   created_records.append(new_post)

               db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)
               assert db_post, "ghghghhhhh"

               db_identifiers = [json.dumps(cr.model_dump(), sort_keys=True) for cr in created_records]

               # without a known parent thread, the first record is stored as a new root
               if new_root_id is None or new_parent_id is None:
                   new_root_id = database.insert_post(
                       self.db,
                       db_identifiers[0],
                       login.did,
                       SERVICE
                   )
                   new_parent_id = new_root_id
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)
                   db_identifiers = db_identifiers[1:]

               # every remaining record is stored as a reply to the one before it
               for db_id in db_identifiers:
                   new_parent_id = database.insert_reply(
                       self.db,
                       db_id,
                       login.did,
                       SERVICE,
                       new_parent_id,
                       new_root_id
                   )
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)

     

       648
       648
       -
                   

     

       649
       649
       -
           def delete_post(self, identifier: str):
               """Delete every Bluesky record that was created for a source post.

               Looks up the source post by ``identifier`` for the configured input
               user and service, then walks its mappings for this account from
               last to first. Each record is removed from Bluesky and then from
               the database. Does nothing when the source post is not found.

               Raises:
                   Exception: if the Bluesky client is not logged in.
               """
               account = self.bsky.me
               if not account:
                   raise Exception("Client not logged in!")

               source_post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)
               if source_post:
                   rows = database.find_mappings(self.db, source_post['id'], SERVICE, account.did)
                   # iterate back to front
                   for row in rows[::-1]:
                       record_json = row[0]
                       LOGGER.info("Deleting '%s'...", record_json)
                       self.bsky.delete_post(json.loads(record_json)['uri'])
                       database.delete_post(self.db, record_json, SERVICE, account.did)

     

       663
       663
       -
           

     

       664
       664
       -
       

     

       665
       665
       -
       def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
           """Convert a token list into a ``TextBuilder``.

           Text tokens are appended verbatim, tag tokens are rendered as ``#tag``
           and link tokens as links. When ``util.canonical_label`` reports a
           truthy result for a link's label and href, the visible text is the
           href without its scheme, cut to 32 characters plus ``...``; otherwise
           the link's own label is shown.

           Returns:
               The populated builder, or ``None`` as soon as a token of any other
               type is encountered.
           """
           def shorten(url: str):
               # drop everything up to and including the first '://', if present
               _, separator, remainder = url.partition('://')
               display = remainder if separator else url
               return display[:32] + '...' if len(display) > 32 else display

           rich = client_utils.TextBuilder()

           for tok in tokens:
               if isinstance(tok, cross.TextToken):
                   rich.text(tok.text)
               elif isinstance(tok, cross.LinkToken):
                   if util.canonical_label(tok.label, tok.href):
                       shown = shorten(tok.href)
                   else:
                       shown = tok.label
                   rich.link(shown, tok.href)
               elif isinstance(tok, cross.TagToken):
                   rich.tag('#' + tok.tag, tok.tag)
               else:
                   # fail on unsupported tokens
                   return None

           return rich

+163

bluesky/common.py

···

       1
       1
       +
       import re, json

     

       2
       2
       +
       

     

       3
       3
       +
       from atproto import client_utils

     

       4
       4
       +
       

     

       5
       5
       +
       import cross

     

       6
       6
       +
       from util.media import MediaInfo

     

       7
       7
       +
       from util.util import canonical_label

     

       8
       8
       +
       

     

       9
       9
       +
       # only for lexicon reference

     

       10
       10
       +
       SERVICE = 'https://bsky.app'

     

       11
       11
       +
       

     

       12
       12
       +
       # TODO this is terrible and stupid

     

       13
       13
       +
       # Case-insensitive keyword heuristics applied to content-warning text.
       # Group 1 holds the matched keyword. The closing \b is attached to each
       # word-like keyword only: "18+" ends in a non-word character, so a
       # trailing \b after it could match only when a letter or digit followed
       # directly, and "18+" / "18+ only" were never detected.
       ADULT_PATTERN = re.compile(r"\b(sexual content\b|nsfw\b|erotic\b|adult only\b|18\+)", re.IGNORECASE)
       PORN_PATTERN  = re.compile(r"\b(porn|yiff|hentai|pornographic|fetish)\b", re.IGNORECASE)

     

       15
       15
       +
       

     

       16
       16
       +
       def tokenize_post(post: dict) -> list[cross.Token]:

     

       17
       17
       +
           text: str = post.get('text', '')

     

       18
       18
       +
           if not text:

     

       19
       19
       +
               return []

     

       20
       20
       +
           text = text.encode(encoding='utf-8').decode(encoding='utf-8')

     

       21
       21
       +
           

     

       22
       22
       +
           facets: list[dict] = post.get('facets', [])

     

       23
       23
       +
           if not facets:

     

       24
       24
       +
               return [cross.TextToken(text)]

     

       25
       25
       +
           

     

       26
       26
       +
           slices: list[tuple[int, int, str, str]] = []

     

       27
       27
       +
           

     

       28
       28
       +
           for facet in facets:

     

       29
       29
       +
               features: list[dict] = facet.get('features', [])

     

       30
       30
       +
               if not features:

     

       31
       31
       +
                   continue

     

       32
       32
       +
               

     

       33
       33
       +
               # we don't support overlapping facets/features

     

       34
       34
       +
               feature = features[0]

     

       35
       35
       +
               feature_type = feature['$type']

     

       36
       36
       +
               index = facet['index']

     

       37
       37
       +
               if feature_type == 'app.bsky.richtext.facet#tag':

     

       38
       38
       +
                   slices.append((index['byteStart'], index['byteEnd'], 'tag', feature['tag']))

     

       39
       39
       +
               elif feature_type == 'app.bsky.richtext.facet#link':

     

       40
       40
       +
                   slices.append((index['byteStart'], index['byteEnd'], 'link', feature['uri']))

     

       41
       41
       +
               elif feature_type == 'app.bsky.richtext.facet#mention':

     

       42
       42
       +
                   slices.append((index['byteStart'], index['byteEnd'], 'mention', feature['did']))

     

       43
       43
       +
           

     

       44
       44
       +
           if not slices:

     

       45
       45
       +
               return [cross.TextToken(text)]

     

       46
       46
       +
           

     

       47
       47
       +
           slices.sort(key=lambda s: s[0])

     

       48
       48
       +
           unique: list[tuple[int, int, str, str]] = []

     

       49
       49
       +
           current_end = 0

     

       50
       50
       +
           for start, end, ttype, val in slices:

     

       51
       51
       +
               if start >= current_end:

     

       52
       52
       +
                   unique.append((start, end, ttype, val))

     

       53
       53
       +
                   current_end = end

     

       54
       54
       +
           

     

       55
       55
       +
           if not unique:

     

       56
       56
       +
               return [cross.TextToken(text)]

     

       57
       57
       +
           

     

       58
       58
       +
           tokens: list[cross.Token] = []

     

       59
       59
       +
           prev = 0

     

       60
       60
       +
           

     

       61
       61
       +
           for start, end, ttype, val in unique:

     

       62
       62
       +
               if start > prev:

     

       63
       63
       +
                   # text between facets

     

       64
       64
       +
                   tokens.append(cross.TextToken(text[prev:start]))

     

       65
       65
       +
               # facet token

     

       66
       66
       +
               if ttype == 'link':

     

       67
       67
       +
                   label = text[start:end]

     

       68
       68
       +
                   

     

       69
       69
       +
                   # try to unflatten links

     

       70
       70
       +
                   split = val.split('://')

     

       71
       71
       +
                   if len(split) > 1:

     

       72
       72
       +
                       if split[1].startswith(label):

     

       73
       73
       +
                           tokens.append(cross.LinkToken(val, ''))

     

       74
       74
       +
                       elif label.endswith('...') and split[1].startswith(label[:-3]):

     

       75
       75
       +
                           tokens.append(cross.LinkToken(val, ''))

     

       76
       76
       +
                   else:

     

       77
       77
       +
                       tokens.append(cross.LinkToken(val, label))

     

       78
       78
       +
               elif ttype == 'tag':

     

       79
       79
       +
                   tokens.append(cross.TagToken(val))

     

       80
       80
       +
               elif ttype == 'mention':

     

       81
       81
       +
                   tokens.append(cross.MentionToken(text[start:end], val))

     

       82
       82
       +
               prev = end

     

       83
       83
       +
       

     

       84
       84
       +
           if prev < len(text):

     

       85
       85
       +
               tokens.append(cross.TextToken(text[prev:]))

     

       86
       86
       +
               

     

       87
       87
       +
           for t in tokens:

     

       88
       88
       +
               print(t.__dict__)

     

       89
       89
       +
           

     

       90
       90
       +
           return tokens

     

       91
       91
       +
       

     

       92
       92
       +
       class BlueskyPost(cross.Post):
           """A Bluesky post record exposed through the ``cross.Post`` interface.

           Wraps the raw record dict together with the tokens and attachments
           that were prepared for it.
           """

           def __init__(self, post: dict, tokens: list[cross.Token], attachments: list[MediaInfo]) -> None:
               """Store the record and derive its id, parent id and content warning.

               Args:
                   post: Raw record dict; must contain the '$xpost.strongRef' entry.
                   tokens: Tokens handed back by get_tokens().
                   attachments: Media handed back by get_attachments().

               Raises:
                   KeyError: If '$xpost.strongRef' is missing, or a truthy 'reply'
                       entry has no 'parent'.
               """
               super().__init__()
               self.post = post
               self.tokens = tokens
               self.attachments = attachments

               # Refs are dumped with sorted keys so equal refs always yield the same id string.
               self.id = json.dumps(self.post['$xpost.strongRef'], sort_keys=True)

               self.parent_id = None
               reply = self.post.get('reply')
               if reply:
                   self.parent_id = json.dumps(reply['parent'], sort_keys=True)

               # One content warning built from every label value, dashes shown as spaces.
               # Joining an empty list yields '', which is the "no warning" value.
               self.cw = ', '.join(
                   str(label['val']).replace('-', ' ') for label in self._label_values()
               )

           def _label_values(self) -> list:
               """Return the record's label values, or [] when there are none.

               Tolerates a missing 'labels' entry as well as one that is present
               but null.
               """
               return (self.post.get('labels') or {}).get('values') or []

           def get_tokens(self) -> list[cross.Token]:
               """Return the tokens passed to the constructor."""
               return self.tokens

           def get_parent_id(self) -> str | None:
               """Return the JSON-encoded parent ref, or None when the post is not a reply."""
               return self.parent_id

           def get_post_date_iso(self) -> str:
               """Return the record's 'createdAt', falling back to the base class value."""
               return self.post.get('createdAt') or super().get_post_date_iso()

           def get_cw(self) -> str:
               """Return the content warning text ('' when the post has no labels)."""
               return self.cw or ''

           def get_id(self) -> str:
               """Return the JSON-encoded strong ref identifying this post."""
               return self.id

           def get_languages(self) -> list[str]:
               """Return the record's 'langs' list, or [] when absent or empty."""
               return self.post.get('langs', []) or []

           def is_sensitive(self) -> bool:
               """Return True when the record carries at least one label value."""
               # Coerce to a real bool; the label list itself must not leak out.
               return bool(self._label_values())

           def get_attachments(self) -> list[MediaInfo]:
               """Return the attachments passed to the constructor."""
               return self.attachments

     

       133
       133
       +
           

     

       134
       134
       +
       

     

       135
       135
       +
       def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
           """Render tokens into an atproto ``TextBuilder``.

           Text, link and tag tokens are supported. Any other token type aborts
           the conversion and None is returned instead of a builder.
           """
           rich = client_utils.TextBuilder()
           max_len = 32

           def shorten(href: str):
               # Visible link text: the URL without its scheme, cut to 32 chars plus '...'.
               _, sep, rest = href.partition('://')
               shown = rest if sep else href
               return shown[:max_len] + '...' if len(shown) > max_len else shown

           for tok in tokens:
               if isinstance(tok, cross.TextToken):
                   rich.text(tok.text)
               elif isinstance(tok, cross.LinkToken):
                   # When canonical_label() is truthy the label is regenerated from the
                   # href; otherwise the token's own label is kept.
                   # NOTE(review): presumably "label is just the URL itself" - confirm.
                   if canonical_label(tok.label, tok.href):
                       shown = shorten(tok.href)
                   else:
                       shown = tok.label
                   rich.link(shown, tok.href)
               elif isinstance(tok, cross.TagToken):
                   rich.tag('#' + tok.tag, tok.tag)
               else:
                   # Unsupported token: fail the whole conversion.
                   return None

           return rich

+178

bluesky/input.py

···

       1
       1
       +
       import re, json

     

       2
       2
       +
       

     

       3
       3
       +
       from atproto import AsyncFirehoseSubscribeReposClient, CAR

     

       4
       4
       +
       from atproto_client import models

     

       5
       5
       +
       from atproto_client.models.utils import get_or_create as get_model_or_create

     

       6
       6
       +
       from atproto_firehose import models as firehose_models, parse_subscribe_repos_message as parse_firehose

     

       7
       7
       +
       from bluesky.atproto2 import resolve_identity

     

       8
       8
       +
       

     

       9
       9
       +
       from bluesky.common import BlueskyPost, SERVICE, tokenize_post

     

       10
       10
       +
       

     

       11
       11
       +
       import cross, util.database as database

     

       12
       12
       +
       from util.util import LOGGER, as_envvar

     

       13
       13
       +
       from util.media import MediaInfo, download_media

     

       14
       14
       +
       from util.database import DataBaseWorker

     

       15
       15
       +
       

     

       16
       16
       +
       from typing import Callable, Any

     

       17
       17
       +
       

     

       18
       18
       +
       class BlueskyInputOptions():
           """Options read from a Bluesky input's ``options`` mapping."""

           def __init__(self, o: dict) -> None:
               """Compile every pattern listed under 'regex_filters' (none by default)."""
               patterns = o.get('regex_filters', [])
               # Compiled regexes, in the order they were configured.
               self.filters = list(map(re.compile, patterns))

     

       21
       21
       +
       

     

       22
       22
       +
       class BlueskyInput(cross.Input):
           """Base Bluesky input: turns post records into cross-posts and relays deletions."""

           def __init__(self, settings: dict, db: DataBaseWorker) -> None:
               """Resolve the account identity and initialise the input.

               Args:
                   settings: Input settings; 'handle', 'did', 'pds' and 'options' are read.
                   db: Database worker handed on to ``cross.Input``.
               """
               # `or {}` also covers an 'options' key that is present but null.
               self.options = BlueskyInputOptions(settings.get('options') or {})
               did, pds = resolve_identity(
                   handle=as_envvar(settings.get('handle')),
                   did=as_envvar(settings.get('did')),
                   pds=as_envvar(settings.get('pds'))
               )
               self.pds = pds

               # PDS is Not a service, the lexicon and rids are the same across pds
               super().__init__(SERVICE, did, settings, db)

           def _download_blob(self, cid: str, alt: str, post_ref: str) -> MediaInfo | None:
               """Download one blob of this account from its PDS.

               Args:
                   cid: Encoded CID of the blob.
                   alt: Alt text passed on to download_media().
                   post_ref: Post identifier, used only in the failure log message.

               Returns:
                   Whatever download_media() returned, or None (after logging an
                   error) when that result is falsy.
               """
               url = f'{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.user_id}&cid={cid}'
               LOGGER.info("Downloading %s...", url)
               media = download_media(url, alt)
               if not media:
                   LOGGER.error("Skipping '%s'. Failed to download media!", post_ref)
                   return None
               return media

           def _on_post(self, outputs: list[cross.Output], post: dict[str, Any]):
               """Register a new post record and hand it to every output.

               The post is skipped (with a log line) when the database insert is
               refused, when a filter rejects it, or when any attachment fails to
               download.

               Args:
                   outputs: Outputs that receive the post via accept_post().
                   post: Record dict carrying the '$xpost.strongRef' entry.
               """
               post_ref = json.dumps(post['$xpost.strongRef'], sort_keys=True)

               parent_ref = None
               if post.get('reply'):
                   parent_ref = json.dumps(post['reply']['parent'], sort_keys=True)

               if not database.try_insert_post(self.db, post_ref, parent_ref, self.user_id, self.service):
                   LOGGER.info("Skipping '%s' as parent post was not found in db!", post_ref)
                   return

               tokens = tokenize_post(post)
               if not cross.test_filters(tokens, self.options.filters):
                   LOGGER.info("Skipping '%s'. Matched a filter!", post_ref)
                   return

               LOGGER.info("Crossposting '%s'...", post_ref)

               attachments: list[MediaInfo] = []
               # A missing or null 'embed' simply means there is nothing to download.
               embed = post.get('embed') or {}
               embed_type = embed.get('$type')
               if embed_type == 'app.bsky.embed.images':
                   model = get_model_or_create(embed, model=models.AppBskyEmbedImages.Main)
                   assert isinstance(model, models.AppBskyEmbedImages.Main)

                   for image in model.images:
                       media = self._download_blob(image.image.cid.encode(), image.alt, post_ref)
                       if not media:
                           return
                       attachments.append(media)
               elif embed_type == 'app.bsky.embed.video':
                   model = get_model_or_create(embed, model=models.AppBskyEmbedVideo.Main)
                   assert isinstance(model, models.AppBskyEmbedVideo.Main)

                   media = self._download_blob(model.video.cid.encode(), model.alt or '', post_ref)
                   if not media:
                       return
                   attachments.append(media)

               cross_post = BlueskyPost(post, tokens, attachments)
               for output in outputs:
                   output.accept_post(cross_post)

           def _on_delete_post(self, outputs: list[cross.Output], post_id: dict):
               """Relay the deletion of a known post to every output, then drop it from the db.

               Args:
                   outputs: Outputs that receive delete_post().
                   post_id: Ref dict of the deleted post; it is JSON-encoded with
                       sorted keys to form the lookup identifier.
               """
               identifier = json.dumps(post_id, sort_keys=True)
               # Posts that were never recorded have nothing to delete downstream.
               if not database.find_post(self.db, identifier, self.user_id, self.service):
                   return

               LOGGER.info("Deleting '%s'...", identifier)
               for output in outputs:
                   output.delete_post(identifier)
               database.delete_post(self.db, identifier, self.user_id, self.service)

     

       97
       97
       +
       

     

       98
       98
       +
       class BlueskyPdsInput(BlueskyInput):
           """Bluesky input fed by the ``com.atproto.sync.subscribeRepos`` stream of the account's PDS."""

           def __init__(self, settings: dict, db: DataBaseWorker) -> None:
               super().__init__(settings, db)

           def __on_commit(self, outputs: list[cross.Output], message: firehose_models.MessageFrame):
               """Process one #commit frame: relay post deletions and newly created posts.

               Args:
                   outputs: Outputs that receive the resulting posts/deletions.
                   message: Raw firehose frame; frames without blocks or that do not
                       parse to a Commit are ignored.
               """
               if not message.body.get('blocks'):
                   return

               parsed = parse_firehose(message)
               if not isinstance(parsed, models.ComAtprotoSyncSubscribeRepos.Commit):
                   return
               blocks = parsed.blocks

               car = None

               def get_lazy_repo() -> CAR:
                   # Decode the CAR only when a create op needs it, and only once.
                   nonlocal car, blocks
                   if car is None:
                       if isinstance(blocks, str):
                           blocks = blocks.encode()
                       assert blocks
                       car = CAR.from_bytes(blocks)
                   return car

               for op in parsed.ops:
                   if op.action == 'delete':
                       if not op.prev:
                           continue

                       # Op paths have the form '<collection>/<rkey>'; the trailing slash
                       # keeps other collections sharing the prefix (e.g.
                       # app.bsky.feed.postgate) from being treated as posts.
                       if not op.path.startswith('app.bsky.feed.post/'):
                           continue

                       self._on_delete_post(outputs, {
                           'cid': op.prev.encode(),
                           'uri': f'at://{parsed.repo}/{op.path}'
                       })
                       continue

                   if op.action != 'create':
                       continue

                   if not op.cid:
                       continue

                   record_data = get_lazy_repo().blocks.get(op.cid)
                   if not record_data:
                       continue

                   record_dict = dict(record_data)
                   # Attach the record's own strong ref; downstream code keys on it.
                   record_dict['$xpost.strongRef'] = {
                       'cid': op.cid.encode(),
                       'uri': f'at://{parsed.repo}/{op.path}'
                   }
                   # .get(): a record without '$type' is skipped rather than raising.
                   if record_dict.get('$type') == 'app.bsky.feed.post':
                       self._on_post(outputs, record_dict)

           async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
               """Subscribe to the PDS stream and submit this account's commits for processing.

               Args:
                   outputs: Outputs passed through to the commit handler.
                   submit: Called with a zero-argument callable for every accepted commit.
               """
               # Swap the PDS URL's scheme for wss://. A value without a scheme is used
               # as the host as-is instead of raising IndexError. Nested double quotes
               # inside the f-string are avoided because they only parse on Python 3.12+.
               _, sep, host = self.pds.partition('://')
               if not sep:
                   host = self.pds
               streaming: str = f"wss://{host}/xrpc"

               client = AsyncFirehoseSubscribeReposClient(base_uri=streaming)

               async def on_message(message: firehose_models.MessageFrame):
                   if message.header.t != '#commit':
                       return

                   # Only commits of the followed account are of interest.
                   if message.body.get('repo') != self.user_id:
                       return

                   if message.body.get('tooBig'):
                       LOGGER.error("#commit message is tooBig!")
                       return

                   submit(lambda: self.__on_commit(outputs, message))

               LOGGER.info("Listening to %s...", streaming + '/com.atproto.sync.subscribeRepos')
               await client.start(on_message)

+372

bluesky/output.py

···

       1
       1
       +
       import json

     

       2
       2
       +
       from httpx import Timeout

     

       3
       3
       +
       

     

       4
       4
       +
       from atproto import client_utils, Request

     

       5
       5
       +
       from atproto_client import models

     

       6
       6
       +
       from bluesky.atproto2 import Client2, resolve_identity

     

       7
       7
       +
       

     

       8
       8
       +
       from bluesky.common import SERVICE, ADULT_PATTERN, PORN_PATTERN, tokens_to_richtext

     

       9
       9
       +
       

     

       10
       10
       +
       import cross, util.database as database

     

       11
       11
       +
       from util.util import LOGGER, as_envvar

     

       12
       12
       +
       from util.media import MediaInfo, get_filename_from_url, get_media_meta, compress_image, convert_to_mp4

     

       13
       13
       +
       from util.database import DataBaseWorker

     

       14
       14
       +
       

     

       15
       15
       +
       # Values accepted inside the 'thread_gate' option.
       ALLOWED_GATES = ['mentioned', 'following', 'followers', 'everybody']


       class BlueskyOutputOptions:
           """Options read from a Bluesky output's ``options`` mapping.

           Defaults: quote_gate=False, thread_gate=['everybody'], encode_videos=True.
           A key that is absent (or None) keeps its default.
           """

           def __init__(self, o: dict) -> None:
               """Parse `o`.

               Raises:
                   ValueError: If 'thread_gate' contains a value outside ALLOWED_GATES.
               """
               raw_quote = o.get('quote_gate')
               self.quote_gate: bool = False if raw_quote is None else bool(raw_quote)

               raw_thread = o.get('thread_gate')
               if raw_thread is None:
                   self.thread_gate: list[str] = ['everybody']
               elif all(gate in ALLOWED_GATES for gate in raw_thread):
                   self.thread_gate = raw_thread
               else:
                   raise ValueError(f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {raw_thread}")

               raw_encode = o.get('encode_videos')
               self.encode_videos: bool = True if raw_encode is None else bool(raw_encode)

     

       36
       36
       +
       

     

       37
       37
       +
       class BlueskyOutput(cross.Output):

     

       38
       38
       +
           def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
               """Parse the options, resolve the account identity and log in.

               Args:
                   input: Input this output is attached to.
                   settings: Output settings; 'options', 'app-password', 'handle',
                       'did' and 'pds' are read.
                   db: Database worker handed on to ``cross.Output``.

               Raises:
                   Exception: If no app password is configured.
               """
               super().__init__(input, settings, db)
               raw_options = settings.get('options') or {}
               self.options = BlueskyOutputOptions(raw_options)

               if not as_envvar(settings.get('app-password')):
                   raise Exception("Account app password not provided!")

               identity = {
                   key: as_envvar(settings.get(key))
                   for key in ('handle', 'did', 'pds')
               }
               did, pds = resolve_identity(**identity)

               # httpx timeout: no default limit, 30 seconds for establishing a connection.
               timeout = Timeout(None, connect=30.0)

               self.bsky = Client2(pds, request=Request(timeout=timeout))
               self.bsky.login(did, as_envvar(settings.get('app-password')))

     

       55
       55
       +
           

     

       56
       56
       +
           def _find_parent(self, parent_id: str):
               """Look up the thread that `parent_id` was mapped to on this account.

               Returns:
                   None (after logging an error) when the database has no mapping;
                   otherwise a 4-tuple of (root strong ref, reply strong ref,
                   thread_tuple[2], thread_tuple[3]).
                   NOTE(review): the meaning of the last two items is defined by
                   database.find_mapped_thread - confirm there.

               Raises:
                   Exception: If the client is not logged in.
               """
               me = self.bsky.me
               if not me:
                   raise Exception("Client not logged in!")

               thread_tuple = database.find_mapped_thread(
                   self.db, parent_id,
                   self.input.user_id, self.input.service,
                   me.did, SERVICE,
               )
               if not thread_tuple:
                   LOGGER.error("Failed to find thread tuple in the database!")
                   return None

               # The first two columns hold JSON-encoded refs with 'uri' and 'cid'.
               decoded = (json.loads(thread_tuple[0]), json.loads(thread_tuple[1]))
               records = [
                   models.AppBskyFeedPost.CreateRecordResponse(uri=str(ref['uri']), cid=str(ref['cid']))
                   for ref in decoded
               ]
               root_strong, reply_strong = (models.create_strong_ref(rec) for rec in records)

               return (root_strong, reply_strong, thread_tuple[2], thread_tuple[3])

     

       86
       86
       +
           

     

       87
       87
       +
           def _split_attachments(self, attachments: list[MediaInfo]):

     

       88
       88
       +
               sup_media: list[MediaInfo] = []

     

       89
       89
       +
               unsup_media: list[MediaInfo] = []

     

       90
       90
       +
               

     

       91
       91
       +
               for a in attachments:

     

       92
       92
       +
                   if a.mime.startswith('image/') or a.mime.startswith('video/'): # TODO convert gifs to videos

     

       93
       93
       +
                       sup_media.append(a)

     

       94
       94
       +
                   else:

     

       95
       95
       +
                       unsup_media.append(a)

     

       96
       96
       +
               

     

       97
       97
       +
               return (sup_media, unsup_media)

     

       98
       98
       +
       

     

       99
       99
       +
           def _split_media_per_post(

     

       100
       100
       +
               self, 

     

       101
       101
       +
               tokens: list[client_utils.TextBuilder], 

     

       102
       102
       +
               media: list[MediaInfo]):

     

       103
       103
       +
               

     

       104
       104
       +
               posts: list[dict] = [{"tokens": tokens, "attachments": []} for tokens in tokens]

     

       105
       105
       +
               available_indices: list[int] = list(range(len(posts)))

     

       106
       106
       +
               

     

       107
       107
       +
               current_image_post_idx: int | None = None

     

       108
       108
       +
       

     

       109
       109
       +
               def make_blank_post() -> dict:

     

       110
       110
       +
                   return {

     

       111
       111
       +
                       "tokens": [client_utils.TextBuilder().text('')],

     

       112
       112
       +
                       "attachments": []

     

       113
       113
       +
                   }

     

       114
       114
       +
               

     

       115
       115
       +
               def pop_next_empty_index() -> int:

     

       116
       116
       +
                   if available_indices:

     

       117
       117
       +
                       return available_indices.pop(0)

     

       118
       118
       +
                   else:

     

       119
       119
       +
                       new_idx = len(posts)

     

       120
       120
       +
                       posts.append(make_blank_post())

     

       121
       121
       +
                       return new_idx

     

       122
       122
       +
               

     

       123
       123
       +
               for att in media:

     

       124
       124
       +
                   if att.mime.startswith('video/'):

     

       125
       125
       +
                       current_image_post_idx = None

     

       126
       126
       +
                       idx = pop_next_empty_index()

     

       127
       127
       +
                       posts[idx]["attachments"].append(att)

     

       128
       128
       +
                   elif att.mime.startswith('image/'):

     

       129
       129
       +
                       if (

     

       130
       130
       +
                           current_image_post_idx is not None

     

       131
       131
       +
                           and len(posts[current_image_post_idx]["attachments"]) < 4

     

       132
       132
       +
                       ):

     

       133
       133
       +
                           posts[current_image_post_idx]["attachments"].append(att)

     

       134
       134
       +
                       else:

     

       135
       135
       +
                           idx = pop_next_empty_index()

     

       136
       136
       +
                           posts[idx]["attachments"].append(att)

     

       137
       137
       +
                           current_image_post_idx = idx

     

       138
       138
       +
               

     

       139
       139
       +
               result: list[tuple[client_utils.TextBuilder, list[MediaInfo]]] = []

     

       140
       140
       +
               for p in posts:

     

       141
       141
       +
                   result.append((p["tokens"], p["attachments"]))

     

       142
       142
       +
               return result

     

       143
       143
       +
           

     

       144
       144
       +
           def accept_post(self, post: cross.Post):
               """Cross-post ``post`` to Bluesky and record the result in the database.

               Steps, in order:

               1. Resolve the parent (if ``post`` is a reply); give up silently when
                  the parent cannot be found.
               2. Build self-labels from the content warning and sensitivity flag.
               3. Append links for attachments that are neither images nor videos.
               4. Split the tokens into blocks of at most 300 characters and convert
                  each block to rich text.
               5. Reject the post when an image is larger than 2,000,000 or a video
                  larger than 100,000,000 (as measured by ``len(m.io)``), or when a
                  non-mp4 video would need encoding while encoding is disabled.
               6. Send one Bluesky record per ``(text, attachments)`` pair produced
                  by ``_split_media_per_post``, chaining them as a reply thread.
               7. Store every created record and map it to the source post.

               Args:
                   post: The source post to mirror.

               Raises:
                   Exception: If the Bluesky client is not logged in.
                   AssertionError: If the source post is missing from the database.
               """
               login = self.bsky.me
               if not login:
                   raise Exception("Client not logged in!")

               parent_id = post.get_parent_id()

               # used for db insertion
               new_root_id = None
               new_parent_id = None

               root_ref = None
               reply_ref = None
               if parent_id:
                   parents = self._find_parent(parent_id)
                   if not parents:
                       return
                   root_ref, reply_ref, new_root_id, new_parent_id = parents

               tokens = post.get_tokens().copy()

               unique_labels: set[str] = set()
               cw = post.get_cw()
               if cw:
                   tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
                   unique_labels.add('graphic-media')

                   # from bsky.app, a post can only have one of those labels.
                   # Only searched when a content warning exists, so a missing
                   # (None) warning cannot reach the regex.
                   if PORN_PATTERN.search(cw):
                       unique_labels.add('porn')
                   elif ADULT_PATTERN.search(cw):
                       unique_labels.add('sexual')

               if post.is_sensitive():
                   unique_labels.add('graphic-media')

               labels = models.ComAtprotoLabelDefs.SelfLabels(
                   values=[models.ComAtprotoLabelDefs.SelfLabel(val=label) for label in unique_labels]
               )

               sup_media, unsup_media = self._split_attachments(post.get_attachments())

               # Unsupported attachments are referenced as links at the end of the text.
               if unsup_media:
                   if tokens:
                       tokens.append(cross.TextToken('\n'))
                   for attachment in unsup_media:
                       tokens.append(cross.LinkToken(
                           attachment.url,
                           f"[{get_filename_from_url(attachment.url)}]"
                       ))
                       tokens.append(cross.TextToken(' '))

               split_tokens: list[list[cross.Token]] = cross.split_tokens(tokens, 300)
               post_text: list[client_utils.TextBuilder] = []

               # convert tokens into rich text. skip post if contains unsupported tokens
               for block in split_tokens:
                   rich_text = tokens_to_richtext(block)

                   if not rich_text:
                       LOGGER.error("Skipping '%s' as it contains invalid rich text types!", post.get_id())
                       return
                   post_text.append(rich_text)

               # A media-only post still needs one (empty) text block to attach to.
               if not post_text:
                   post_text = [client_utils.TextBuilder().text('')]

               # Validate all media before anything is sent.
               for m in sup_media:
                   if m.mime.startswith('image/'):
                       if len(m.io) > 2_000_000:
                           LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large.", post.get_id())
                           return

                   if m.mime.startswith('video/'):
                       if m.mime != 'video/mp4' and not self.options.encode_videos:
                           LOGGER.info("Video is not mp4, but encoding is disabled. Skipping '%s'...", post.get_id())
                           return

                       if len(m.io) > 100_000_000:
                           LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large?", post.get_id())
                           return

               created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
               baked_media = self._split_media_per_post(post_text, sup_media)

               for text, attachments in baked_media:
                   # Tolerate a text builder that arrives wrapped in a one-element list.
                   if isinstance(text, list):
                       text = text[0]

                   # Reply to the previous record of the thread (or the resolved
                   # parent); the very first record of a new thread has no reply ref.
                   reply_to = models.AppBskyFeedPost.ReplyRef(
                       parent=reply_ref,
                       root=root_ref
                   ) if root_ref and reply_ref else None

                   if not attachments:
                       if reply_to is not None:
                           new_post = self.bsky.send_post(
                               text,
                               reply_to=reply_to,
                               labels=labels,
                               time_iso=post.get_post_date_iso()
                           )
                       else:
                           new_post = self.bsky.send_post(text, labels=labels, time_iso=post.get_post_date_iso())
                           root_ref = models.create_strong_ref(new_post)
                   elif attachments[0].mime.startswith('image/'):
                       # if a single post is an image - everything else is an image
                       images: list[bytes] = []
                       image_alts: list[str] = []
                       image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []

                       for attachment in attachments:
                           # First pass at full quality; dimensions are read from
                           # this result, before any further compression.
                           image_io = compress_image(attachment.io, quality=100)
                           metadata = get_media_meta(image_io)

                           if len(image_io) > 1_000_000:
                               LOGGER.info("Compressing %s...", attachment.name)
                               image_io = compress_image(image_io)

                           images.append(image_io)
                           image_alts.append(attachment.alt)
                           image_aspect_ratios.append(models.AppBskyEmbedDefs.AspectRatio(
                               width=metadata['width'],
                               height=metadata['height']
                           ))

                       # Use this post's own text, not the first block of the thread.
                       new_post = self.bsky.send_images(
                           text=text,
                           images=images,
                           image_alts=image_alts,
                           image_aspect_ratios=image_aspect_ratios,
                           reply_to=reply_to,
                           labels=labels,
                           time_iso=post.get_post_date_iso()
                       )
                       if not root_ref:
                           root_ref = models.create_strong_ref(new_post)
                   else:  # video is guaranteed to be the only attachment
                       metadata = get_media_meta(attachments[0].io)
                       # presumably seconds -- TODO confirm against get_media_meta
                       if metadata['duration'] > 180:
                           # NOTE(review): returning here after earlier records of the
                           # thread were already sent leaves them unrecorded in the db.
                           LOGGER.info("Skipping post_id '%s', video attachment too long!", post.get_id())
                           return

                       video_io = attachments[0].io
                       if attachments[0].mime != 'video/mp4':
                           LOGGER.info("Converting %s to mp4...", attachments[0].name)
                           video_io = convert_to_mp4(video_io)

                       aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
                           width=metadata['width'],
                           height=metadata['height']
                       )

                       # Use this post's own text, not the first block of the thread.
                       new_post = self.bsky.send_video(
                           text=text,
                           video=video_io,
                           video_aspect_ratio=aspect_ratio,
                           video_alt=attachments[0].alt,
                           reply_to=reply_to,
                           labels=labels,
                           time_iso=post.get_post_date_iso()
                       )
                       if not root_ref:
                           root_ref = models.create_strong_ref(new_post)

                   # Common tail: apply gates, then make this record the parent of
                   # the next one in the thread.
                   self.bsky.create_gates(
                       self.options.thread_gate,
                       self.options.quote_gate,
                       new_post.uri,
                       time_iso=post.get_post_date_iso()
                   )
                   reply_ref = models.create_strong_ref(new_post)
                   created_records.append(new_post)

               db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)
               # NOTE(review): assert is stripped under -O; the source post is
               # expected to have been stored before this method runs.
               assert db_post, "ghghghhhhh"

               db_identifiers = [json.dumps(cr.model_dump(), sort_keys=True) for cr in created_records]

               # Not a reply to a known thread: the first record becomes the root.
               if new_root_id is None or new_parent_id is None:
                   new_root_id = database.insert_post(
                       self.db,
                       db_identifiers[0],
                       login.did,
                       SERVICE
                   )
                   new_parent_id = new_root_id
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)
                   db_identifiers = db_identifiers[1:]

               # Every remaining record is stored as a reply to the one before it.
               for db_id in db_identifiers:
                   new_parent_id = database.insert_reply(
                       self.db,
                       db_id,
                       login.did,
                       SERVICE,
                       new_parent_id,
                       new_root_id
                   )
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)

     

       358
       358
       +
                   

     

       359
       359
       +
           def delete_post(self, identifier: str):
               """Delete every Bluesky record that mirrors the given source post.

               Looks up the source post by ``identifier`` for the configured input
               user and service, then walks its stored mappings in reverse order,
               deleting each record from Bluesky and from the database. Does
               nothing when the source post is unknown.

               Args:
                   identifier: Identifier of the post on the input service.

               Raises:
                   Exception: If the Bluesky client is not logged in.
               """
               if not (account := self.bsky.me):
                   raise Exception("Client not logged in!")

               source_post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)
               if not source_post:
                   return

               stored = database.find_mappings(self.db, source_post['id'], SERVICE, account.did)
               # Reverse order, so the most recently stored mapping goes first.
               for row in stored[::-1]:
                   record_json = row[0]
                   LOGGER.info("Deleting '%s'...", record_json)
                   record_uri = json.loads(record_json)['uri']
                   self.bsky.delete_post(record_uri)
                   database.delete_post(self.db, record_json, SERVICE, account.did)

+4 -5

cross.py

···

       1
       1
        
       from typing import Callable, Any

     

       2
       2
       -
       from database import DataBaseWorker

     

       2
       2
       +
       from util.database import DataBaseWorker

     

       3
       3
        
       from datetime import datetime, timezone

     

       4
       4
       -
       from media_util import MediaInfo

     

       5
       5
       -
       from util import LOGGER

     

       6
       6
       -
       import util

     

       4
       4
       +
       from util.media import MediaInfo

     

       5
       5
       +
       from util.util import LOGGER, canonical_label

     

       7
       6
        
       import re

     

       8
       7
        
       

     

       9
       8
        
       ALTERNATE = re.compile(r'\S+|\s+')

     
···

       307
       306
        
       

     

       308
       307
        
               elif isinstance(token, LinkToken):

     

       309
       308
        
                   link_len = len(token.label)

     

       310
       310
       -
                   if util.canonical_label(token.label, token.href):

     

       309
       309
       +
                   if canonical_label(token.label, token.href):

     

       311
       310
        
                       link_len = min(link_len, max_link_len)

     

       312
       311
        
       

     

       313
       312
        
                   if current_length + link_len <= max_chars:

database.py util/database.py

+19 -12

main.py

···

       1
       1
       -
       from util import LOGGER

     

       2
       1
        
       import os

     

       3
       2
        
       import json

     

       4
       4
       -
       import database

     

       5
       5
       -
       import mastodon, misskey, bluesky, cross

     

       6
       3
        
       import asyncio, threading, queue, traceback

     

       7
       7
       -
       import util

     

       4
       4
       +
       

     

       5
       5
       +
       from util.util import LOGGER, as_json

     

       6
       6
       +
       import cross, util.database as database

     

       7
       7
       +
       

     

       8
       8
       +
       from bluesky.input import BlueskyPdsInput

     

       9
       9
       +
       from bluesky.output import BlueskyOutputOptions, BlueskyOutput

     

       10
       10
       +
       

     

       11
       11
       +
       from mastodon.input import MastodonInputOptions, MastodonInput

     

       12
       12
       +
       from mastodon.output import MastodonOutput

     

       13
       13
       +
       

     

       14
       14
       +
       from misskey.input import MisskeyInput

     

       8
       15
        
       

     

       9
       16
        
       DEFAULT_SETTINGS: dict = {

     

       10
       17
        
           'input': {

     

       11
       18
        
               'type': 'mastodon-wss',

     

       12
       19
        
               'instance': 'env:MASTODON_INSTANCE',

     

       13
       20
        
               'token': 'env:MASTODON_TOKEN',

     

       14
       14
       -
               "options": mastodon.MastodonInputOptions({})

     

       21
       21
       +
               "options": MastodonInputOptions({})

     

       15
       22
        
           },

     

       16
       23
        
           'outputs': [

     

       17
       24
        
               {

     

       18
       25
        
                   'type': 'bluesky',

     

       19
       26
        
                   'handle': 'env:BLUESKY_HANDLE',

     

       20
       27
        
                   'app-password': 'env:BLUESKY_APP_PASSWORD',

     

       21
       21
       -
                   'options': bluesky.BlueskyOutputOptions({})

     

       28
       28
       +
                   'options': BlueskyOutputOptions({})

     

       22
       29
        
               }

     

       23
       30
        
           ]

     

       24
       31
        
       }

     

       25
       32
        
       

     

       26
       33
        
       INPUTS = {

     

       27
       27
       -
           "mastodon-wss": lambda settings, db: mastodon.MastodonInput(settings, db),

     

       28
       28
       -
           "misskey-wss": lambda settigs, db: misskey.MisskeyInput(settigs, db),

     

       29
       29
       -
           "bluesky-pds-wss": lambda settings, db: bluesky.BlueskyPdsInput(settings, db)

     

       34
       34
       +
           "mastodon-wss": lambda settings, db: MastodonInput(settings, db),

     

       35
       35
       +
           "misskey-wss": lambda settigs, db: MisskeyInput(settigs, db),

     

       36
       36
       +
           "bluesky-pds-wss": lambda settings, db: BlueskyPdsInput(settings, db)

     

       30
       37
        
       }

     

       31
       38
        
       

     

       32
       39
        
       OUTPUTS = {

     

       33
       33
       -
           "bluesky": lambda input, settings, db: bluesky.BlueskyOutput(input, settings, db),

     

       34
       34
       -
           "mastodon": lambda input, settings, db: mastodon.MastodonOutput(input, settings, db)

     

       40
       40
       +
           "bluesky": lambda input, settings, db: BlueskyOutput(input, settings, db),

     

       41
       41
       +
           "mastodon": lambda input, settings, db: MastodonOutput(input, settings, db)

     

       35
       42
        
       }

     

       36
       43
        
       

     

       37
       44
        
       def execute(data_dir):

     
···

       45
       52
        
               LOGGER.info("First launch detected! Creating %s and exiting!", settings_path)

     

       46
       53
        
               

     

       47
       54
        
               with open(settings_path, 'w') as f:

     

       48
       48
       -
                   f.write(util.as_json(DEFAULT_SETTINGS, indent=2))

     

       55
       55
       +
                   f.write(as_json(DEFAULT_SETTINGS, indent=2))

     

       49
       56
        
               return 0

     

       50
       57
        
       

     

       51
       58
        
           LOGGER.info('Loading settings...')

markeddown.py mastodon/markeddown.py

-614

mastodon.py

···

       1
       1
       -
       from util import LOGGER

     

       2
       2
       -
       import requests, websockets

     

       3
       3
       -
       import util, media_util, json, cross

     

       4
       4
       -
       import database

     

       5
       5
       -
       from database import DataBaseWorker

     

       6
       6
       -
       from typing import Callable, Any

     

       7
       7
       -
       import asyncio, time

     

       8
       8
       -
       

     

       9
       9
       -
       from bs4 import BeautifulSoup, Tag

     

       10
       10
       -
       from bs4.element import NavigableString

     

       11
       11
       -
       import markeddown

     

       12
       12
       -
       from html import unescape

     

       13
       13
       -
       import re

     

       14
       14
       -
       

     

       15
       15
       -
       POSSIBLE_MIMES = [

     

       16
       16
       -
           'audio/ogg',

     

       17
       17
       -
           'audio/mp3',

     

       18
       18
       -
           'image/webp',

     

       19
       19
       -
           'image/jpeg',

     

       20
       20
       -
           'image/png',

     

       21
       21
       -
           'video/mp4',

     

       22
       22
       -
           'video/quicktime',

     

       23
       23
       -
           'video/webm'

     

       24
       24
       -
       ]

     

       25
       25
       -
           

     

       26
       26
       -
       md_parser = markeddown.HTMLToMarkdownParser()

     

       27
       27
       -
       md_parser.preserve_spaces = True

     

       28
       28
       -
       

     

       29
       29
       -
       def tokenize_post(status: dict) -> list[cross.Token]:

     

       30
       30
       -
           if not status.get('content'):

     

       31
       31
       -
               return []

     

       32
       32
       -
           

     

       33
       33
       -
           soup = BeautifulSoup(status['content'], "html.parser")

     

       34
       34
       -
           tokens: list[cross.Token] = []

     

       35
       35
       -
           

     

       36
       36
       -
           tags: list[dict] = status.get('tags', [])

     

       37
       37
       -
           mentions: list[dict] = status.get('mentions', [])

     

       38
       38
       -
           

     

       39
       39
       -
           def mdd(html):

     

       40
       40
       -
               md_parser.feed(unescape(html))

     

       41
       41
       -
               md = md_parser.get_markdown()

     

       42
       42
       -
               md_parser.reset()

     

       43
       43
       -
               return md

     

       44
       44
       -
           

     

       45
       45
       -
           def recurse(node) -> None:

     

       46
       46
       -
               if isinstance(node, NavigableString):

     

       47
       47
       -
                   tokens.append(cross.TextToken(str(node)))

     

       48
       48
       -
                   return

     

       49
       49
       -
               

     

       50
       50
       -
               if isinstance(node, Tag):

     

       51
       51
       -
                   if node.name.lower() == "a":

     

       52
       52
       -
                       href = node.get("href", "")

     

       53
       53
       -
                       inner_html = "".join(str(c) for c in node.contents)

     

       54
       54
       -
                       link_text_md = mdd(inner_html)

     

       55
       55
       -
                       

     

       56
       56
       -
                       if link_text_md.startswith('@'):

     

       57
       57
       -
                           as_mention = link_text_md[1:]

     

       58
       58
       -
                           for block in mentions:

     

       59
       59
       -
                               if href == block.get('url'):

     

       60
       60
       -
                                   tokens.append(cross.MentionToken(block['acct'], block['url']))

     

       61
       61
       -
                                   return

     

       62
       62
       -
                               elif as_mention == block.get('acct') or as_mention == block.get('username'):

     

       63
       63
       -
                                   tokens.append(cross.MentionToken(block['acct'], block['url']))

     

       64
       64
       -
                                   return

     

       65
       65
       -
                       

     

       66
       66
       -
                       if link_text_md.startswith('#'):

     

       67
       67
       -
                           as_tag = link_text_md[1:].lower()

     

       68
       68
       -
                           if any(as_tag == block.get('name') for block in tags):

     

       69
       69
       -
                               tokens.append(cross.TagToken(link_text_md[1:]))

     

       70
       70
       -
                               return

     

       71
       71
       -
                       

     

       72
       72
       -
                       # idk if we can safely convert this to string

     

       73
       73
       -
                       tokens.append(cross.LinkToken(str(href), link_text_md))

     

       74
       74
       -
                       return

     

       75
       75
       -
                   

     

       76
       76
       -
                   if node.find("a") is not None:

     

       77
       77
       -
                       for child in node.contents:

     

       78
       78
       -
                           recurse(child)

     

       79
       79
       -
                       return

     

       80
       80
       -
                   

     

       81
       81
       -
                   serialized = str(node)

     

       82
       82
       -
                   markdownified = mdd(serialized)

     

       83
       83
       -
                   if markdownified:

     

       84
       84
       -
                       tokens.append(cross.TextToken(markdownified))

     

       85
       85
       -
                   return

     

       86
       86
       -
               return

     

       87
       87
       -
           

     

       88
       88
       -
           for child in soup.contents:

     

       89
       89
       -
               recurse(child)

     

       90
       90
       -
           

     

       91
       91
       -
           if not tokens:

     

       92
       92
       -
               return []

     

       93
       93
       -
           

     

       94
       94
       -
           last_token = tokens[-1]

     

       95
       95
       -
           if last_token and isinstance(last_token, cross.TextToken) and last_token.text.endswith('\n\n'):

     

       96
       96
       -
               tokens[-1] = cross.TextToken(last_token.text[:-2])

     

       97
       97
       -
           

     

       98
       98
       -
           return tokens

     

       99
       99
       -
       

     

       100
       100
       -
       MARKDOWNY = ['text/x.misskeymarkdown', 'text/markdown', 'text/plain']

     

       101
       101
       -
           

     

       102
       102
       -
       class MastodonPost(cross.Post):

     

       103
       103
       -
           def __init__(self, status: dict, tokens: list[cross.Token], media_attachments: list[media_util.MediaInfo]) -> None:

     

       104
       104
       -
               super().__init__()

     

       105
       105
       -
               self.status = status

     

       106
       106
       -
               self.media_attachments = media_attachments

     

       107
       107
       -
               self.tokens = tokens

     

       108
       108
       -
           

     

       109
       109
       -
           def get_tokens(self) -> list[cross.Token]:

     

       110
       110
       -
               return self.tokens

     

       111
       111
       -
           

     

       112
       112
       -
           def get_parent_id(self) -> str | None:

     

       113
       113
       -
               return self.status.get('in_reply_to_id')

     

       114
       114
       -
           

     

       115
       115
       -
           def get_post_date_iso(self) -> str:

     

       116
       116
       -
               date = self.status.get('created_at')

     

       117
       117
       -
               return date or super().get_post_date_iso()

     

       118
       118
       -
           

     

       119
       119
       -
           def get_cw(self) -> str:

     

       120
       120
       -
               return self.status.get('spoiler_text') or ''

     

       121
       121
       -
           

     

       122
       122
       -
           def get_id(self) -> str:

     

       123
       123
       -
               return self.status['id']

     

       124
       124
       -
           

     

       125
       125
       -
           def get_languages(self) -> list[str]:

     

       126
       126
       -
               if self.status.get('language'):

     

       127
       127
       -
                   return [self.status['language']]

     

       128
       128
       -
               return []

     

       129
       129
       -
           

     

       130
       130
       -
           def is_sensitive(self) -> bool:

     

       131
       131
       -
               return self.status.get('sensitive', False)

     

       132
       132
       -
           

     

       133
       133
       -
           def get_attachments(self) -> list[media_util.MediaInfo]:

     

       134
       134
       -
               return self.media_attachments

     

       135
       135
       -
       

     

       136
       136
       -
       ALLOWED_VISIBILITY = ['public', 'unlisted']

     

       137
       137
       -
           

     

       138
       138
       -
       class MastodonInputOptions():

     

       139
       139
       -
           def __init__(self, o: dict) -> None:

     

       140
       140
       -
               self.allowed_visibility = ALLOWED_VISIBILITY

     

       141
       141
       -
               self.filters = [re.compile(f) for f in o.get('regex_filters', [])]

     

       142
       142
       -
               

     

       143
       143
       -
               allowed_visibility = o.get('allowed_visibility')

     

       144
       144
       -
               if allowed_visibility is not None:

     

       145
       145
       -
                   if any([v not in ALLOWED_VISIBILITY for v in allowed_visibility]):

     

       146
       146
       -
                       raise ValueError(f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}")

     

       147
       147
       -
                   self.allowed_visibility = allowed_visibility

     

       148
       148
       -
       

     

       149
       149
       -
       class MastodonInput(cross.Input):

     

       150
       150
       -
           def __init__(self, settings: dict, db: DataBaseWorker) -> None:

     

       151
       151
       -
               self.options = MastodonInputOptions(settings.get('options', {}))

     

       152
       152
       -
               self.token = util.as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))

     

       153
       153
       -
               instance: str = util.as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))

     

       154
       154
       -
               

     

       155
       155
       -
               service = instance[:-1] if instance.endswith('/') else instance

     

       156
       156
       -
               

     

       157
       157
       -
               LOGGER.info("Verifying %s credentails...", service)

     

       158
       158
       -
               responce = requests.get(f"{service}/api/v1/accounts/verify_credentials", headers={

     

       159
       159
       -
                   'Authorization': f'Bearer {self.token}'

     

       160
       160
       -
               })

     

       161
       161
       -
               if responce.status_code != 200:

     

       162
       162
       -
                   LOGGER.error("Failed to validate user credentials!")

     

       163
       163
       -
                   responce.raise_for_status()

     

       164
       164
       -
                   return

     

       165
       165
       -
               

     

       166
       166
       -
               super().__init__(service, responce.json()["id"], settings, db)

     

       167
       167
       -
               self.streaming = self._get_streaming_url()

     

       168
       168
       -
               

     

       169
       169
       -
               if not self.streaming:

     

       170
       170
       -
                   raise Exception("Instance %s does not support streaming!", service)

     

       171
       171
       -
       

     

       172
       172
       -
           def _get_streaming_url(self):

     

       173
       173
       -
               response = requests.get(f"{self.service}/api/v1/instance")

     

       174
       174
       -
               response.raise_for_status()

     

       175
       175
       -
               data: dict = response.json()

     

       176
       176
       -
               return (data.get('urls') or {}).get('streaming_api')

     

       177
       177
       -
       

     

       178
       178
       -
           def __to_tokens(self, status: dict):

     

       179
       179
       -
               content_type = status.get('content_type', 'text/plain')

     

       180
       180
       -
               raw_text = status.get('text')

     

       181
       181
       -
               

     

       182
       182
       -
               tags: list[str] = []

     

       183
       183
       -
               for tag in status.get('tags', []):

     

       184
       184
       -
                   tags.append(tag['name'])

     

       185
       185
       -
               

     

       186
       186
       -
               mentions: list[tuple[str, str]] = []

     

       187
       187
       -
               for mention in status.get('mentions', []):

     

       188
       188
       -
                   mentions.append(('@' + mention['username'], '@' + mention['acct']))

     

       189
       189
       -
               

     

       190
       190
       -
               if raw_text and content_type in MARKDOWNY:

     

       191
       191
       -
                   return cross.tokenize_markdown(raw_text, tags, mentions)

     

       192
       192
       -
               

     

       193
       193
       -
               akkoma_ext: dict | None = status.get('akkoma', {}).get('source')

     

       194
       194
       -
               if akkoma_ext:

     

       195
       195
       -
                   if akkoma_ext.get('mediaType') in MARKDOWNY:

     

       196
       196
       -
                       return cross.tokenize_markdown(akkoma_ext["content"], tags, mentions)

     

       197
       197
       -
                       

     

       198
       198
       -
               return tokenize_post(status)

     

       199
       199
       -
           

     

       200
       200
       -
           def _on_create_post(self, outputs: list[cross.Output], status: dict):

     

       201
       201
       -
               # skip events from other users

     

       202
       202
       -
               if (status.get('account') or {})['id'] != self.user_id:

     

       203
       203
       -
                   return

     

       204
       204
       -
               

     

       205
       205
       -
               if status.get('reblog') or (status.get('quote_id') or status.get('quote')) or status.get('poll'):

     

       206
       206
       -
                   # TODO polls not supported on bsky. maybe 3rd party? skip for now

     

       207
       207
       -
                   # we don't handle reblogs. possible with bridgy(?) and self

     

       208
       208
       -
                   # we don't handle quotes.

     

       209
       209
       -
                   LOGGER.info("Skipping '%s'! Reblog, quote or poll..", status['id'])

     

       210
       210
       -
                   return

     

       211
       211
       -
               

     

       212
       212
       -
               in_reply: str | None = status.get('in_reply_to_id')

     

       213
       213
       -
               in_reply_to: str | None = status.get('in_reply_to_account_id')

     

       214
       214
       -
               if in_reply_to and in_reply_to != self.user_id:

     

       215
       215
       -
                   # We don't support replies.

     

       216
       216
       -
                   LOGGER.info("Skipping '%s'! Reply to other user..", status['id'])

     

       217
       217
       -
                   return

     

       218
       218
       -
               

     

       219
       219
       -
               if status.get('visibility') not in self.options.allowed_visibility:

     

       220
       220
       -
                   # Skip f/o and direct posts

     

       221
       221
       -
                   LOGGER.info("Skipping '%s'! '%s' visibility..", status['id'], status.get('visibility'))

     

       222
       222
       -
                   return

     

       223
       223
       -
               

     

       224
       224
       -
               success = database.try_insert_post(self.db, status['id'], in_reply, self.user_id, self.service)

     

       225
       225
       -
               if not success:

     

       226
       226
       -
                   LOGGER.info("Skipping '%s' as parent post was not found in db!", status['id'])

     

       227
       227
       -
                   return

     

       228
       228
       -
               

     

       229
       229
       -
               tokens = self.__to_tokens(status)

     

       230
       230
       -
               if not cross.test_filters(tokens, self.options.filters):

     

       231
       231
       -
                   LOGGER.info("Skipping '%s'. Matched a filter!", status['id'])

     

       232
       232
       -
                   return

     

       233
       233
       -
               

     

       234
       234
       -
               LOGGER.info("Crossposting '%s'...", status['id'])

     

       235
       235
       -
               

     

       236
       236
       -
               media_attachments: list[media_util.MediaInfo] = []

     

       237
       237
       -
               for attachment in status.get('media_attachments', []):

     

       238
       238
       -
                   LOGGER.info("Downloading %s...", attachment['url'])

     

       239
       239
       -
                   info = media_util.download_media(attachment['url'], attachment.get('description') or '')

     

       240
       240
       -
                   if not info:

     

       241
       241
       -
                       LOGGER.error("Skipping '%s'. Failed to download media!", status['id'])

     

       242
       242
       -
                       return

     

       243
       243
       -
                   media_attachments.append(info)

     

       244
       244
       -
               

     

       245
       245
       -
               cross_post = MastodonPost(status, tokens, media_attachments)

     

       246
       246
       -
               for output in outputs:

     

       247
       247
       -
                   output.accept_post(cross_post)

     

       248
       248
       -
           

     

       249
       249
       -
           def _on_delete_post(self, outputs: list[cross.Output], identifier: str):

     

       250
       250
       -
               post = database.find_post(self.db, identifier, self.user_id, self.service)

     

       251
       251
       -
               if not post:

     

       252
       252
       -
                   return

     

       253
       253
       -
               

     

       254
       254
       -
               LOGGER.info("Deleting '%s'...", identifier)

     

       255
       255
       -
               for output in outputs:

     

       256
       256
       -
                   output.delete_post(identifier)

     

       257
       257
       -
               database.delete_post(self.db, identifier, self.user_id, self.service)

     

       258
       258
       -
           

     

       259
       259
       -
           def _on_post(self, outputs: list[cross.Output], event: str, payload: str):

     

       260
       260
       -
               if event == 'update':

     

       261
       261
       -
                   self._on_create_post(outputs, json.loads(payload))

     

       262
       262
       -
               elif event == 'delete':

     

       263
       263
       -
                   self._on_delete_post(outputs, payload)          

     

       264
       264
       -
           

     

       265
       265
       -
           async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):

     

       266
       266
       -
               uri = f"{self.streaming}/api/v1/streaming?stream=user&access_token={self.token}"

     

       267
       267
       -
               

     

       268
       268
       -
               async for ws in websockets.connect(uri, extra_headers={"User-Agent": "XPost/0.0.3"}):

     

       269
       269
       -
                   try:

     

       270
       270
       -
                       LOGGER.info("Listening to %s...", self.streaming)

     

       271
       271
       -
                       

     

       272
       272
       -
                       async def listen_for_messages():

     

       273
       273
       -
                           async for msg in ws:

     

       274
       274
       -
                               data = json.loads(msg)

     

       275
       275
       -
                               event: str = data.get('event')

     

       276
       276
       -
                               payload: str = data.get('payload')

     

       277
       277
       -
                   

     

       278
       278
       -
                               submit(lambda: self._on_post(outputs, str(event), str(payload)))

     

       279
       279
       -
                   

     

       280
       280
       -
                       listen = asyncio.create_task(listen_for_messages())

     

       281
       281
       -
                       

     

       282
       282
       -
                       await asyncio.gather(listen)

     

       283
       283
       -
                   except websockets.ConnectionClosedError as e:

     

       284
       284
       -
                       LOGGER.error(e, stack_info=True, exc_info=True)

     

       285
       285
       -
                       LOGGER.info("Reconnecting to %s...", self.streaming)

     

       286
       286
       -
                       continue

     

       287
       287
       -
       

     

       288
       288
       -
       ALLOWED_POSTING_VISIBILITY = ['public', 'unlisted', 'private']

     

       289
       289
       -
       

     

       290
       290
       -
       class MastodonOutputOptions():

     

       291
       291
       -
           def __init__(self, o: dict) -> None:

     

       292
       292
       -
               self.visibility = 'public'

     

       293
       293
       -
               

     

       294
       294
       -
               visibility = o.get('visibility')

     

       295
       295
       -
               if visibility is not None:

     

       296
       296
       -
                   if visibility not in ALLOWED_POSTING_VISIBILITY:

     

       297
       297
       -
                       raise ValueError(f"'visibility' only accepts {', '.join(ALLOWED_POSTING_VISIBILITY)}, got: {visibility}")

     

       298
       298
       -
                   self.visibility = visibility

     

       299
       299
       -
       

     

       300
       300
       -
       class MastodonOutput(cross.Output):

     

       301
       301
       -
           def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:

     

       302
       302
       -
               super().__init__(input, settings, db)

     

       303
       303
       -
               self.options = settings.get('options') or {}

     

       304
       304
       -
               self.token = util.as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))

     

       305
       305
       -
               instance: str = util.as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))

     

       306
       306
       -
               

     

       307
       307
       -
               self.service = instance[:-1] if instance.endswith('/') else instance

     

       308
       308
       -
               

     

       309
       309
       -
               LOGGER.info("Verifying %s credentails...", self.service)

     

       310
       310
       -
               responce = requests.get(f"{self.service}/api/v1/accounts/verify_credentials", headers={

     

       311
       311
       -
                   'Authorization': f'Bearer {self.token}'

     

       312
       312
       -
               })

     

       313
       313
       -
               if responce.status_code != 200:

     

       314
       314
       -
                   LOGGER.error("Failed to validate user credentials!")

     

       315
       315
       -
                   responce.raise_for_status()

     

       316
       316
       -
                   return

     

       317
       317
       -
               self.user_id: str = responce.json()["id"]

     

       318
       318
       -
       

     

       319
       319
       -
               LOGGER.info("Getting %s configuration...", self.service)

     

       320
       320
       -
               responce = requests.get(f"{self.service}/api/v1/instance", headers={

     

       321
       321
       -
                   'Authorization': f'Bearer {self.token}'

     

       322
       322
       -
               })

     

       323
       323
       -
               if responce.status_code != 200:

     

       324
       324
       -
                   LOGGER.error("Failed to get instance info!")

     

       325
       325
       -
                   responce.raise_for_status()

     

       326
       326
       -
                   return

     

       327
       327
       -
               

     

       328
       328
       -
               instance_info: dict = responce.json()

     

       329
       329
       -
               configuration: dict = instance_info['configuration']

     

       330
       330
       -
               

     

       331
       331
       -
               statuses_config: dict = configuration.get('statuses', {})

     

       332
       332
       -
               self.max_characters: int = statuses_config.get('max_characters', 500)

     

       333
       333
       -
               self.max_media_attachments: int = statuses_config.get('max_media_attachments', 4)

     

       334
       334
       -
               self.characters_reserved_per_url: int = statuses_config.get('characters_reserved_per_url', 23)

     

       335
       335
       -
               

     

       336
       336
       -
               media_config: dict = configuration.get('media_attachments', {})

     

       337
       337
       -
               self.image_size_limit: int = media_config.get('image_size_limit', 16777216)

     

       338
       338
       -
               self.video_size_limit: int = media_config.get('video_size_limit', 103809024)

     

       339
       339
       -
               self.supported_mime_types: list[str] = media_config.get('supported_mime_types', POSSIBLE_MIMES)

     

       340
       340
       -
               

     

       341
       341
       -
               # *oma: max post chars

     

       342
       342
       -
               max_toot_chars = instance_info.get('max_toot_chars')

     

       343
       343
       -
               if max_toot_chars:

     

       344
       344
       -
                   self.max_characters: int = max_toot_chars

     

       345
       345
       -
               

     

       346
       346
       -
               # *oma: max upload limit

     

       347
       347
       -
               upload_limit = instance_info.get('upload_limit')

     

       348
       348
       -
               if upload_limit:

     

       349
       349
       -
                   self.image_size_limit: int = upload_limit

     

       350
       350
       -
                   self.video_size_limit: int = upload_limit

     

       351
       351
       -
               

     

       352
       352
       -
               # *oma ext: supported text types

     

       353
       353
       -
               self.text_format = 'text/plain'

     

       354
       354
       -
               pleroma = instance_info.get('pleroma')

     

       355
       355
       -
               if pleroma:

     

       356
       356
       -
                   post_formats: list[str] = pleroma.get('metadata', {}).get('post_formats', [])

     

       357
       357
       -
                   if 'text/x.misskeymarkdown' in post_formats:

     

       358
       358
       -
                       self.text_format = 'text/x.misskeymarkdown'

     

       359
       359
       -
                   elif 'text/markdown' in post_formats:

     

       360
       360
       -
                       self.text_format = 'text/markdown'

     

       361
       361
       -
           

     

       362
       362
       -
           def upload_media(self, attachments: list[media_util.MediaInfo]) -> list[str] | None:

     

       363
       363
       -
               for a in attachments:

     

       364
       364
       -
                   if a.mime.startswith('image/') and len(a.io) > self.image_size_limit:

     

       365
       365
       -
                       return None

     

       366
       366
       -
                   

     

       367
       367
       -
                   if a.mime.startswith('video/') and len(a.io) > self.video_size_limit:

     

       368
       368
       -
                       return None

     

       369
       369
       -
                   

     

       370
       370
       -
                   if not a.mime.startswith('image/') and not a.mime.startswith('video/'):

     

       371
       371
       -
                       if len(a.io) > 7_000_000:

     

       372
       372
       -
                           return None

     

       373
       373
       -
               

     

       374
       374
       -
               uploads: list[dict] = []

     

       375
       375
       -
               for a in attachments:

     

       376
       376
       -
                   data = {}

     

       377
       377
       -
                   if a.alt:

     

       378
       378
       -
                       data['description'] = a.alt

     

       379
       379
       -
                   

     

       380
       380
       -
                   req = requests.post(f"{self.service}/api/v2/media", headers= {

     

       381
       381
       -
                       'Authorization': f'Bearer {self.token}'

     

       382
       382
       -
                   }, files={'file': (a.name, a.io, a.mime)}, data=data)

     

       383
       383
       -
                   

     

       384
       384
       -
                   if req.status_code == 200:

     

       385
       385
       -
                       LOGGER.info("Uploaded %s! (%s)", a.name, req.json()['id'])

     

       386
       386
       -
                       uploads.append({

     

       387
       387
       -
                           'done': True,

     

       388
       388
       -
                           'id': req.json()['id']

     

       389
       389
       -
                       })

     

       390
       390
       -
                   elif req.status_code == 202:

     

       391
       391
       -
                       LOGGER.info("Waiting for %s to process!", a.name)

     

       392
       392
       -
                       uploads.append({

     

       393
       393
       -
                           'done': False,

     

       394
       394
       -
                           'id': req.json()['id']

     

       395
       395
       -
                       })

     

       396
       396
       -
                   else:

     

       397
       397
       -
                       LOGGER.error("Failed to upload %s! %s", a.name, req.text)

     

       398
       398
       -
                       req.raise_for_status()

     

       399
       399
       -
               

     

       400
       400
       -
               while any([not val['done'] for val in uploads]):

     

       401
       401
       -
                   LOGGER.info("Waiting for media to process...")

     

       402
       402
       -
                   time.sleep(3)

     

       403
       403
       -
                   for media in uploads:

     

       404
       404
       -
                       if media['done']:

     

       405
       405
       -
                           continue

     

       406
       406
       -
                       

     

       407
       407
       -
                       reqs = requests.get(f'{self.service}/api/v1/media/{media['id']}', headers={

     

       408
       408
       -
                           'Authorization': f'Bearer {self.token}'

     

       409
       409
       -
                       })

     

       410
       410
       -
                       

     

       411
       411
       -
                       if reqs.status_code == 206:

     

       412
       412
       -
                           continue

     

       413
       413
       -
                       

     

       414
       414
       -
                       if reqs.status_code == 200:

     

       415
       415
       -
                           media['done'] = True

     

       416
       416
       -
                           continue

     

       417
       417
       -
                       reqs.raise_for_status()

     

       418
       418
       -
               

     

       419
       419
       -
               return [val['id'] for val in uploads]

     

       420
       420
       -
       

     

       421
       421
       -
           def token_to_string(self, tokens: list[cross.Token]) -> str | None:
               """Render a token sequence as a single string for this output.

               Text tokens are emitted verbatim and tag tokens as ``#tag``. A link
               token is emitted as its bare href when ``util.canonical_label``
               returns a truthy value for its label/href pair; otherwise it is
               written as ``label: href`` for ``text/plain`` or as a markdown link
               for the markdown text formats.

               Returns:
                   The concatenated text, or ``None`` as soon as a token of any
                   other type is encountered.
               """
               markdown_formats = {'text/x.misskeymarkdown', 'text/markdown'}
               pieces: list[str] = []

               for tok in tokens:
                   if isinstance(tok, cross.TextToken):
                       pieces.append(tok.text)
                       continue
                   if isinstance(tok, cross.TagToken):
                       pieces.append('#' + tok.tag)
                       continue
                   if not isinstance(tok, cross.LinkToken):
                       # Unsupported token type: the whole conversion is abandoned.
                       return None

                   if util.canonical_label(tok.label, tok.href):
                       pieces.append(tok.href)
                   elif self.text_format == 'text/plain':
                       pieces.append(f'{tok.label}: {tok.href}')
                   elif self.text_format in markdown_formats:
                       pieces.append(f'[{tok.label}]({tok.href})')
                   # For any other text format a labelled link contributes nothing.

               return ''.join(pieces)

     

       441
       441
       -
       

     

       442
       442
       -
           def split_tokens_media(self, tokens: list[cross.Token], media: list[media_util.MediaInfo]):
               """Split tokens into post texts and spread attachments across them.

               The tokens are split with ``cross.split_tokens`` and each chunk is
               rendered with ``token_to_string``. Attachments are then assigned in
               order: each post receives up to ``self.max_media_attachments`` items,
               filling the text posts first and appending blank posts once every
               text post has been used.

               Returns:
                   A list of ``(text, attachments)`` tuples, or ``None`` when any
                   chunk cannot be rendered to text.
               """
               texts: list[str] = []
               for chunk in cross.split_tokens(tokens, self.max_characters, self.characters_reserved_per_url):
                   rendered = self.token_to_string(chunk)
                   if rendered is None:
                       return None
                   texts.append(rendered)

               # There is always at least one (possibly empty) post to hang media on.
               drafts: list[dict] = [{"text": text, "attachments": []} for text in (texts or [''])]

               # Indices of the text posts that have not been given any media yet.
               unused: list[int] = list(range(len(drafts)))
               target: int | None = None

               for item in media:
                   has_room = (
                       target is not None
                       and len(drafts[target]["attachments"]) < self.max_media_attachments
                   )
                   if not has_room:
                       if unused:
                           target = unused.pop(0)
                       else:
                           drafts.append({"text": '', "attachments": []})
                           target = len(drafts) - 1
                   drafts[target]["attachments"].append(item)

               return [(draft['text'], draft["attachments"]) for draft in drafts]

     

       492
       492
       -
               

     

       493
       493
       -
           def accept_post(self, post: cross.Post):
               """Publish ``post`` on this instance and record the created statuses.

               The post is split into one or more statuses (see
               ``split_tokens_media``), their media is uploaded, and the statuses are
               posted as a chain in which each status replies to the previous one.
               The created status ids are then stored in the database and mapped to
               the source post.

               Returns:
                   ``None`` in all cases. The method returns early (after logging an
                   error) when the parent thread mapping is missing, when the post
                   cannot be split, or when a media upload yields nothing.

               Raises:
                   Whatever ``raise_for_status()`` raises when the statuses endpoint
                   answers with an HTTP error.
                   AssertionError: the source post is not present in the database.
               """
               parent_id = post.get_parent_id()

               new_root_id: int | None = None
               new_parent_id: int | None = None

               reply_ref: str | None = None
               if parent_id:
                   thread_tuple = database.find_mapped_thread(
                       self.db,
                       parent_id,
                       self.input.user_id,
                       self.input.service,
                       self.user_id,
                       self.service
                   )

                   if not thread_tuple:
                       LOGGER.error("Failed to find thread tuple in the database!")
                       return None

                   _, reply_ref, new_root_id, new_parent_id = thread_tuple

               # Only the first declared language is sent; fall back to English.
               languages = post.get_languages()
               lang: str = languages[0] if languages else 'en'

               raw_statuses = self.split_tokens_media(post.get_tokens(), post.get_attachments())
               if not raw_statuses:
                   LOGGER.error("Failed to split post into statuses?")
                   return None

               # Upload all media before posting anything, so an upload failure
               # aborts before any status has been created.
               baked_statuses = []
               for status, raw_media in raw_statuses:
                   if not raw_media:
                       baked_statuses.append((status, []))
                       continue

                   media: list[str] | None = self.upload_media(raw_media)
                   if not media:
                       LOGGER.error("Failed to upload attachments!")
                       return None
                   baked_statuses.append((status, media))

               created_statuses: list[str] = []

               for status, media in baked_statuses:
                   payload = {
                       'status': status,
                       'media_ids': media or [],
                       'spoiler_text': post.get_cw(),
                       'visibility': self.options.get('visibility', 'public'),
                       'content_type': self.text_format,
                       'language': lang
                   }

                   if media:
                       payload['sensitive'] = post.is_sensitive()

                       # A content warning always marks the attached media sensitive.
                       if post.get_cw():
                           payload['sensitive'] = True

                       # Media-only statuses get a placeholder text.
                       if not status:
                           payload['status'] = '🖼️'

                   if reply_ref:
                       payload['in_reply_to_id'] = reply_ref

                   reqs = requests.post(f'{self.service}/api/v1/statuses', headers={
                       'Authorization': f'Bearer {self.token}',
                       'Content-Type': 'application/json'
                   }, json=payload)

                   if reqs.status_code != 200:
                       # Logged as an error, like the other failure paths here.
                       LOGGER.error("Failed to post status! %s - %s", reqs.status_code, reqs.text)
                       reqs.raise_for_status()

                   # Parse the body once; the new status is the parent of the next one.
                   reply_ref = reqs.json()['id']
                   LOGGER.info("Created new status %s!", reply_ref)

                   created_statuses.append(reply_ref)

               db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)
               assert db_post, "source post is missing from the database"

               if new_root_id is None or new_parent_id is None:
                   # Not a reply to a known thread: the first status becomes the root.
                   new_root_id = database.insert_post(
                       self.db,
                       created_statuses[0],
                       self.user_id,
                       self.service
                   )
                   new_parent_id = new_root_id
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)
                   created_statuses = created_statuses[1:]

               # Remaining statuses are stored as a reply chain under the root.
               for db_id in created_statuses:
                   new_parent_id = database.insert_reply(
                       self.db,
                       db_id,
                       self.user_id,
                       self.service,
                       new_parent_id,
                       new_root_id
                   )
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)

     

       601
       601
       -
           

     

       602
       602
       -
           def delete_post(self, identifier: str):
               """Delete every status on this instance that mirrors a source post.

               Looks up the source post by ``identifier`` and does nothing when it is
               unknown. Otherwise each mapped status is deleted remotely and its
               database record removed, walking the mappings in the reverse of the
               order returned by ``database.find_mappings``.

               Deletion is best-effort: a failed HTTP request is logged, and the
               database record is removed regardless.
               """
               post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)
               if not post:
                   return

               mappings = database.find_mappings(self.db, post['id'], self.service, self.user_id)
               for mapping in mappings[::-1]:
                   status_id = mapping[0]
                   LOGGER.info("Deleting '%s'...", status_id)
                   response = requests.delete(f'{self.service}/api/v1/statuses/{status_id}', headers={
                       'Authorization': f'Bearer {self.token}'
                   })
                   if not response.ok:
                       # Surface the failure instead of silently ignoring the response.
                       LOGGER.error(
                           "Failed to delete '%s'! %s - %s",
                           status_id, response.status_code, response.text
                       )
                   database.delete_post(self.db, status_id, self.service, self.user_id)

     

       614
       614
       -

+116

mastodon/common.py

···

       1
       1
       +
       from bs4 import BeautifulSoup, Tag

     

       2
       2
       +
       from bs4.element import NavigableString

     

       3
       3
       +
       from html import unescape

     

       4
       4
       +
       

     

       5
       5
       +
       import mastodon.markeddown as markeddown

     

       6
       6
       +
       

     

       7
       7
       +
       import cross

     

       8
       8
       +
       from util.media import MediaInfo

     

       9
       9
       +
       

     

       10
       10
       +
       # Shared HTML-to-Markdown converter. tokenize_post feeds it one HTML
       # fragment at a time, reads the markdown back and resets it afterwards.
       md_parser = markeddown.HTMLToMarkdownParser()
       md_parser.preserve_spaces = True

     

       12
       12
       +
       

     

       13
       13
       +
       class MastodonPost(cross.Post):
           """A ``cross.Post`` backed by a status dictionary.

           The accessors read straight from ``status``; tokens and media are
           supplied ready-made by the caller.
           """

           def __init__(self, status: dict, tokens: list[cross.Token], media_attachments: list[MediaInfo]) -> None:
               super().__init__()
               self.status = status
               self.media_attachments = media_attachments
               self.tokens = tokens

           def get_id(self) -> str:
               """Return the status id (the ``id`` key must be present)."""
               return self.status['id']

           def get_parent_id(self) -> str | None:
               """Return ``in_reply_to_id``, or ``None`` when the key is absent."""
               return self.status.get('in_reply_to_id')

           def get_tokens(self) -> list[cross.Token]:
               """Return the tokens given at construction time."""
               return self.tokens

           def get_attachments(self) -> list[MediaInfo]:
               """Return the media attachments given at construction time."""
               return self.media_attachments

           def get_post_date_iso(self) -> str:
               """Return ``created_at``, deferring to the base class when it is empty."""
               created_at = self.status.get('created_at')
               if created_at:
                   return created_at
               return super().get_post_date_iso()

           def get_cw(self) -> str:
               """Return ``spoiler_text``, or an empty string when missing or empty."""
               spoiler = self.status.get('spoiler_text')
               return spoiler if spoiler else ''

           def get_languages(self) -> list[str]:
               """Return the status language as a one-item list, or an empty list."""
               language = self.status.get('language')
               return [language] if language else []

           def is_sensitive(self) -> bool:
               """Return the ``sensitive`` value, defaulting to ``False`` when absent."""
               return self.status.get('sensitive', False)

     

       46
       46
       +
       

     

       47
       47
       +
       def tokenize_post(status: dict) -> list[cross.Token]:
           """Convert the HTML ``content`` of a status into cross-posting tokens.

           Plain strings become text tokens. ``<a>`` elements become mention, tag or
           link tokens, using the status' ``mentions`` and ``tags`` lists to decide.
           Elements that contain a link somewhere inside are walked child by child;
           every other element is converted to markdown and emitted as one text
           token. A trailing blank line on the final text token is removed.

           Returns:
               The token list; empty when the status has no content.
           """
           content = status.get('content')
           if not content:
               return []

           soup = BeautifulSoup(content, "html.parser")
           tokens: list[cross.Token] = []

           tags: list[dict] = status.get('tags', [])
           mentions: list[dict] = status.get('mentions', [])

           def to_markdown(fragment):
               # The parser is shared at module level, so reset it after each use.
               md_parser.feed(unescape(fragment))
               rendered = md_parser.get_markdown()
               md_parser.reset()
               return rendered

           def anchor_token(anchor):
               """Classify one <a> element as a mention, tag or plain link."""
               href = anchor.get("href", "")
               label = to_markdown("".join(map(str, anchor.contents)))

               if label.startswith('@'):
                   handle = label[1:]
                   for mention in mentions:
                       # Match on the profile URL first, then on the visible handle.
                       if href == mention.get('url') or handle in (mention.get('acct'), mention.get('username')):
                           return cross.MentionToken(mention['acct'], mention['url'])

               if label.startswith('#'):
                   hashtag = label[1:]
                   wanted = hashtag.lower()
                   if any(wanted == tag.get('name') for tag in tags):
                       return cross.TagToken(hashtag)

               # idk if we can safely convert this to string
               return cross.LinkToken(str(href), label)

           def walk(node) -> None:
               if isinstance(node, NavigableString):
                   tokens.append(cross.TextToken(str(node)))
               elif isinstance(node, Tag):
                   if node.name.lower() == "a":
                       tokens.append(anchor_token(node))
                   elif node.find("a") is not None:
                       # Descend so that the nested link gets its own token.
                       for child in node.contents:
                           walk(child)
                   else:
                       text = to_markdown(str(node))
                       if text:
                           tokens.append(cross.TextToken(text))

           for top_level in soup.contents:
               walk(top_level)

           if tokens:
               tail = tokens[-1]
               if isinstance(tail, cross.TextToken) and tail.text.endswith('\n\n'):
                   tokens[-1] = cross.TextToken(tail.text[:-2])

           return tokens

+166

mastodon/input.py

···

       1
       1
       +
       import requests, websockets

     

       2
       2
       +
       import json

     

       3
       3
       +
       import re

     

       4
       4
       +
       import asyncio

     

       5
       5
       +
       

     

       6
       6
       +
       from mastodon.common import MastodonPost, tokenize_post

     

       7
       7
       +
       

     

       8
       8
       +
       import cross, util.database as database

     

       9
       9
       +
       from util.util import LOGGER, as_envvar

     

       10
       10
       +
       from util.media import MediaInfo, download_media

     

       11
       11
       +
       from util.database import DataBaseWorker

     

       12
       12
       +
       

     

       13
       13
       +
       from typing import Callable, Any

     

       14
       14
       +
       

     

       15
       15
       +
       # Visibility values an input may be configured to accept.
       ALLOWED_VISIBILITY = ['public', 'unlisted']
       # Content types whose source text is tokenized as markdown.
       MARKDOWNY = ['text/x.misskeymarkdown', 'text/markdown', 'text/plain']

       class MastodonInputOptions():
           """Validated options for a Mastodon input.

           Attributes are ``filters`` (compiled ``regex_filters`` patterns) and
           ``allowed_visibility`` (the configured list, or a copy of
           ``ALLOWED_VISIBILITY`` when the option is not set).
           """

           def __init__(self, o: dict) -> None:
               """Read and validate the options dictionary ``o``.

               Raises:
                   ValueError: ``allowed_visibility`` contains a value that is not
                       in ``ALLOWED_VISIBILITY``.
               """
               self.filters = [re.compile(f) for f in o.get('regex_filters', [])]

               allowed_visibility = o.get('allowed_visibility')
               if allowed_visibility is None:
                   # Copy the default so that mutating one instance's list can never
                   # change the module-level constant shared by all inputs.
                   self.allowed_visibility = list(ALLOWED_VISIBILITY)
                   return

               if any(v not in ALLOWED_VISIBILITY for v in allowed_visibility):
                   raise ValueError(f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}")
               self.allowed_visibility = allowed_visibility

     

       28
       28
       +
       

     

       29
       29
       +
       class MastodonInput(cross.Input):

     

       30
       30
       +
           def __init__(self, settings: dict, db: DataBaseWorker) -> None:
               """Validate the settings, verify the credentials and find the stream.

               ``token`` and ``instance`` are read from ``settings`` through
               ``as_envvar``. The token is checked against the instance's
               ``verify_credentials`` endpoint, and the returned account id is
               handed to the base-class initialiser together with the instance URL
               (one trailing slash removed).

               Raises:
                   ValueError: ``token`` or ``instance`` is missing or empty.
                   RuntimeError: credential verification answered with a non-200
                       status that ``raise_for_status()`` does not treat as an error.
                   Exception: the instance does not advertise a streaming API.
                   Whatever ``raise_for_status()`` raises for HTTP error responses.
               """
               self.options = MastodonInputOptions(settings.get('options', {}))

               token = as_envvar(settings.get('token'))
               if not token:
                   raise ValueError("'token' is required")
               self.token = token

               instance: str = as_envvar(settings.get('instance'))
               if not instance:
                   raise ValueError("'instance' is required")

               service = instance[:-1] if instance.endswith('/') else instance

               LOGGER.info("Verifying %s credentails...", service)
               response = requests.get(f"{service}/api/v1/accounts/verify_credentials", headers={
                   'Authorization': f'Bearer {self.token}'
               })
               if response.status_code != 200:
                   LOGGER.error("Failed to validate user credentials!")
                   response.raise_for_status()
                   # raise_for_status() only raises for 4xx/5xx. Any other non-200
                   # answer must still abort, otherwise the object would be left
                   # without the base-class initialisation.
                   raise RuntimeError(
                       f"Unexpected response {response.status_code} while verifying credentials for {service}"
                   )

               super().__init__(service, response.json()["id"], settings, db)
               self.streaming = self._get_streaming_url()

               if not self.streaming:
                   # Exception() does not apply %-formatting, so build the text here.
                   raise Exception(f"Instance {service} does not support streaming!")

     

       51
       51
       +
       

     

       52
       52
       +
           def _get_streaming_url(self):
               """Fetch the instance description and return its streaming API URL.

               Returns ``urls.streaming_api`` from ``/api/v1/instance``, or ``None``
               when the instance does not list one. HTTP errors are propagated by
               ``raise_for_status()``.
               """
               instance_info = requests.get(f"{self.service}/api/v1/instance")
               instance_info.raise_for_status()

               described: dict = instance_info.json()
               # `urls` may be missing or null; treat both as "no URLs advertised".
               urls = described.get('urls') or {}
               return urls.get('streaming_api')

     

       57
       57
       +
       

     

       58
       58
       +
           def __to_tokens(self, status: dict):
               """Convert a status payload into crosspost tokens.

               Markdown-like sources go through ``cross.tokenize_markdown``:
               first the status' own ``text`` (when its ``content_type`` is in
               ``MARKDOWNY``), then the ``akkoma.source`` extension (when its
               ``mediaType`` is in ``MARKDOWNY``). Every other status is handed
               to ``tokenize_post``.
               """
               content_type = status.get('content_type', 'text/plain')
               raw_text = status.get('text')

               # `or` guards: a key that is present but null must behave like a
               # missing one instead of raising while iterating / chaining .get()
               tags: list[str] = [tag['name'] for tag in status.get('tags') or []]
               mentions: list[tuple[str, str]] = [
                   ('@' + mention['username'], '@' + mention['acct'])
                   for mention in status.get('mentions') or []
               ]

               if raw_text and content_type in MARKDOWNY:
                   return cross.tokenize_markdown(raw_text, tags, mentions)

               akkoma_ext: dict | None = (status.get('akkoma') or {}).get('source')
               if akkoma_ext and akkoma_ext.get('mediaType') in MARKDOWNY:
                   return cross.tokenize_markdown(akkoma_ext["content"], tags, mentions)

               return tokenize_post(status)

     

       79
       79
       +
           

     

       80
       80
       +
           def _on_create_post(self, outputs: list[cross.Output], status: dict):
               """Validate a newly streamed status and hand it to every output.

               The status is ignored when it belongs to another account, is a
               reblog, quote or poll, replies to another account, has a
               visibility outside ``self.options.allowed_visibility``, cannot be
               recorded through ``database.try_insert_post``, fails
               ``cross.test_filters``, or has an attachment that cannot be
               downloaded. Otherwise a ``MastodonPost`` is built and passed to
               ``accept_post`` of each output.
               """
               # skip events from other users; .get() so that a payload without
               # account data is skipped instead of raising KeyError
               if (status.get('account') or {}).get('id') != self.user_id:
                   return

               if status.get('reblog') or (status.get('quote_id') or status.get('quote')) or status.get('poll'):
                   # TODO polls not supported on bsky. maybe 3rd party? skip for now
                   # we don't handle reblogs. possible with bridgy(?) and self
                   # we don't handle quotes.
                   LOGGER.info("Skipping '%s'! Reblog, quote or poll..", status['id'])
                   return

               in_reply: str | None = status.get('in_reply_to_id')
               in_reply_to: str | None = status.get('in_reply_to_account_id')
               if in_reply_to and in_reply_to != self.user_id:
                   # We don't support replies.
                   LOGGER.info("Skipping '%s'! Reply to other user..", status['id'])
                   return

               if status.get('visibility') not in self.options.allowed_visibility:
                   # Skip f/o and direct posts
                   LOGGER.info("Skipping '%s'! '%s' visibility..", status['id'], status.get('visibility'))
                   return

               # NOTE(review): a falsy result is reported as "parent post not
               # found"; confirm that is the only failure mode of try_insert_post
               success = database.try_insert_post(self.db, status['id'], in_reply, self.user_id, self.service)
               if not success:
                   LOGGER.info("Skipping '%s' as parent post was not found in db!", status['id'])
                   return

               tokens = self.__to_tokens(status)
               if not cross.test_filters(tokens, self.options.filters):
                   LOGGER.info("Skipping '%s'. Matched a filter!", status['id'])
                   return

               LOGGER.info("Crossposting '%s'...", status['id'])

               media_attachments: list[MediaInfo] = []
               for attachment in status.get('media_attachments', []):
                   LOGGER.info("Downloading %s...", attachment['url'])
                   info = download_media(attachment['url'], attachment.get('description') or '')
                   if not info:
                       # one failed download aborts the whole crosspost
                       LOGGER.error("Skipping '%s'. Failed to download media!", status['id'])
                       return
                   media_attachments.append(info)

               cross_post = MastodonPost(status, tokens, media_attachments)
               for output in outputs:
                   output.accept_post(cross_post)

     

       128
       128
       +
           

     

       129
       129
       +
           def _on_delete_post(self, outputs: list[cross.Output], identifier: str):
               """Propagate the deletion of a known post to every output.

               Nothing happens when ``database.find_post`` has no record for
               ``identifier``. Otherwise each output's ``delete_post`` is called
               before the record is removed with ``database.delete_post``.
               """
               known_post = database.find_post(self.db, identifier, self.user_id, self.service)
               if known_post:
                   LOGGER.info("Deleting '%s'...", identifier)
                   for target in outputs:
                       target.delete_post(identifier)
                   database.delete_post(self.db, identifier, self.user_id, self.service)

     

       138
       138
       +
           

     

       139
       139
       +
           def _on_post(self, outputs: list[cross.Output], event: str, payload: str):
               """Dispatch one streaming event to its handler.

               'update' events carry a JSON-encoded status and go to
               ``_on_create_post``; 'delete' events carry the post identifier and
               go to ``_on_delete_post``. Any other event is ignored.
               """
               if event == 'delete':
                   self._on_delete_post(outputs, payload)
                   return

               if event == 'update':
                   decoded_status = json.loads(payload)
                   self._on_create_post(outputs, decoded_status)

     

       144
       144
       +
           

     

       145
       145
       +
           async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
               """Read the user stream and queue every event for processing.

               Iterates over ``websockets.connect(...)`` for the user stream of
               ``self.streaming``. Each received message is decoded and a
               zero-argument callable invoking ``_on_post`` is passed to
               ``submit``. When the connection ends with
               ``websockets.ConnectionClosedError`` the error is logged and the
               loop moves on to the next connection.
               """
               uri = f"{self.streaming}/api/v1/streaming?stream=user&access_token={self.token}"

               async for ws in websockets.connect(uri, extra_headers={"User-Agent": "XPost/0.0.3"}):
                   try:
                       LOGGER.info("Listening to %s...", self.streaming)

                       async def listen_for_messages():
                           async for msg in ws:
                               data = json.loads(msg)
                               event = str(data.get('event'))
                               payload = str(data.get('payload'))

                               # Bind the values as defaults: a plain closure would
                               # read `event`/`payload` only when the callable runs,
                               # by which time the loop may hold a later message.
                               submit(lambda event=event, payload=payload: self._on_post(outputs, event, payload))

                       reader = asyncio.create_task(listen_for_messages())

                       await asyncio.gather(reader)
                   except websockets.ConnectionClosedError as e:
                       LOGGER.error(e, stack_info=True, exc_info=True)
                       LOGGER.info("Reconnecting to %s...", self.streaming)
                       continue

+345

mastodon/output.py

···

       1
       1
       +
       import requests, time

     

       2
       2
       +
       

     

       3
       3
       +
       import cross, util.database as database

     

       4
       4
       +
       from util.util import LOGGER, as_envvar, canonical_label

     

       5
       5
       +
       from util.media import MediaInfo

     

       6
       6
       +
       from util.database import DataBaseWorker

     

       7
       7
       +
       

     

       8
       8
       +
       # MIME types assumed to be uploadable; used as the fallback when the
       # instance configuration does not list 'supported_mime_types'.
       POSSIBLE_MIMES = [
           # audio
           'audio/ogg',
           'audio/mp3',
           # images
           'image/webp',
           'image/jpeg',
           'image/png',
           # video
           'video/mp4',
           'video/quicktime',
           'video/webm',
       ]

     

       18
       18
       +
       

     

       19
       19
       +
       # Visibility values accepted for the 'visibility' output option.
       ALLOWED_POSTING_VISIBILITY = ['public', 'unlisted', 'private']


       class MastodonOutputOptions():
           """Validated options of a Mastodon output.

           ``visibility`` is taken from the 'visibility' key of the options
           dict and defaults to 'public' when the key is absent or ``None``.
           """

           def __init__(self, o: dict) -> None:
               """Read and validate the options.

               Raises:
                   ValueError: if 'visibility' is set to a value outside
                       ``ALLOWED_POSTING_VISIBILITY``.
               """
               requested = o.get('visibility')
               if requested is None:
                   self.visibility = 'public'
                   return

               if requested not in ALLOWED_POSTING_VISIBILITY:
                   raise ValueError(f"'visibility' only accepts {', '.join(ALLOWED_POSTING_VISIBILITY)}, got: {requested}")
               self.visibility = requested

     

       30
       30
       +
       

     

       31
       31
       +
       class MastodonOutput(cross.Output):

     

       32
       32
       +
           def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
               """Verify the configured credentials and load the instance's limits.

               Reads 'options', 'token' and 'instance' from ``settings``, resolves
               the account id through ``/api/v1/accounts/verify_credentials`` and
               reads posting limits from ``/api/v1/instance``. Fields missing from
               the instance reply fall back to the defaults written below.

               Raises:
                   ValueError: if 'token' or 'instance' is missing.
                   requests.HTTPError: if either request is not answered with 200.
               """
               super().__init__(input, settings, db)
               self.options = settings.get('options') or {}

               token = as_envvar(settings.get('token'))
               if not token:
                   raise ValueError("'token' is required")
               self.token = token

               instance: str = as_envvar(settings.get('instance'))
               if not instance:
                   raise ValueError("'instance' is required")

               # drop a single trailing slash so paths can be appended directly
               self.service = instance[:-1] if instance.endswith('/') else instance
               auth_headers = {'Authorization': f'Bearer {self.token}'}

               LOGGER.info("Verifying %s credentails...", self.service)
               responce = requests.get(f"{self.service}/api/v1/accounts/verify_credentials", headers=auth_headers)
               if responce.status_code != 200:
                   LOGGER.error("Failed to validate user credentials!")
                   responce.raise_for_status()
                   # raise_for_status() is silent for non-error codes; returning
                   # here would leave the object without user_id and limits
                   raise requests.HTTPError(
                       f"Unexpected status {responce.status_code} from {responce.url}",
                       response=responce
                   )
               self.user_id: str = responce.json()["id"]

               LOGGER.info("Getting %s configuration...", self.service)
               responce = requests.get(f"{self.service}/api/v1/instance", headers=auth_headers)
               if responce.status_code != 200:
                   LOGGER.error("Failed to get instance info!")
                   responce.raise_for_status()
                   raise requests.HTTPError(
                       f"Unexpected status {responce.status_code} from {responce.url}",
                       response=responce
                   )

               instance_info: dict = responce.json()
               # a missing or null section falls back to the defaults below
               configuration: dict = instance_info.get('configuration') or {}

               statuses_config: dict = configuration.get('statuses') or {}
               self.max_characters: int = statuses_config.get('max_characters', 500)
               self.max_media_attachments: int = statuses_config.get('max_media_attachments', 4)
               self.characters_reserved_per_url: int = statuses_config.get('characters_reserved_per_url', 23)

               media_config: dict = configuration.get('media_attachments') or {}
               self.image_size_limit: int = media_config.get('image_size_limit', 16777216)
               self.video_size_limit: int = media_config.get('video_size_limit', 103809024)
               self.supported_mime_types: list[str] = media_config.get('supported_mime_types', POSSIBLE_MIMES)

               # *oma: max post chars
               max_toot_chars = instance_info.get('max_toot_chars')
               if max_toot_chars:
                   self.max_characters = max_toot_chars

               # *oma: max upload limit
               upload_limit = instance_info.get('upload_limit')
               if upload_limit:
                   self.image_size_limit = upload_limit
                   self.video_size_limit = upload_limit

               # *oma ext: supported text types
               self.text_format = 'text/plain'
               pleroma = instance_info.get('pleroma')
               if pleroma:
                   post_formats: list[str] = (pleroma.get('metadata') or {}).get('post_formats') or []
                   # misskey markdown is preferred over plain markdown
                   if 'text/x.misskeymarkdown' in post_formats:
                       self.text_format = 'text/x.misskeymarkdown'
                   elif 'text/markdown' in post_formats:
                       self.text_format = 'text/markdown'

     

       92
       92
       +
           

     

       93
       93
       +
           def upload_media(self, attachments: list[MediaInfo]) -> list[str] | None:
               """Upload attachments and return their media ids in input order.

               Every attachment is size-checked first: images against
               ``self.image_size_limit``, videos against
               ``self.video_size_limit``, anything else against 7,000,000.
               If any attachment is too large nothing is uploaded and ``None`` is
               returned. Uploads answered with 202 are polled every 3 seconds
               until the media endpoint answers 200.

               Raises:
                   requests.HTTPError: if an upload or a status poll fails.
               """
               for a in attachments:
                   if a.mime.startswith('image/'):
                       size_limit = self.image_size_limit
                   elif a.mime.startswith('video/'):
                       size_limit = self.video_size_limit
                   else:
                       size_limit = 7_000_000

                   if len(a.io) > size_limit:
                       return None

               auth_headers = {'Authorization': f'Bearer {self.token}'}

               uploads: list[dict] = []
               for a in attachments:
                   data = {}
                   if a.alt:
                       data['description'] = a.alt

                   req = requests.post(
                       f"{self.service}/api/v2/media",
                       headers=auth_headers,
                       files={'file': (a.name, a.io, a.mime)},
                       data=data
                   )

                   if req.status_code == 200:
                       media_id = req.json()['id']
                       LOGGER.info("Uploaded %s! (%s)", a.name, media_id)
                       uploads.append({'done': True, 'id': media_id})
                   elif req.status_code == 202:
                       # accepted, but still being processed by the server
                       LOGGER.info("Waiting for %s to process!", a.name)
                       uploads.append({'done': False, 'id': req.json()['id']})
                   else:
                       LOGGER.error("Failed to upload %s! %s", a.name, req.text)
                       req.raise_for_status()
                       # raise_for_status() is silent for non-error codes; without
                       # this the attachment would be dropped from the result
                       raise requests.HTTPError(
                           f"Unexpected status {req.status_code} while uploading {a.name}",
                           response=req
                       )

               while any(not val['done'] for val in uploads):
                   LOGGER.info("Waiting for media to process...")
                   time.sleep(3)
                   for media in uploads:
                       if media['done']:
                           continue

                       # double-quoted f-string: reusing the outer quote inside
                       # the replacement field is a SyntaxError before Python 3.12
                       reqs = requests.get(f"{self.service}/api/v1/media/{media['id']}", headers=auth_headers)

                       if reqs.status_code == 206:
                           # still processing
                           continue

                       if reqs.status_code == 200:
                           media['done'] = True
                           continue
                       reqs.raise_for_status()

               return [val['id'] for val in uploads]

     

       151
       151
       +
       

     

       152
       152
       +
           def token_to_string(self, tokens: list[cross.Token]) -> str | None:
               """Render tokens as post text for this instance's text format.

               Text tokens are copied, tag tokens get a leading '#', and link
               tokens become either the bare href (when ``canonical_label``
               accepts the label) or a labelled link formatted according to
               ``self.text_format``. Returns ``None`` as soon as a token of any
               other kind is encountered.
               """
               markdown_formats = {'text/x.misskeymarkdown', 'text/markdown'}
               pieces: list[str] = []

               for tok in tokens:
                   if isinstance(tok, cross.TextToken):
                       pieces.append(tok.text)
                       continue

                   if isinstance(tok, cross.TagToken):
                       pieces.append('#' + tok.tag)
                       continue

                   if not isinstance(tok, cross.LinkToken):
                       # unsupported token kind
                       return None

                   if canonical_label(tok.label, tok.href):
                       pieces.append(tok.href)
                   elif self.text_format == 'text/plain':
                       pieces.append(f'{tok.label}: {tok.href}')
                   elif self.text_format in markdown_formats:
                       pieces.append(f'[{tok.label}]({tok.href})')

               return ''.join(pieces)

     

       172
       172
       +
       

     

       173
       173
       +
           def split_tokens_media(self, tokens: list[cross.Token], media: list[MediaInfo]):
               """Split a post into (text, attachments) pairs, one per status.

               The tokens are split with ``cross.split_tokens`` and each part is
               rendered through ``token_to_string``; ``None`` is returned if any
               part cannot be rendered. Attachments are then assigned in order:
               each status takes up to ``self.max_media_attachments`` of them,
               starting with the first status, and empty-text statuses are
               appended once the existing ones are used up.
               """
               texts: list[str] = []
               for chunk in cross.split_tokens(tokens, self.max_characters, self.characters_reserved_per_url):
                   rendered = self.token_to_string(chunk)
                   if rendered is None:
                       return None
                   texts.append(rendered)

               # a post without any text still produces one status
               if not texts:
                   texts = ['']

               buckets: list[list[MediaInfo]] = [[] for _ in texts]
               next_unused = 0  # index of the first status that has not received media yet
               current: list[MediaInfo] | None = None

               for att in media:
                   if current is None or len(current) >= self.max_media_attachments:
                       if next_unused == len(buckets):
                           # every existing status was used: add a blank one
                           texts.append('')
                           buckets.append([])
                       current = buckets[next_unused]
                       next_unused += 1
                   current.append(att)

               return list(zip(texts, buckets))

     

       223
       223
       +
               

     

       224
       224
       +
           def accept_post(self, post: cross.Post):

     

       225
       225
       +
               parent_id = post.get_parent_id()

     

       226
       226
       +
               

     

       227
       227
       +
               new_root_id: int | None = None

     

       228
       228
       +
               new_parent_id: int | None = None

     

       229
       229
       +
               

     

       230
       230
       +
               reply_ref: str | None = None

     

       231
       231
       +
               if parent_id:

     

       232
       232
       +
                   thread_tuple = database.find_mapped_thread(

     

       233
       233
       +
                       self.db,

     

       234
       234
       +
                       parent_id,

     

       235
       235
       +
                       self.input.user_id,

     

       236
       236
       +
                       self.input.service,

     

       237
       237
       +
                       self.user_id,

     

       238
       238
       +
                       self.service

     

       239
       239
       +
                   )

     

       240
       240
       +
                   

     

       241
       241
       +
                   if not thread_tuple:

     

       242
       242
       +
                       LOGGER.error("Failed to find thread tuple in the database!")

     

       243
       243
       +
                       return None

     

       244
       244
       +
                   

     

       245
       245
       +
                   _, reply_ref, new_root_id, new_parent_id = thread_tuple

     

       246
       246
       +
               

     

       247
       247
       +
               lang: str

     

       248
       248
       +
               if post.get_languages():

     

       249
       249
       +
                   lang = post.get_languages()[0]

     

       250
       250
       +
               else:

     

       251
       251
       +
                   lang = 'en'

     

       252
       252
       +
               

     

       253
       253
       +
               raw_statuses = self.split_tokens_media(post.get_tokens(), post.get_attachments())

     

       254
       254
       +
               if not raw_statuses:

     

       255
       255
       +
                   LOGGER.error("Failed to split post into statuses?")

     

       256
       256
       +
                   return None

     

       257
       257
       +
               baked_statuses = []

     

       258
       258
       +
               

     

       259
       259
       +
               for status, raw_media in raw_statuses:

     

       260
       260
       +
                   media: list[str] | None = None

     

       261
       261
       +
                   if raw_media:

     

       262
       262
       +
                       media = self.upload_media(raw_media)

     

       263
       263
       +
                       if not media:

     

       264
       264
       +
                           LOGGER.error("Failed to upload attachments!")

     

       265
       265
       +
                           return None

     

       266
       266
       +
                       baked_statuses.append((status, media))

     

       267
       267
       +
                       continue

     

       268
       268
       +
                   baked_statuses.append((status,[]))

     

       269
       269
       +
               

     

       270
       270
       +
               created_statuses: list[str] = []

     

       271
       271
       +
                   

     

       272
       272
       +
               for status, media in baked_statuses:

     

       273
       273
       +
                   payload = {

     

       274
       274
       +
                       'status': status,

     

       275
       275
       +
                       'media_ids': media or [],

     

       276
       276
       +
                       'spoiler_text': post.get_cw(),

     

       277
       277
       +
                       'visibility': self.options.get('visibility', 'public'),

     

       278
       278
       +
                       'content_type': self.text_format,

     

       279
       279
       +
                       'language': lang

     

       280
       280
       +
                   }

     

       281
       281
       +
                   

     

       282
       282
       +
                   if media:

     

       283
       283
       +
                       payload['sensitive'] = post.is_sensitive()

     

       284
       284
       +
                       

     

       285
       285
       +
                       if post.get_cw():

     

       286
       286
       +
                           payload['sensitive'] = True

     

       287
       287
       +
                       

     

       288
       288
       +
                       if not status:

     

       289
       289
       +
                           payload['status'] = '🖼️'

     

       290
       290
       +
                   

     

       291
       291
       +
                   if reply_ref:

     

       292
       292
       +
                       payload['in_reply_to_id'] = reply_ref

     

       293
       293
       +
               

     

       294
       294
       +
                   reqs = requests.post(f'{self.service}/api/v1/statuses', headers={

     

       295
       295
       +
                       'Authorization': f'Bearer {self.token}',

     

       296
       296
       +
                       'Content-Type': 'application/json'

     

       297
       297
       +
                   }, json=payload)

     

       298
       298
       +
               

     

       299
       299
       +
                   if reqs.status_code != 200:

     

       300
       300
       +
                       LOGGER.info("Failed to post status! %s - %s", reqs.status_code, reqs.text)

     

       301
       301
       +
                       reqs.raise_for_status()

     

       302
       302
       +
                   

     

       303
       303
       +
                   reply_ref = reqs.json()['id']

     

       304
       304
       +
                   LOGGER.info("Created new status %s!", reply_ref)

     

       305
       305
       +
                       

     

       306
       306
       +
                   created_statuses.append(reqs.json()['id'])

     

       307
       307
       +
               

     

       308
       308
       +
               db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)

     

       309
       309
       +
               assert db_post, "ghghghhhhh"

     

       310
       310
       +
               

     

       311
       311
       +
               if new_root_id is None or  new_parent_id is None:

     

       312
       312
       +
                   new_root_id = database.insert_post(

     

       313
       313
       +
                       self.db,

     

       314
       314
       +
                       created_statuses[0],

     

       315
       315
       +
                       self.user_id,

     

       316
       316
       +
                       self.service

     

       317
       317
       +
                   )

     

       318
       318
       +
                   new_parent_id = new_root_id

     

       319
       319
       +
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)

     

       320
       320
       +
                   created_statuses = created_statuses[1:]

     

       321
       321
       +
               

     

       322
       322
       +
               for db_id in created_statuses:

     

       323
       323
       +
                   new_parent_id = database.insert_reply(

     

       324
       324
       +
                       self.db, 

     

       325
       325
       +
                       db_id,

     

       326
       326
       +
                       self.user_id,

     

       327
       327
       +
                       self.service,

     

       328
       328
       +
                       new_parent_id,

     

       329
       329
       +
                       new_root_id

     

       330
       330
       +
                   )

     

       331
       331
       +
                   database.insert_mapping(self.db, db_post['id'], new_parent_id)

     

       332
       332
       +
           

     

       333
       333
       +
           def delete_post(self, identifier: str):

     

       334
       334
       +
               post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)

     

       335
       335
       +
               if not post:

     

       336
       336
       +
                   return

     

       337
       337
       +
               

     

       338
       338
       +
               mappings = database.find_mappings(self.db, post['id'], self.service, self.user_id)

     

       339
       339
       +
               for mapping in mappings[::-1]:

     

       340
       340
       +
                   LOGGER.info("Deleting '%s'...", mapping[0])

     

       341
       341
       +
                   requests.delete(f'{self.service}/api/v1/statuses/{mapping[0]}', headers={

     

       342
       342
       +
                       'Authorization': f'Bearer {self.token}'

     

       343
       343
       +
                   })

     

       344
       344
       +
                   database.delete_post(self.db, mapping[0], self.service, self.user_id)

     

       345
       345
       +

+1 -1

media_util.py util/media.py

···

       2
       2
        
       import subprocess

     

       3
       3
        
       import json

     

       4
       4
        
       import re, urllib.parse, os

     

       5
       5
       -
       from util import LOGGER

     

       5
       5
       +
       from util.util import LOGGER

     

       6
       6
        
       import magic

     

       7
       7
        
       

     

       8
       8
        
       FILENAME = re.compile(r'filename="?([^\";]*)"?')

+10 -39

misskey.py misskey/input.py

···

       1
       1
       -
       import cross, media_util, util, database

     

       2
       2
       -
       from util import LOGGER

     

       3
       1
        
       import requests, websockets

     

       4
       4
       -
       from typing import Callable, Any

     

       5
       2
        
       import asyncio

     

       6
       3
        
       import json, uuid

     

       7
       4
        
       import re

     

       8
       5
        
       

     

       6
       6
       +
       from misskey.common import MisskeyPost

     

       9
       7
        
       

     

       10
       10
       -
       class MisskeyPost(cross.Post):

     

       11
       11
       -
           def __init__(self, note: dict, tokens: list[cross.Token], files: list[media_util.MediaInfo]) -> None:

     

       12
       12
       -
               super().__init__()

     

       13
       13
       -
               self.note = note

     

       14
       14
       -
               self.sensitive = any([a.get('isSensitive', False) for a in note.get('files', [])])

     

       15
       15
       -
               self.media_attachments = files

     

       16
       16
       -
               self.tokens = tokens

     

       17
       17
       -
           

     

       18
       18
       -
           def get_tokens(self) -> list[cross.Token]:

     

       19
       19
       -
               return self.tokens

     

       20
       20
       -
           

     

       21
       21
       -
           def get_parent_id(self) -> str | None:

     

       22
       22
       -
               return self.note.get('replyId')

     

       23
       23
       -
           

     

       24
       24
       -
           def get_post_date_iso(self) -> str:

     

       25
       25
       -
               date = self.note.get('createdAt')

     

       26
       26
       -
               return date or super().get_post_date_iso()

     

       27
       27
       -
           

     

       28
       28
       -
           def get_attachments(self) -> list[media_util.MediaInfo]:

     

       29
       29
       -
               return self.media_attachments

     

       30
       30
       -
           

     

       31
       31
       -
           def get_id(self) -> str:

     

       32
       32
       -
               return self.note['id']

     

       33
       33
       -
           

     

       34
       34
       -
           def get_cw(self) -> str:

     

       35
       35
       -
               return self.note.get('cw') or ''

     

       36
       36
       -
           

     

       37
       37
       -
           def get_languages(self) -> list[str]:

     

       38
       38
       -
               return []

     

       39
       39
       -
           

     

       40
       40
       -
           def is_sensitive(self) -> bool:

     

       41
       41
       -
               return self.sensitive

     

       8
       8
       +
       import cross, util.database as database

     

       9
       9
       +
       from util.media import MediaInfo, download_media

     

       10
       10
       +
       from util.util import LOGGER, as_envvar

     

       11
       11
       +
       

     

       12
       12
       +
       from typing import Callable, Any

     

       42
       13
        
           

     

       43
       14
        
       ALLOWED_VISIBILITY = ['public', 'home']

     

       44
       15
        
           

     
···

       56
       27
        
       class MisskeyInput(cross.Input):

     

       57
       28
        
           def __init__(self, settings: dict, db: cross.DataBaseWorker) -> None:

     

       58
       29
        
               self.options = MisskeyInputOptions(settings.get('options', {}))

     

       59
       59
       -
               self.token = util.as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))

     

       60
       60
       -
               instance: str = util.as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))

     

       30
       30
       +
               self.token = as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))

     

       31
       31
       +
               instance: str = as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))

     

       61
       32
        
               

     

       62
       33
        
               service = instance[:-1] if instance.endswith('/') else instance

     

       63
       34
        
               

     
···

       111
       82
        
               

     

       112
       83
        
               LOGGER.info("Crossposting '%s'...", note['id'])

     

       113
       84
        
               

     

       114
       114
       -
               media_attachments: list[media_util.MediaInfo] = []

     

       85
       85
       +
               media_attachments: list[MediaInfo] = []

     

       115
       86
        
               for attachment in note.get('files', []):

     

       116
       87
        
                   LOGGER.info("Downloading %s...", attachment['url'])

     

       117
       117
       -
                   info = media_util.download_media(attachment['url'], attachment.get('comment') or '')

     

       88
       88
       +
                   info = download_media(attachment['url'], attachment.get('comment') or '')

     

       118
       89
        
                   if not info:

     

       119
       90
        
                       LOGGER.error("Skipping '%s'. Failed to download media!", note['id'])

     

       120
       91
        
                       return

+35

misskey/common.py

···

       1
       1
       +
       import cross

     

       2
       2
       +
       from util.media import MediaInfo

     

       3
       3
       +
       

     

       4
       4
       +
       class MisskeyPost(cross.Post):

     

       5
       5
       +
           def __init__(self, note: dict, tokens: list[cross.Token], files: list[MediaInfo]) -> None:

     

       6
       6
       +
               super().__init__()

     

       7
       7
       +
               self.note = note

     

       8
       8
       +
               self.sensitive = any([a.get('isSensitive', False) for a in note.get('files', [])])

     

       9
       9
       +
               self.media_attachments = files

     

       10
       10
       +
               self.tokens = tokens

     

       11
       11
       +
           

     

       12
       12
       +
           def get_tokens(self) -> list[cross.Token]:

     

       13
       13
       +
               return self.tokens

     

       14
       14
       +
           

     

       15
       15
       +
           def get_parent_id(self) -> str | None:

     

       16
       16
       +
               return self.note.get('replyId')

     

       17
       17
       +
           

     

       18
       18
       +
           def get_post_date_iso(self) -> str:

     

       19
       19
       +
               date = self.note.get('createdAt')

     

       20
       20
       +
               return date or super().get_post_date_iso()

     

       21
       21
       +
           

     

       22
       22
       +
           def get_attachments(self) -> list[MediaInfo]:

     

       23
       23
       +
               return self.media_attachments

     

       24
       24
       +
           

     

       25
       25
       +
           def get_id(self) -> str:

     

       26
       26
       +
               return self.note['id']

     

       27
       27
       +
           

     

       28
       28
       +
           def get_cw(self) -> str:

     

       29
       29
       +
               return self.note.get('cw') or ''

     

       30
       30
       +
           

     

       31
       31
       +
           def get_languages(self) -> list[str]:

     

       32
       32
       +
               return []

     

       33
       33
       +
           

     

       34
       34
       +
           def is_sensitive(self) -> bool:

     

       35
       35
       +
               return self.sensitive

util.py util/util.py