social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

this is kind of better

zenfyr.dev 2da6655f 6c33c20a

verified
+1
.gitignore
···
# Virtual environments
.venv
+
.vscode/
data/
+52 -18
README.md
···
uv sync
```
-
print help message:
+
generate settings.json on first launch
```
-
uv run main.py run --help
+
uv run main.py
```
# Settings
-
## Bluesky
+
the tool allows you to specify an input and multiple outputs to post to.
-
in the bluesky block, you can configure who is allowed to reply to and quote the new posts.
+
some options accept an envvar syntax:
-
`quote_gate`:
+
```json
+
{
+
"token": "env:TOKEN"
+
}
+
```
-
prevent users from quoting the post. default: `false`
+
## Inputs
-
`thread_gate`:
+
### Mastodon WebSocket `mastodon-wss`
-
prevent users from replying to the post. leave empty to prevent replies completely.
+
listens to the user's home timeline for new posts, crossposts only the public/unlisted ones by the user.
-
accepted values:
-
- `following` followed users.
-
- `followers` users following the account.
-
- `mentioned` users mentioned in the post.
-
- `everybody` everybody is allowed to reply to the post. all other options will be skipped.
-
-
-
# Supported Software
+
```json5
+
{
+
"type": "mastodon-wss", // type
+
"instance": "env:MASTODON_INSTANCE", // mastodon instance
+
"token": "env:MASTODON_TOKEN", // user token (use webtools)
+
"options": {
+
"allowed_visibility": [
+
"public",
+
"unlisted"
+
]
+
}
+
}
+
```
any instance implementing `/api/v1/instance`, `/api/v1/accounts/verify_credentials` and `/api/v1/streaming?stream` will work fine.
confirmed supported:
- Mastodon
- Iceshrimp.NET
-
- Sharkey
- Akkoma
confirmed unsupported:
-
- Mitra
+
- Mitra
+
- Sharkey
+
+
## Outputs
+
+
### Bluesky
+
+
in the bluesky block, you can configure who is allowed to reply to and quote the new posts.
+
+
```json5
+
{
+
"type": "bluesky", // type
+
"handle": "env:BLUESKY_HANDLE", // handle (e.g. melontini.me)
+
"app_password": "env:BLUESKY_APP_PASSWORD", // https://bsky.app/settings/app-passwords
+
"did": "env:BLUESKY_DID", // use a DID instead of handle (avoids handle resolution)
+
"pds": "env:BLUESKY_PDS", // specify Your PDS directly (avoids DID doc lookup)
+
"options": {
+
"quote_gate": false, // block users from quoting the post
+
"thread_gate": [ // block replies. leave empty to disable replies
+
"mentioned",
+
"following",
+
"followers",
+
"everybody" // allow everybody to reply (ignores other options)
+
]
+
}
+
}
+
```
+286 -15
bluesky.py
···
-
from atproto import client_utils, Client, AtUri
+
from atproto import client_utils, Client, AtUri, IdResolver
from atproto_client import models
+
import json
+
import cross
+
import database
+
from database import DataBaseWorker
+
import util
+
import media_util
+
from util import LOGGER
+
+
# only for lexicon reference
# NOTE: used as the `service` column value in db rows, not as an endpoint
SERVICE = 'https://bsky.app'

# content-warning substrings that trigger the corresponding self-labels
ADULT_LABEL = ["sexual content", "nsfw"]
PORN_LABEL = ["porn", "yiff"]
+
+
class BlueskyOutput(cross.Output):
    """Crossposts posts from an input service to a Bluesky (ATProto) account.

    Fixes applied in review:
    - the DID-doc failure exception message is now actually formatted
    - the CW-prefixed token list is the one that gets split (was re-fetching
      the raw tokens, silently dropping the inserted "CW: ..." prefix)
    - replying to a top-level post now records that post as the thread root
      in the db (was re-inserting the reply as a brand-new root)
    - bails out before db insertion when nothing was posted (previously fell
      through and crashed on db_identifiers[0])
    """

    def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
        super().__init__(input, settings, db)
        self.options = util.safe_get(settings, 'options', {})

        if not util.get_or_envvar(settings, 'app-password'):
            raise Exception("Account app password not provided!")

        resolver = IdResolver()
        # resolve handle -> DID unless a DID was configured directly
        did: str | None = util.get_or_envvar(settings, 'did')
        if not did:
            LOGGER.info("Resolving ATP identity for %s...", util.get_or_envvar(settings, 'handle'))
            did = resolver.handle.resolve(util.get_or_envvar(settings, 'handle'))
            if not did:
                raise Exception("Failed to resolve DID!")

        # resolve DID -> PDS endpoint unless a PDS was configured directly
        pds: str | None = util.get_or_envvar(settings, 'pds')
        if not pds:
            LOGGER.info("Resolving PDS from DID document...")
            did_doc = resolver.did.resolve(did)
            if not did_doc:
                # FIX: Exception() does not %-format its arguments
                raise Exception(f"Failed to resolve DID doc for '{did}'")
            pds = did_doc.get_pds_endpoint()
            if not pds:
                raise Exception("Failed to resolve PDS!")

        self.client = Client(pds)
        self.client.login(did, util.get_or_envvar(settings, 'app-password'))
        self.bsky = Bluesky(self.client)

    def accept_post(self, post: cross.Post):
        """Crosspost `post` to Bluesky, threading replies and recording the
        created records in the database."""
        login = self.client.me
        if not login:
            raise Exception("Client not logged in!")

        parent_id = post.get_parent_id()

        # internal db ids used for insertion at the end
        new_root_id = None
        new_parent_id = None

        root_ref = None
        reply_ref = None
        if parent_id:
            # parentless posts are skipped by the input
            reply_data = database.find_post(self.db, parent_id, self.input.user_id, self.input.service)
            assert reply_data, "reply_data requested, but doesn't exist in db (should've been skipped bt firehose)"

            reply_mappings = [json.loads(data[0]) for data in database.find_mappings(self.db, reply_data['id'], SERVICE, login.did)]
            if not reply_mappings:
                LOGGER.error("Failed to find mappings for a post in the db!")
                return

            # reply to the parent's last record; root defaults to its first record
            reply_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_mappings[-1]['uri']), cid=str(reply_mappings[-1]['cid']))
            root_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_mappings[0]['uri']), cid=str(reply_mappings[0]['cid']))
            if reply_data['root_id']:
                root_data = database.find_post_by_id(self.db, reply_data['root_id'])
                assert root_data, "root_data requested but doesn't exist in db"

                root_mappings = [json.loads(data[0]) for data in database.find_mappings(self.db, reply_data['root_id'], SERVICE, login.did)]
                if not root_mappings:
                    LOGGER.error("Failed to find mappings for a post in the db!")
                    return
                root_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(root_mappings[0]['uri']), cid=str(root_mappings[0]['cid']))

            # FIX: when the parent is itself a root post (root_id is NULL),
            # the parent IS the thread root — previously new_root_id stayed
            # None and the branch below re-inserted this reply as a new root.
            new_root_id = reply_data['root_id'] if reply_data['root_id'] else reply_data['id']
            new_parent_id = reply_data['id']

            root_ref = models.create_strong_ref(root_record)
            reply_ref = models.create_strong_ref(reply_record)

        tokens = post.get_tokens()

        # self-labels derived from the content warning / sensitivity flag
        unique_labels: set[str] = set()
        cw = post.get_cw()
        if cw:
            tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
            unique_labels.add('graphic-media')

            if any(tag in cw for tag in ADULT_LABEL):
                unique_labels.add('sexual')

            if any(tag in cw for tag in PORN_LABEL):
                unique_labels.add('porn')

        if post.is_sensitive():
            unique_labels.add('graphic-media')

        labels = models.ComAtprotoLabelDefs.SelfLabels(values=[models.ComAtprotoLabelDefs.SelfLabel(val=label) for label in unique_labels])

        # FIX: split `tokens` (which includes the CW prefix) — the original
        # re-fetched post.get_tokens(), dropping the inserted "CW: ..." text
        split_tokens: list[list[cross.Token]] = util.split_tokens(tokens, 300)
        post_text: list[client_utils.TextBuilder] = []

        # convert tokens into rich text. skip post if it contains unsupported tokens
        for block in split_tokens:
            rich_text = tokens_to_richtext(block)

            if not rich_text:
                LOGGER.error("Skipping '%s' as it contains invalid rich text types!", post.get_id())
                return
            post_text.append(rich_text)

        if not post_text:
            post_text = [client_utils.TextBuilder().text('')]

        created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
        attachments = post.get_attachments()
        if not attachments:
            # text-only: one record per 300-char chunk, threaded together
            for text in post_text:
                if reply_ref and root_ref:
                    new_post = self.bsky.send_post(text, reply_to=models.AppBskyFeedPost.ReplyRef(
                        parent=reply_ref,
                        root=root_ref
                    ), labels=labels)
                else:
                    new_post = self.bsky.send_post(text, labels=labels)
                    root_ref = models.create_strong_ref(new_post)

                self.bsky.create_gates(self.options, new_post.uri)
                reply_ref = models.create_strong_ref(new_post)
                created_records.append(new_post)
        elif len(attachments) <= 4:
            if len(attachments) == 1 and attachments[0].get_type() == 'video':
                video_data = attachments[0]

                video_io = media_util.download_blob(video_data.get_url(), max_bytes=100_000_000)
                if not video_io:
                    LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large?", post.get_id())
                    return

                metadata = video_data.create_meta(video_io)
                if metadata.get_duration() > 180:
                    LOGGER.info("Skipping post_id '%s', video attachment too long!", post.get_id())
                    return

                aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
                    width=metadata.get_width(),
                    height=metadata.get_height()
                )

                new_post = self.bsky.send_video(
                    text=post_text[0],
                    video=video_io,
                    video_aspect_ratio=aspect_ratio,
                    video_alt=video_data.get_alt(),
                    reply_to=models.AppBskyFeedPost.ReplyRef(
                        parent=reply_ref,
                        root=root_ref
                    ) if root_ref and reply_ref else None,
                    labels=labels
                )
                if not root_ref:
                    root_ref = models.create_strong_ref(new_post)

                self.bsky.create_gates(self.options, new_post.uri)
                reply_ref = models.create_strong_ref(new_post)
            else:
                # bluesky galleries only support images (up to 4)
                for attachment in attachments:
                    if attachment.get_type() != 'image':
                        LOGGER.info("Skipping post_id '%s'. Attachment type mismatch. got: '%s' expected: 'image'", post.get_id(), attachment.get_type())
                        return

                images: list[bytes] = []
                image_alts: list[str] = []
                image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []
                for attachment in attachments:
                    image_io = media_util.download_blob(attachment.get_url(), max_bytes=2_000_000)
                    if not image_io:
                        LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large?", post.get_id())
                        return
                    LOGGER.info("Converting %s to .webp...", attachment.get_url())
                    image_io = media_util.compress_image(image_io, quality=100)
                    metadata = attachment.create_meta(image_io)

                    # NOTE(review): this branch only logs — nothing recompresses
                    # the image below the 1MB blob limit; confirm intent
                    if len(image_io) > 1_000_000:
                        LOGGER.info("Compressing %s...", attachment.get_url())

                    images.append(image_io)
                    image_alts.append(attachment.get_alt())
                    image_aspect_ratios.append(models.AppBskyEmbedDefs.AspectRatio(
                        width=metadata.get_width(),
                        height=metadata.get_height()
                    ))

                new_post = self.bsky.send_images(
                    text=post_text[0],
                    images=images,
                    image_alts=image_alts,
                    image_aspect_ratios=image_aspect_ratios,
                    reply_to=models.AppBskyFeedPost.ReplyRef(
                        parent=reply_ref,
                        root=root_ref
                    ) if root_ref and reply_ref else None,
                    labels=labels
                )
                if not root_ref:
                    root_ref = models.create_strong_ref(new_post)

                self.bsky.create_gates(self.options, new_post.uri)
                reply_ref = models.create_strong_ref(new_post)

            created_records.append(new_post)
            # remaining text chunks become replies under the media post
            for text in post_text[1:]:
                new_post = self.bsky.send_post(text, reply_to=models.AppBskyFeedPost.ReplyRef(
                    parent=reply_ref,
                    root=root_ref
                ), labels=labels)
                self.bsky.create_gates(self.options, new_post.uri)

                reply_ref = models.create_strong_ref(new_post)
                created_records.append(new_post)
        else:
            LOGGER.info("Skipping post_id '%s', too many attachments!", post.get_id())
            return

        if not created_records:
            # FIX: was missing both the log argument and the early return,
            # which crashed on db_identifiers[0] below
            LOGGER.info("Skipped post_id '%s', for some reason...", post.get_id())
            return

        db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)
        assert db_post, "ghghghhhhh"

        db_identifiers = [json.dumps(cr.model_dump(), sort_keys=True) for cr in created_records]

        if new_root_id is None or new_parent_id is None:
            # fresh thread: the first created record becomes the root post
            new_root_id = database.insert_post(
                self.db,
                db_identifiers[0],
                login.did,
                SERVICE
            )
            new_parent_id = new_root_id
            database.insert_mapping(self.db, db_post['id'], new_parent_id)
            db_identifiers = db_identifiers[1:]

        for db_id in db_identifiers:
            new_parent_id = database.insert_reply(
                self.db,
                db_id,
                login.did,
                SERVICE,
                new_parent_id,
                new_root_id
            )
            database.insert_mapping(self.db, db_post['id'], new_parent_id)

    def delete_post(self, identifier: str):
        """Delete every Bluesky record mapped to `identifier` (newest first)
        and drop the corresponding rows from the database."""
        login = self.client.me
        if not login:
            raise Exception("Client not logged in!")

        post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)
        if not post:
            return

        mappings = database.find_mappings(self.db, post['id'], SERVICE, login.did)
        for mapping in mappings[::-1]:
            self.client.delete_post(json.loads(mapping[0])['uri'])
            database.delete_post(self.db, mapping[0], SERVICE, login.did)
+
class Bluesky():
def __init__(self, client: Client) -> None:
···
self.client.app.bsky.feed.threadgate.create(account.did, thread_gate, rkey)
-
if options['quote_gate']:
+
if options.get('quote_gate', False):
post_gate = models.AppBskyFeedPostgate.Record(
post=post_uri,
created_at=time,
···
self.client.app.bsky.feed.postgate.create(account.did, post_gate, rkey)
-
def tokens_to_richtext(tokens: list[dict]) -> client_utils.TextBuilder | None:
-
builder: client_utils.TextBuilder = client_utils.TextBuilder()
-
+
def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
+
builder = client_utils.TextBuilder()
+
for token in tokens:
-
token_type = token['type']
-
-
if token_type == 'text':
-
builder.text(token['content'])
-
elif token_type == 'hashtag':
-
builder.tag('#' + token['tag'], token['tag'])
-
elif token_type == 'link':
-
builder.link(token['text'], token['url'])
+
if isinstance(token, cross.TextToken):
+
builder.text(token.text)
+
elif isinstance(token, cross.LinkToken):
+
builder.link(token.label, token.href)
+
elif isinstance(token, cross.TagToken):
+
builder.tag('#' + token.tag, token.tag)
else:
-
# Fail on mention!
+
# fail on unsupported tokens
return None
-
+
return builder
+109
cross.py
···
+
from typing import Callable, Any
+
from database import DataBaseWorker
+
+
# generic token
class Token():
    """Base class for tokenized post content.

    `type` is a short discriminator string set by each subclass
    ('text', 'link', 'tag', 'mention').
    """

    def __init__(self, type: str) -> None:
        self.type = type
+
+
class TextToken(Token):
    """A run of plain text."""

    def __init__(self, text: str) -> None:
        super().__init__('text')
        self.text = text
+
+
# token that represents a link to a website. e.g. [link](https://google.com/)
class LinkToken(Token):
    """A hyperlink with a display label."""

    def __init__(self, href: str, label: str) -> None:
        super().__init__('link')
        self.href = href    # target URL
        self.label = label  # visible link text
+
+
# token that represents a hashtag. e.g. #SocialMedia
class TagToken(Token):
    """A hashtag; `tag` is stored without the leading '#'."""

    def __init__(self, tag: str) -> None:
        super().__init__('tag')
        self.tag = tag
+
+
# token that represents a mention of a user.
# NOTE: not supported by the bluesky output — posts containing mentions are skipped
class MentionToken(Token):
    """A user mention with a resolvable URI."""

    def __init__(self, username: str, uri: str) -> None:
        super().__init__('mention')
        self.username = username
        self.uri = uri
+
+
class MediaMeta():
    """Dimension/duration metadata for a media attachment (-1 means unknown)."""

    def __init__(self, width: int, height: int, duration: float) -> None:
        self.width = width
        self.height = height
        self.duration = duration

    def get_width(self) -> int:
        """Pixel width, or -1 if unknown."""
        return self.width

    def get_height(self) -> int:
        """Pixel height, or -1 if unknown."""
        return self.height

    def get_duration(self) -> float:
        """Duration — presumably seconds (bluesky.py compares against 180 for
        a 3-minute cap); -1 if unknown."""
        return self.duration
+
+
class MediaAttachment():
    """Null-object base for a post's media attachment; concrete inputs are
    expected to override the accessors — confirm against mastodon.py."""

    def __init__(self) -> None:
        pass

    def create_meta(self, bytes: bytes) -> MediaMeta:
        """Build metadata for the downloaded blob; base returns all-unknown.

        NOTE(review): the parameter shadows the builtin `bytes`; kept as-is
        since the name is part of the override/caller interface.
        """
        return MediaMeta(-1, -1, -1)

    def get_url(self) -> str:
        """URL to download the media from ('' in the base class)."""
        return ''

    def get_type(self) -> str | None:
        """Attachment kind, e.g. 'image' or 'video' (None in the base class)."""
        return None

    def get_alt(self) -> str:
        """Alt text ('' when absent)."""
        return ''
+
+
class Post():
    """Null-object base for a post coming from an input service; concrete
    inputs override the accessors."""

    def __init__(self) -> None:
        pass

    def get_tokens(self) -> list[Token]:
        """Tokenized post body."""
        return []

    def get_parent_id(self) -> str:
        """Service-side id of the post this replies to; '' when not a reply."""
        return ''

    def get_attachments(self) -> list[MediaAttachment]:
        """Media attachments, in display order."""
        return []

    def get_id(self) -> str:
        """Service-side id of this post."""
        return ''

    def get_cw(self) -> str:
        """Content warning / spoiler text; '' when absent."""
        return ''

    def is_sensitive(self) -> bool:
        """Whether the post is flagged sensitive on the source service."""
        return False
+
+
# generic input service.
# user and service for db queries
class Input():
    """Source of posts to crosspost.

    `service` and `user_id` scope every database lookup performed by outputs.
    """

    def __init__(self, service: str, user_id: str, settings: dict, db: DataBaseWorker) -> None:
        self.service = service    # service identity stored in db rows
        self.user_id = user_id    # account id on that service
        self.settings = settings
        self.db = db

    async def listen(self, handler: Callable[[Post], Any]):
        """Run the input loop, invoking `handler` for each new post
        (no-op in the base class)."""
        pass
+
+
class Output():
    """Destination service; receives posts/deletions observed by `input`."""

    def __init__(self, input: Input, settings: dict, db: DataBaseWorker) -> None:
        self.input = input        # the source this output mirrors
        self.settings = settings
        self.db = db

    def accept_post(self, post: Post):
        """Crosspost `post` to this output (no-op in the base class)."""
        pass

    def delete_post(self, identifier: str):
        """Delete the crossposted copies of the source post `identifier`
        (no-op in the base class)."""
        pass
+118 -108
database.py
···
import sqlite3
-
import json
+
from concurrent.futures import Future
+
import threading
+
import queue
-
import sqlite3
-
import json
-
-
class DataBase():
-
-
def __init__(self, path: str) -> None:
-
self.path = path
-
connection = sqlite3.connect(self.path, autocommit=True)
-
cursor = connection.cursor()
-
cursor.execute('''
-
CREATE TABLE IF NOT EXISTS posts (
-
id TEXT,
-
user_id TEXT,
-
data TEXT,
-
PRIMARY KEY (id, user_id)
-
)
-
''')
-
cursor.close()
+
class DataBaseWorker():
    """Owns one sqlite connection and serializes all access through a single
    daemon thread.

    `execute` blocks the calling thread until the worker has run the
    statement and returns the fetched rows.
    """

    def __init__(self, database: str) -> None:
        super().__init__()
        self.database = database
        self.queue = queue.Queue()
        self.thread = threading.Thread(target=self._run, daemon=True)
        self.shutdown_event = threading.Event()
        self.conn = sqlite3.connect(self.database, check_same_thread=False)
        self.lock = threading.Lock()
        self.thread.start()

    def _run(self):
        # worker loop: pop (callable, future) pairs until shutdown is requested;
        # the 1s timeout lets the loop notice the shutdown flag
        while not self.shutdown_event.is_set():
            try:
                job, fut = self.queue.get(timeout=1)
            except queue.Empty:
                continue
            try:
                with self.lock:
                    fut.set_result(job(self.conn))
            except Exception as err:
                fut.set_exception(err)
            finally:
                self.queue.task_done()

    def execute(self, sql: str, params = ()):
        """Run one statement on the worker thread and return cursor.fetchall().

        Commits after every statement; exceptions raised by sqlite are
        re-raised in the calling thread via the future.
        """
        def job(conn: sqlite3.Connection):
            cursor = conn.execute(sql, params)
            conn.commit()
            return cursor.fetchall()

        fut = Future()
        self.queue.put((job, fut))
        return fut.result()

    def close(self):
        """Stop the worker thread and close the connection."""
        self.shutdown_event.set()
        self.thread.join()
        with self.lock:
            self.conn.close()
-
if not raw_data:
-
return {}
+
def insert_post(db: DataBaseWorker, identifier: str, user_id: str, serivce: str) -> int:
    """Insert a top-level (rootless, parentless) post row; returns its id.

    NOTE(review): `serivce` is a typo for `service`, kept so keyword callers
    don't break. The rowid is fetched in a second worker task, so a
    concurrent insert could interleave — confirm single-writer usage.
    """
    row = (user_id, serivce, identifier)
    db.execute(
        """
        INSERT INTO posts (user_id, service, identifier)
        VALUES (?, ?, ?);
        """, row)
    return db.execute("SELECT last_insert_rowid();", ())[0][0]
-
data: dict[str, dict] = {}
-
for post_id, post_data in raw_data:
-
data[post_id] = json.loads(post_data)
+
def insert_reply(db: DataBaseWorker, identifier: str, user_id: str, serivce: str, parent: int, root: int) -> int:
    """Insert a reply row linked to internal ids `parent` and `root`;
    returns the new row's id.

    NOTE(review): same `serivce` typo and last_insert_rowid race as
    insert_post — kept for interface stability.
    """
    row = (user_id, serivce, identifier, parent, root)
    db.execute(
        """
        INSERT INTO posts (user_id, service, identifier, parent_id, root_id)
        VALUES (?, ?, ?, ?, ?);
        """, row)
    return db.execute("SELECT last_insert_rowid();", ())[0][0]
-
return data
+
def insert_mapping(db: DataBaseWorker, original: int, mapped: int):
    """Record that internal post `mapped` is the crossposted copy of
    internal post `original`."""
    db.execute("""
    INSERT INTO mappings (original_post_id, mapped_post_id)
    VALUES (?, ?);
    """, (original, mapped))
-
class UserScopedDB:
-
def __init__(self, db: DataBase, user_id: str):
-
self.db = db
-
self.user_id = user_id
+
def delete_post(db: DataBaseWorker, identifier: str, user_id: str, serivce: str):
    """Remove the post row matching (identifier, service, user_id).

    NOTE(review): `serivce` typo kept for interface stability.
    """
    key = (identifier, serivce, user_id)
    db.execute(
        """
        DELETE FROM posts
        WHERE identifier = ?
        AND service = ?
        AND user_id = ?
        """, key)
+
-
def connect(self) -> sqlite3.Connection:
-
return self.db.connect()
-
-
def put_post(self, db: sqlite3.Connection, id: str, data: dict):
-
return self.db.put_post(db, self.user_id, id, data)
-
-
def del_post(self, db: sqlite3.Connection, id: str):
-
return self.db.del_post(db, self.user_id, id)
-
-
def read_data(self, db: sqlite3.Connection, id: str) -> dict | None:
-
return self.db.read_data(db, self.user_id, id)
+
def find_mappings(db: DataBaseWorker, original_post: int, service: str, user_id: str) -> list[tuple[str]]:
    """Return identifier rows for the crossposted copies of `original_post`
    on (`service`, `user_id`), ordered by internal id (creation order).

    FIX: the return annotation was `list[str]`, but sqlite returns a list of
    1-tuples — callers consistently unpack with `row[0]`.
    """
    return db.execute(
        """
        SELECT p.identifier
        FROM posts AS p
        JOIN mappings AS m
        ON p.id = m.mapped_post_id
        WHERE m.original_post_id = ?
        AND p.service = ?
        AND p.user_id = ?
        ORDER BY p.id;
        """,
        (original_post, service, user_id))
+
+
def find_post_by_id(db: DataBaseWorker, id: int) -> dict | None:
    """Fetch a post row by internal id; returns a dict of its columns, or
    None when the row does not exist."""
    rows = db.execute(
        """
        SELECT user_id, service, identifier, parent_id, root_id
        FROM posts
        WHERE id = ?
        """, (id,))
    if not rows:
        return None
    columns = ('user_id', 'service', 'identifier', 'parent_id', 'root_id')
    return dict(zip(columns, rows[0]))
-
def get_all_children(self, db: sqlite3.Connection, id: str) -> dict[str, dict]:
-
return self.db.get_all_children(db, self.user_id, id)
+
def find_post(db: DataBaseWorker, identifier: str, user_id: str, service: str) -> dict | None:
    """Look up a post by its service-side identifier, scoped to a user and
    service; returns {'id', 'parent_id', 'root_id'} or None when absent."""
    rows = db.execute(
        """
        SELECT id, parent_id, root_id
        FROM posts
        WHERE identifier = ?
        AND user_id = ?
        AND service = ?
        """, (identifier, user_id, service))
    if not rows:
        return None
    return dict(zip(('id', 'parent_id', 'root_id'), rows[0]))
+91 -378
main.py
···
-
import click
+
from util import LOGGER
+
import os
import json
-
import asyncio, threading, queue
-
from atproto import IdResolver, Client, client_utils
-
import atproto_client.models as models
-
import util, mastodon, bluesky, database
-
import os
-
import media_util
-
import traceback
+
import database
+
import mastodon, bluesky, cross
+
import asyncio, threading, queue, traceback
-
ADULT_LABEL = ["sexual content", "nsfw"]
-
PORN_LABEL = ["porn", "yiff"]
+
# settings template written to data/settings.json on first launch.
# "env:NAME" values appear to be resolved from environment variables at load
# time (see util.get_or_envvar) — confirm.
DEFAULT_SETTINGS: dict = {
    'input': {
        'type': 'mastodon-wss',  # key into INPUTS below
        'instance': 'env:MASTODON_INSTANCE',
        'token': 'env:MASTODON_TOKEN',
        "options": {
            # visibilities that get crossposted; f/o and direct posts are skipped
            "allowed_visibility": [
                "public",
                "unlisted"
            ]
        }
    },
    'output': [
        {
            'type': 'bluesky',  # key into OUTPUTS below
            'handle': 'env:BLUESKY_HANDLE',
            'app-password': 'env:BLUESKY_APP_PASSWORD',
            'options': {
                'quote_gate': False,  # block users from quoting the post
                'thread_gate': [      # who may reply; empty list disables replies
                    'everybody'
                ]
            }
        }
    ]
}
-
class SocketListener():
-
def __init__(self, user_id: str, atproto: Client, settings: dict, db_path: str) -> None:
-
self.user_id = user_id
-
self.atp = bluesky.Bluesky(atproto)
-
self.settings = settings
-
self.db = database.UserScopedDB(database.DataBase(db_path), user_id)
-
-
def create_post_records(self, status: dict) -> list[models.AppBskyFeedPost.CreateRecordResponse] | None:
-
tokens: list[dict] = util.tokenize_html(status['content'])
-
-
label_text: set[str] = set()
-
status_spoiler = status['spoiler_text']
-
if status_spoiler:
-
tokens.insert(0, {"type": "text", "content": "CW: " + status_spoiler + '\n\n'})
-
label_text.add('graphic-media')
-
-
if any(tag in status_spoiler for tag in ADULT_LABEL):
-
label_text.add('sexual')
-
-
if any(tag in status_spoiler for tag in PORN_LABEL):
-
label_text.add('porn')
-
-
if status['sensitive']:
-
label_text.add('graphic-media')
-
-
labels = models.ComAtprotoLabelDefs.SelfLabels(values=[models.ComAtprotoLabelDefs.SelfLabel(val=label) for label in label_text])
-
-
split_tokens: list[list[dict]] = util.split_tokens(tokens, 300)
-
-
post_text: list[client_utils.TextBuilder] = []
-
for funnel in split_tokens:
-
rich_text = bluesky.tokens_to_richtext(funnel)
-
-
if rich_text is None:
-
click.echo(f"Skipping '{status["id"]}' as it contains invalid rich text types!")
-
return None
-
post_text.append(rich_text)
-
-
if not post_text:
-
post_text = [client_utils.TextBuilder().text('')]
-
-
records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
-
-
in_reply_to_id: str = status['in_reply_to_id']
-
-
root_ref = None
-
reply_ref = None
-
if in_reply_to_id:
-
db = self.db.connect()
-
data: dict | None = self.db.read_data(db, in_reply_to_id)
-
db.close()
-
-
if data is not None:
-
root_data = data['root_ref']
-
if not root_data:
-
root_data = data['mapped_post_refs'][0]
-
-
reply_data = data['mapped_post_refs'][-1]
-
-
root_post = models.AppBskyFeedPost.CreateRecordResponse(uri=str(root_data['uri']), cid=str(root_data['cid']))
-
root_ref = models.create_strong_ref(root_post)
-
-
reply_post = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_data['uri']), cid=str(reply_data['cid']))
-
reply_ref = models.create_strong_ref(reply_post)
-
-
attachments: list[dict] = status['media_attachments']
-
if not attachments:
-
for post in post_text:
-
if reply_ref and root_ref:
-
new_post = self.atp.send_post(post, reply_to=models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
), labels=labels)
-
else:
-
new_post = self.atp.send_post(post, labels=labels)
-
root_ref = models.create_strong_ref(new_post)
-
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
reply_ref = models.create_strong_ref(new_post)
-
records.append(new_post)
-
-
return records
-
elif len(attachments) <= 4:
-
if len(attachments) == 1 and attachments[0]['type'] == 'video':
-
video: dict = attachments[0]
-
-
video_io = media_util.download_blob(video['url'], max_bytes=100_000_000)
-
if not video_io:
-
click.echo(f"Skipping post_id '{status['id']}', failed to download attachment!")
-
return None
-
-
if len(video_io) > 100_000_000:
-
click.echo(f"Skipping post_id '{status['id']}'. Video file too large")
-
return None
-
-
# some mastodon api implementations don't seem to provide video meta
-
# try to probe it with ffmpeg
-
meta = media_util.get_video_meta(video_io)
-
if meta.get('duration', -1) > 180:
-
click.echo(f"Skipping post_id '{status["id"]}'. Video attachment too long!")
-
return None
-
-
aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(width=meta['width'], height=meta['height'])
-
-
new_post = self.atp.send_video(
-
text=post_text[0],
-
video=video_io,
-
video_aspect_ratio=aspect_ratio,
-
video_alt=video['description'] if video['description'] else '',
-
reply_to= models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
) if root_ref and reply_ref else None,
-
labels=labels
-
)
-
if not root_ref:
-
root_ref = models.create_strong_ref(new_post)
-
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
reply_ref = models.create_strong_ref(new_post)
-
else:
-
# check if all attachments are images.
-
# bluesky doesn't support gifv and unknown (TODO link the file)
-
for attachment in attachments:
-
if attachment['type'] != 'image':
-
click.echo(f"Skipping post_id '{status['id']}'. Attachment type mismatch. got: '{attachment['type']}' expected: 'image'")
-
return None
-
-
images: list[bytes] = []
-
image_alts: list[str] = []
-
image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []
-
for attachment in attachments:
-
-
image_io = media_util.download_blob(attachment['url'], max_bytes=2_000_000)
-
if not image_io:
-
click.echo(f"Skipping post_id '{status['id']}', failed to download attachment!")
-
return None
-
-
# Try to compress image if it's too large
-
if len(image_io) > 1_000_000:
-
click.echo(f"Trying to compress {attachment['url']}..")
-
image_io = media_util.compress_image(image_io)
-
if len(image_io) > 1_000_000:
-
click.echo(f"Skipping post_id '{status['id']}', media attachment still too large after compression!")
-
return None
-
-
meta = util.safe_get(attachment, 'meta', {}).get('original')
-
-
# some mastodon api implementations don't seem to provide image meta
-
# try to probe it with ffmpeg
-
if not meta:
-
meta = media_util.get_image_meta(image_io)
-
-
images.append(image_io)
-
image_alts.append(attachment['description'] if attachment['description'] else '')
-
image_aspect_ratios.append(models.AppBskyEmbedDefs.AspectRatio(width=meta['width'], height=meta['height']))
-
-
new_post = self.atp.send_images(
-
text=post_text[0],
-
images=images,
-
image_alts=image_alts,
-
image_aspect_ratios=image_aspect_ratios,
-
reply_to= models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
) if root_ref and reply_ref else None,
-
labels=labels
-
)
-
if not root_ref:
-
root_ref = models.create_strong_ref(new_post)
-
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
reply_ref = models.create_strong_ref(new_post)
-
-
records.append(new_post)
-
for post in post_text[1:]:
-
new_post = self.atp.send_post(post, reply_to=models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
), labels=labels)
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
-
reply_ref = models.create_strong_ref(new_post)
-
records.append(new_post)
-
-
return records
-
else:
-
click.echo(f"Skipping post_id '{status['id']}'. Too many attachments!")
-
return records if records else None
-
-
def on_update(self, status: dict):
-
if util.safe_get(status, 'account', {})['id'] != self.user_id:
-
return
-
-
if status['reblog'] or status['poll']:
-
# TODO polls not supported on bsky. maybe 3rd party? skip for now
-
# we don't handle reblogs. possible with bridgy(?) and self
-
return
+
# factory registry: settings['input']['type'] -> Input constructor
INPUTS = {
    "mastodon-wss": lambda settings, db: mastodon.MastodonInput(settings, db)
}
-
in_reply: str | None = status['in_reply_to_id']
-
in_reply_to: str | None = status['in_reply_to_account_id']
-
if in_reply_to and in_reply_to != self.user_id:
-
# We don't support replies. possible with bridgy(?)
-
return
-
-
if status['visibility'] not in ['public', 'unlisted']:
-
# Skip f/o and direct posts
-
return
-
-
click.echo(f"Got 'update' event for post '{status['id']}'")
-
-
db = self.db.connect()
-
if in_reply:
-
data: dict | None = self.db.read_data(db, in_reply)
-
if not data:
-
click.echo(f"Post '{status['id']}' is missing parent in the database!")
-
return
-
db.close()
-
-
records = self.create_post_records(status)
-
if records is None:
-
click.echo(f"Skipped crossposting '{status['id']}' due to above erros..")
-
return
-
-
refs: list[dict] = []
-
-
for record in records:
-
refs.append({'cid': record.cid, 'uri': record.uri})
-
-
db = self.db.connect()
-
if not in_reply:
-
self.db.put_post(db, status['id'], {
-
'parent_ref': None,
-
'root_ref': None,
-
'mapped_post_refs': refs
-
})
-
else:
-
self.db.put_post(db, status['id'], {
-
'parent_ref': data['mapped_post_refs'][-1],
-
'root_ref': data['mapped_post_refs'][-1],
-
'mapped_post_refs': refs
-
})
-
db.close()
-
-
def on_delete(self, id: str):
-
db = self.db.connect()
-
post_data = self.db.read_data(db, id)
-
-
if not post_data:
-
return
-
-
click.echo(f"Got 'delete' event for post '{id}'...")
-
-
for ref in post_data['mapped_post_refs']:
-
self.atp.client.delete_post(ref['uri'])
-
-
children: dict[str, dict] = self.db.get_all_children(db, id)
-
for id, data in children.items():
-
for ref in data['mapped_post_refs']:
-
self.atp.client.delete_post(ref['uri'])
-
self.db.del_post(db, id)
-
self.db.del_post(db, id)
-
-
db.close()
-
click.echo(f"Removed post '{id}' and {len(children.items())} replies")
-
-
# TODO Handle edits
-
# The issue is that since there are no edits on bluesky,
-
# we have to recreate the records while keeping the media in tact.
-
# also, since the db only stores post relations, we have to pull all the replies from masto and the pds.
-
def on_status_update(self, status: dict):
-
if status.get('account', {})['id'] != self.user_id:
-
return
-
if status.get('in_reply_to_account_id') != self.user_id:
-
return
-
-
click.echo(f"Got 'status.update' event for post '{status['id']}'")
+
# factory registry: each settings['output'][i]['type'] -> Output constructor
OUTPUTS = {
    "bluesky": lambda input, settings, db: bluesky.BlueskyOutput(input, settings, db)
}
-
@click.group()
-
def main():
-
pass
-
-
@main.command('run')
-
@click.option(
-
"-I", "--instance",
-
envvar="MASTODON_INSTANCE",
-
required=True,
-
help="Mastodon compatible instance domain (e.g. https://mastodon.social)"
-
)
-
@click.option(
-
"-T", "--token",
-
envvar="MASTODON_TOKEN",
-
required=True,
-
help="Mastodon access token"
-
)
-
@click.option(
-
"-H", "--handle",
-
envvar="ATPROTO_HANDLE",
-
required=True,
-
help="ATProto handle (e.g. melontini.me)"
-
)
-
@click.option(
-
"-P", "--password",
-
envvar="ATPROTO_PASSWORD",
-
required=True,
-
help="ATProto/Bluesky app password (https://bsky.app/settings/app-passwords)"
-
)
-
@click.option('--data_dir', default='./data', type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True, writable=True))
-
def run(instance, token, handle, password, data_dir):
+
def execute(data_dir):
settings_path = os.path.join(data_dir, 'settings.json')
+
database_path = os.path.join(data_dir, 'data.db')
+
if not os.path.exists(settings_path):
-
click.echo(f"First launch detected! creating {settings_path} and exiting..")
+
LOGGER.info("First launch detected! Creating %s and exiting!", settings_path)
with open(settings_path, 'w') as f:
-
json.dump(util.DEFAULT_SETTINGS, f, indent=2)
+
json.dump(DEFAULT_SETTINGS, f, indent=2)
return 0
+
LOGGER.info('Loading settings...')
with open(settings_path, 'rb') as f:
settings = json.load(f)
-
click.echo(f"Connecting to {instance}...")
-
fedi = mastodon.Mastodon(instance, token)
+
LOGGER.info('Starting database worker...')
+
db_worker = database.DataBaseWorker(os.path.abspath(database_path))
+
+
db_worker.execute('PRAGMA foreign_keys = ON;')
+
+
# create the posts table
+
# id - internal id of the post
+
# user_id - user id on the service (e.g. a724sknj5y9ydk0w)
+
# service - the service (e.g. https://shrimp.melontini.me)
+
# identifier - post id on the service (e.g. a8mpiyeej0fpjp0p)
+
# parent_id - the internal id of the parent
+
db_worker.execute(
+
"""
+
CREATE TABLE IF NOT EXISTS posts (
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
+
user_id TEXT NOT NULL,
+
service TEXT NOT NULL,
+
identifier TEXT NOT NULL UNIQUE,
+
parent_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL,
+
root_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
+
);
+
"""
+
)
+
+
# create the mappings table
+
# original_post_id - the post this was mapped from
+
# mapped_post_id - the post this was mapped to
+
db_worker.execute(
+
"""
+
CREATE TABLE IF NOT EXISTS mappings (
+
original_post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
+
mapped_post_id INTEGER NOT NULL
+
);
+
"""
+
)
-
if not fedi.streaming:
-
click.echo(f"{fedi.instance} does not support streaming timelines!", err=True)
-
return -1
-
-
id = fedi.get_user_id()
-
if not id:
-
click.echo(f"Failed to get user id from token for {fedi.instance}", err=True)
-
return -1
-
click.echo(f"Got user ID '{id}'")
+
input_settings = settings.get('input')
+
if not input_settings:
+
raise Exception("No input specified!")
+
outputs_settings = settings.get('outputs', [])
-
click.echo(f"Resolving ATP identity for {handle}...")
-
resolver = IdResolver()
-
did: str | None = resolver.handle.resolve(handle)
-
if not did:
-
click.echo(f"Failed to resolve atproto did for handle {handle}!", err=True)
-
return -1
+
input = INPUTS[input_settings['type']](input_settings, db_worker)
-
did_doc = resolver.did.resolve(did)
-
if not did_doc:
-
click.echo(f"Failed to resolve did document from {did}")
-
return -1
+
outputs: list[cross.Output] = []
+
for output_settings in outputs_settings:
+
outputs.append(OUTPUTS[output_settings['type']](input, output_settings, db_worker))
-
pds = did_doc.get_pds_endpoint()
-
if not pds:
-
click.echo(f"Failed to resolve PDS endpoint for did {did}")
-
return -1
-
-
click.echo(f"Logging in to {handle} through {pds}...")
-
atp = Client(pds)
-
atp.login(handle, password)
-
-
click.echo("Starting worker thread...")
+
LOGGER.info('Starting task worker...')
task_queue = queue.Queue()
-
def worker():
while True:
task = task_queue.get()
···
try:
task()
except Exception as e:
-
click.echo(f"Exception in worker thread!\n{e}", err=True)
+
LOGGER.error(f"Exception in worker thread!\n{e}")
traceback.print_exc()
-
thread = threading.Thread(target=worker, daemon=True)
thread.start()
-
-
click.echo(f"Listening to {fedi.streaming}...")
-
listener = SocketListener(id, atp, settings, os.path.join(data_dir, 'data.db'))
+
LOGGER.info('Listening to %s...', input.service)
+
asyncio.run(input.listen(outputs, lambda x: task_queue.put(x)))
-
def handler(event_type, payload):
-
def handle_event():
-
try:
-
if event_type == 'update':
-
listener.on_update(json.loads(payload))
-
elif event_type == 'delete':
-
listener.on_delete(payload)
-
elif event_type == 'status.update':
-
listener.on_status_update(json.loads(payload))
-
except Exception as e:
-
click.echo(f"Error in event handler: {e}", err=True)
-
traceback.print_exc()
-
task_queue.put(handle_event)
-
-
asyncio.run(fedi.connect_websocket(handler))
-
-
task_queue.join()
-
-
task_queue.put(None)
-
thread.join()
-
return 0
if __name__ == "__main__":
-
main()
+
execute('./data')
+235 -23
mastodon.py
···
+
from util import LOGGER
import requests, websockets
-
import util, json
+
import util, media_util, json
+
import cross
+
import database
+
from database import DataBaseWorker
+
from typing import Callable, Any
+
+
from bs4 import BeautifulSoup, Tag
+
from bs4.element import NavigableString
+
from markdownify import markdownify as md
-
class Mastodon():
-
def __init__(self, instance: str, token: str) -> None:
-
self.token = token
-
self.instance = instance
-
self.streaming = self.get_streaming_url()
+
FORMATS = {
+
'video': 'video',
+
'image': 'image',
+
'gifv': 'gif',
+
'audio': 'audio',
+
'unknown': 'other'
+
}
-
def get_streaming_url(self):
-
response = requests.get(f"{self.instance}/api/v1/instance")
-
response.raise_for_status()
-
data: dict = response.json()
-
return util.safe_get(data, "urls", {}).get("streaming_api")
+
def tokenize_post(status: dict) -> list[cross.Token]:
+
soup = BeautifulSoup(status['content'], "html.parser")
+
tokens: list[cross.Token] = []
+
+
tags: list[dict] = status.get('tags', [])
+
mentions: list[dict] = status.get('mentions', [])
+
+
def recurse(node) -> None:
+
if isinstance(node, NavigableString):
+
tokens.append(cross.TextToken(str(node)))
+
return
+
+
if isinstance(node, Tag):
+
if node.name.lower() == "a":
+
href = node.get("href", "")
+
inner_html = "".join(str(c) for c in node.contents)
+
link_text_md = md(inner_html)
+
+
if link_text_md.startswith('@'):
+
as_mention = link_text_md[1:]
+
for block in mentions:
+
if href == block.get('url'):
+
tokens.append(cross.MentionToken(block['acct'], block['url']))
+
return
+
elif as_mention == block.get('acct') or as_mention == block.get('username'):
+
tokens.append(cross.MentionToken(block['acct'], block['url']))
+
return
+
+
if link_text_md.startswith('#'):
+
as_tag = link_text_md[1:].lower()
+
if any(as_tag == block.get('name') for block in tags):
+
tokens.append(cross.TagToken(link_text_md[1:]))
+
return
+
+
# idk if we can safely convert this to string
+
tokens.append(cross.LinkToken(str(href), link_text_md))
+
return
+
+
if node.find("a") is not None:
+
for child in node.contents:
+
recurse(child)
+
return
+
+
serialized = str(node)
+
markdownified = md(serialized)
+
if markdownified:
+
tokens.append(cross.TextToken(markdownified))
+
return
+
return
-
def get_user_id(self):
-
responce = requests.get(f"{self.instance}/api/v1/accounts/verify_credentials", headers={
+
for child in soup.contents:
+
recurse(child)
+
+
return tokens
+
+
class MastodonPost(cross.Post):
+
def __init__(self, status: dict) -> None:
+
super().__init__()
+
self.status = status
+
media_attachments: list[cross.MediaAttachment] = []
+
+
for attachment in status['media_attachments']:
+
media_attachments.append(MastodonAttachment(attachment))
+
+
self.media_attachments = media_attachments
+
+
self.tokens = util.tokenize_html(status['content'])
+
+
def get_tokens(self) -> list[cross.Token]:
+
return self.tokens
+
+
def get_parent_id(self) -> str:
+
return self.status['in_reply_to_id']
+
+
def get_cw(self) -> str:
+
return util.safe_get(self.status, 'spoiler_text', '')
+
+
def get_id(self) -> str:
+
return self.status['id']
+
+
def is_sensitive(self) -> bool:
+
return self.status['sensitive']
+
+
def get_attachments(self) -> list[cross.MediaAttachment]:
+
return self.media_attachments
+
+
class MastodonAttachment(cross.MediaAttachment):
+
def __init__(self, attachment: dict) -> None:
+
super().__init__()
+
self.attachment = attachment
+
+
if attachment.get('type') == 'video' or attachment.get('type') == 'image':
+
if attachment.get('meta') and attachment.get('meta', {}).get('original'):
+
def from_status(bytes: bytes) -> cross.MediaMeta:
+
o_meta = attachment.get('meta', {}).get('original')
+
return cross.MediaMeta(o_meta['width'], o_meta['height'], o_meta.get('duration', -1))
+
self.meta_generator = from_status
+
else:
+
def from_bytes(bytes: bytes) -> cross.MediaMeta:
+
o_meta = media_util.get_media_meta(bytes)
+
return cross.MediaMeta(o_meta['width'], o_meta['height'], o_meta.get('duration', -1))
+
self.meta_generator = from_bytes
+
+
# URL to download the attachment from
+
def get_url(self) -> str:
+
return self.attachment.get('url', '')
+
+
# type of attachment
+
def get_type(self) -> str | None:
+
return FORMATS[self.attachment.get('type', 'other')]
+
+
# create file metadata from bytes or other
+
def create_meta(self, bytes: bytes) -> cross.MediaMeta:
+
return self.meta_generator(bytes)
+
+
# get media description
+
def get_alt(self) -> str:
+
return util.safe_get(self.attachment, 'description', '')
+
+
class MastodonInput(cross.Input):
+
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+
self.options = settings.get('options', {})
+
self.token = util.get_or_envvar(settings, 'token')
+
instance: str = util.get_or_envvar(settings, 'instance')
+
+
service = instance[:-1] if instance.endswith('/') else instance
+
+
LOGGER.info("Verifying %s credentials...", service)
+
responce = requests.get(f"{service}/api/v1/accounts/verify_credentials", headers={
'Authorization': f'Bearer {self.token}'
})
-
if responce.status_code == 401:
raise Exception("Invalid Mastodon API token provided!")
+
+
super().__init__(service, responce.json()["id"], settings, db)
+
self.streaming = self._get_streaming_url()
+
+
if not self.streaming:
+
raise Exception("Instance %s does not support streaming!", service)
+
+
def _get_streaming_url(self):
+
response = requests.get(f"{self.service}/api/v1/instance")
+
response.raise_for_status()
+
data: dict = response.json()
+
return util.safe_get(data, "urls", {}).get("streaming_api")
-
return responce.json()["id"]
+
def _on_create_post(self, outputs: list[cross.Output], status: dict):
+
# skip events from other users
+
if util.safe_get(status, 'account', {})['id'] != self.user_id:
+
return
+
+
if status.get('reblog') or status.get('poll'):
+
# TODO polls not supported on bsky. maybe 3rd party? skip for now
+
# we don't handle reblogs. possible with bridgy(?) and self
+
LOGGER.info("Skipping '%s'! Reblog or poll..", status['id'])
+
return
+
+
in_reply: str | None = status.get('in_reply_to_id')
+
in_reply_to: str | None = status.get('in_reply_to_account_id')
+
if in_reply_to and in_reply_to != self.user_id:
+
# We don't support replies.
+
LOGGER.info("Skipping '%s'! Reply to other user..", status['id'])
+
return
+
+
if status.get('visibility') not in self.options.get('allowed_visibility', []):
+
# Skip f/o and direct posts
+
LOGGER.info("Skipping '%s'! '%s' visibility..", status['id'], status.get('visibility'))
+
return
+
+
root_id = None
+
parent_id = None
+
if in_reply:
+
parent_post = database.find_post(self.db, in_reply, self.user_id, self.service)
+
if not parent_post:
+
LOGGER.info("Skipping '%s' as parent post was not found in db!", status['id'])
+
return
+
+
root_id = parent_post['id']
+
parent_id = root_id
+
if parent_post['root_id']:
+
root_id = parent_post['root_id']
+
+
if root_id and parent_id:
+
database.insert_reply(
+
self.db,
+
status['id'],
+
self.user_id,
+
self.service,
+
parent_id,
+
root_id
+
)
+
else:
+
database.insert_post(
+
self.db,
+
status['id'],
+
self.user_id,
+
self.service
+
)
+
+
cross_post = MastodonPost(status)
+
for output in outputs:
+
output.accept_post(cross_post)
-
async def connect_websocket(self, handler):
+
def _on_delete_post(self, outputs: list[cross.Output], identifier: str):
+
post = database.find_post(self.db, identifier, self.user_id, self.service)
+
if not post:
+
return
+
+
LOGGER.info("Deleting '%s'...", identifier)
+
for output in outputs:
+
output.delete_post(identifier)
+
database.delete_post(self.db, identifier, self.user_id, self.service)
+
+
def _on_post(self, outputs: list[cross.Output], event: str, payload: str):
+
if event == 'update':
+
self._on_create_post(outputs, json.loads(payload))
+
elif event == 'delete':
+
self._on_delete_post(outputs, payload)
+
+
+
async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
uri = f"{self.streaming}/api/v1/streaming?stream=user&access_token={self.token}"
async with websockets.connect(uri, extra_headers={
-
"User-Agent": "XPost/0.0.1"
-
}) as websocket:
+
"User-Agent": "XPost/0.0.2"
+
}) as ws:
while True:
-
message = await websocket.recv()
+
message = await ws.recv()
event: dict = json.loads(message)
-
-
event_type = event.get('event')
-
payload = event.get('payload')
-
handler(event_type, payload)
+
submit(lambda: self._on_post(outputs, str(event.get('event')), str(event.get('payload'))))
+12 -25
media_util.py
···
import requests
-
import click
import subprocess
import json
+
from util import LOGGER
def probe_bytes(bytes: bytes) -> dict:
cmd = [
···
return json.loads(proc.stdout)
-
def compress_image(image_bytes: bytes):
+
def compress_image(image_bytes: bytes, quality: int = 90):
cmd = [
'ffmpeg',
'-f', 'image2pipe',
'-i', 'pipe:0',
'-c:v', 'webp',
-
'-q:v', '90',
+
'-q:v', str(quality),
'-f', 'image2pipe',
'pipe:1'
]
···
def download_blob(url: str, max_bytes: int = 5_000_000) -> bytes | None:
response = requests.get(url, stream=True, timeout=20)
if response.status_code != 200:
-
click.echo(f"Failed to download {url}! {response}")
+
LOGGER.info("Failed to download %s! %s", url, response)
return None
downloaded_bytes = b""
···
current_size += len(chunk)
if current_size > max_bytes:
-
click.echo(f"Failed to download {url}, file too large!")
response.close()
return None
···
return downloaded_bytes
-
def get_video_meta(video_bytes: bytes):
-
probe = probe_bytes(video_bytes)
-
video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
-
if not video_streams:
+
def get_media_meta(bytes: bytes):
+
probe = probe_bytes(bytes)
+
streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
+
if not streams:
raise ValueError("No video stream found")
-
video = video_streams[0]
+
media = streams[0]
return {
-
'width': int(video['width']),
-
'height': int(video['height']),
-
'duration': float(video.get('duration', probe['format'].get('duration', -1)))
-
}
-
-
def get_image_meta(image_bytes: bytes):
-
probe = probe_bytes(image_bytes)
-
stream = next((s for s in probe['streams'] if s['codec_type'] == 'video'), None)
-
-
if not stream:
-
raise ValueError("No video stream found")
-
-
return {
-
'width': int(stream['width']),
-
'height': int(stream['height'])
+
'width': int(media['width']),
+
'height': int(media['height']),
+
'duration': float(media.get('duration', probe['format'].get('duration', -1)))
}
+2 -1
pyproject.toml
···
[project]
name = "xpost"
-
version = "0.0.1"
+
version = "0.0.2"
description = "mastodon -> bluesky crossposting tool"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"atproto>=0.0.61",
+
"bs4>=0.0.2",
"click>=8.2.1",
"markdownify>=1.1.0",
"requests>=2.32.3",
+35 -26
util.py
···
import re
from markdownify import markdownify as md
+
import cross
+
import logging, sys, os
+
+
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
LOGGER = logging.getLogger("XPost")
ALTERNATE = re.compile(r'\S+|\s+')
···
def tokenize_html(content: str):
return tokenize_markdown(md(content, autolinks=False))
-
def tokenize_markdown(md):
+
def tokenize_markdown(md) -> list[cross.Token]:
tokens = []
i = 0
length = len(md)
···
url += md[i]
i += 1
i += 1 # skip )
-
tokens.append({'type': 'media', 'alt': alt_text, 'url': url})
+
#tokens.append({'type': 'media', 'alt': alt_text, 'url': url})
else:
-
tokens.append({'type': 'text', 'content': md[start:i]})
+
tokens.append(cross.TextToken(md[start:i]))
elif md[i] == '[':
# link or special
start = i
···
i += 1
i += 1 # skip )
if link_text.startswith('#'):
-
tokens.append({'type': 'hashtag', 'tag': link_text[1:], 'url': url})
+
tokens.append(cross.TagToken(link_text[1:]))
elif link_text.startswith('@'):
-
tokens.append({'type': 'mention', 'mention': link_text[1:], 'url': url})
+
tokens.append(cross.MentionToken(link_text[1:], url))
elif link_text.startswith('http://') or link_text.startswith('https://'):
-
tokens.append({'type': 'link', 'text': link_text, 'url': url})
+
tokens.append(cross.LinkToken(url, link_text))
else:
-
tokens.append({'type': 'link', 'text': link_text, 'url': url})
+
tokens.append(cross.LinkToken(url, link_text))
else:
-
tokens.append({'type': 'text', 'content': md[start:i]})
+
tokens.append(cross.TextToken(md[start:i]))
else:
# plain text
start = i
while i < length and md[i] != '[' and not (md[i] == '!' and i + 1 < length and md[i + 1] == '['):
i += 1
-
tokens.append({'type': 'text', 'content': md[start:i]})
+
tokens.append(cross.TextToken(md[start:i]))
return tokens
-
def split_tokens(tokens: list[dict], max_chars: int) -> list[list[dict]]:
+
def split_tokens(tokens: list[cross.Token], max_chars: int) -> list[list[cross.Token]]:
def start_new_block():
nonlocal current_block, blocks, current_length
if current_block:
···
def append_text_to_block(text_segment):
nonlocal current_block
# if the last element in the current block is also text, just append to it
-
if current_block and current_block[-1]['type'] == 'text':
-
current_block[-1]['content'] += text_segment
+
if current_block and isinstance(current_block[-1], cross.TextToken):
+
current_block[-1].text += text_segment
else:
-
current_block.append({'type': 'text', 'content': text_segment})
+
current_block.append(cross.TextToken(text_segment))
-
blocks: list[list[dict]] = []
-
current_block: list[dict] = []
+
blocks: list[list[cross.Token]] = []
+
current_block: list[cross.Token] = []
current_length: int = 0
for token in tokens:
-
ttype: str = token['type']
-
-
if ttype == 'text':
-
content: str = token['content']
+
if isinstance(token, cross.TextToken):
# split content into alternating “words” (\S+) and “whitespace” (\s+).
# this ensures every space/newline is treated as its own segment.
-
segments: list[str] = ALTERNATE.findall(content)
+
segments: list[str] = ALTERNATE.findall(token.text)
for seg in segments:
if seg.isspace():
···
append_text_to_block(word)
current_length = wlen
-
elif ttype == 'link':
-
url = token['url']
-
link_len = min(len(url), 35)
+
elif isinstance(token, cross.LinkToken):
+
link_len = min(len(token.label), 35)
if current_length + link_len <= max_chars:
current_block.append(token)
···
current_block.append(token)
current_length = link_len
-
elif ttype == 'hashtag':
+
elif isinstance(token, cross.TagToken):
# we treat a hashtag like “#tagname” for counting.
-
hashtag_len = 1 + len(token['tag'])
+
hashtag_len = 1 + len(token.tag)
if current_length + hashtag_len <= max_chars:
current_block.append(token)
current_length += hashtag_len
···
def safe_get(obj: dict, key: str, default):
val = obj.get(key, default)
-
return val if val else default
+
return val if val else default
+
+
def value_or_envvar(text: str) -> str:
+
if text.startswith('env:'):
+
return os.environ.get(text[4:], '')
+
return text
+
+
def get_or_envvar(obj: dict, key: str):
+
return value_or_envvar(obj.get(key, ''))
+14
uv.lock
···
]
[[package]]
+
name = "bs4"
+
version = "0.0.2"
+
source = { registry = "https://pypi.org/simple" }
+
dependencies = [
+
{ name = "beautifulsoup4" },
+
]
+
sdist = { url = "https://files.pythonhosted.org/packages/c9/aa/4acaf814ff901145da37332e05bb510452ebed97bc9602695059dd46ef39/bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", size = 698, upload-time = "2024-01-17T18:15:47.371Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/51/bb/bf7aab772a159614954d84aa832c129624ba6c32faa559dfb200a534e50b/bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc", size = 1189, upload-time = "2024-01-17T18:15:48.613Z" },
+
]
+
+
[[package]]
name = "certifi"
version = "2025.4.26"
source = { registry = "https://pypi.org/simple" }
···
source = { virtual = "." }
dependencies = [
{ name = "atproto" },
+
{ name = "bs4" },
{ name = "click" },
{ name = "markdownify" },
{ name = "requests" },
···
[package.metadata]
requires-dist = [
{ name = "atproto", specifier = ">=0.0.61" },
+
{ name = "bs4", specifier = ">=0.0.2" },
{ name = "click", specifier = ">=8.2.1" },
{ name = "markdownify", specifier = ">=1.1.0" },
{ name = "requests", specifier = ">=2.32.3" },