social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

Compare changes


.dockerignore (+8)
···
+.env
+.env.*
+.gitignore
+.DS_Store
+*.swp
+*~
+__pycache__/
+.venv
.tangled/workflows/build-images.yml (+50)
···
+when:
+  - event: ["push", "manual"]
+    branch: master
+
+engine: nixery
+
+dependencies:
+  nixpkgs:
+    - kaniko
+    - regctl
+
+environment:
+  GHCR_USER: "zenfyrdev"
+
+steps:
+  - name: create auth configs
+    command: |
+      mkdir -p $HOME/.docker $HOME/.regctl
+
+      cat > $HOME/.docker/config.json <<EOF
+      {"auths": {"ghcr.io": {"auth": "$(echo -n "$GHCR_USER:$GHCR_PAT" | base64 -w0)"}}}
+      EOF
+
+      cat > $HOME/.regctl/config.json <<EOF
+      {"hosts": {"ghcr.io": {"user": "$GHCR_USER","pass": "$GHCR_PAT"}}}
+      EOF
+
+  - name: build amd64
+    command: |
+      executor \
+        --context=dir://. \
+        --dockerfile=Containerfile \
+        --verbosity=info \
+        --destination=ghcr.io/$GHCR_USER/xpost:amd64-latest \
+        --custom-platform=linux/amd64
+
+  - name: build arm64
+    command: |
+      executor \
+        --context=dir://. \
+        --dockerfile=Containerfile \
+        --verbosity=info \
+        --destination=ghcr.io/$GHCR_USER/xpost:arm64-latest \
+        --custom-platform=linux/arm64
+
+  - name: tag latest artifact
+    command: |
+      regctl index create ghcr.io/$GHCR_USER/xpost:latest \
+        --ref ghcr.io/$GHCR_USER/xpost:amd64-latest --platform linux/amd64 \
+        --ref ghcr.io/$GHCR_USER/xpost:arm64-latest --platform linux/arm64
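a note on this pipeline: each architecture is built and pushed as its own single-arch tag by kaniko's `executor`, and the final step stitches those two tags into one multi-arch manifest list with `regctl index create`, so pulling `ghcr.io/zenfyrdev/xpost:latest` resolves to the right image on both amd64 and arm64 hosts.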
Containerfile (+41)
···
+FROM python:3.12-alpine
+COPY --from=ghcr.io/astral-sh/uv:0.7.12 /uv /uvx /bin/
+
+# Install build tools & runtime dependencies
+RUN apk add --no-cache \
+    ffmpeg \
+    file \
+    libmagic
+
+RUN mkdir -p /app/data
+WORKDIR /app
+
+# switch to a non-root user
+RUN adduser -D -u 1000 app && \
+    chown -R app:app /app
+USER app
+
+# Enable bytecode compilation
+ENV UV_COMPILE_BYTECODE=1
+
+# Copy from the cache instead of linking since it's a mounted volume
+ENV UV_LINK_MODE=copy
+
+# Install the project's dependencies using the lockfile and settings
+COPY ./uv.lock ./pyproject.toml /app/
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --locked --no-install-project --no-dev
+
+# Define app data volume
+VOLUME /app/data
+
+# Then, add the rest of the project source code and install it
+COPY . /app
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --locked --no-dev
+
+# Place executables in the environment at the front of the path
+ENV PATH="/app/.venv/bin:$PATH"
+
+# Set entrypoint to run the app using uv
+ENTRYPOINT ["uv", "run", "main.py"]
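a local image should build straight from this file, e.g. `podman build -t xpost -f Containerfile .` (or the equivalent `docker build`, which needs BuildKit enabled for the `--mount=type=cache` steps to work).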
README.md (+37 -10)
···
 # XPost
-> put more readme here uhhh
+XPost is a social media cross-posting tool that differs from others by using streaming APIs to allow instant, zero-input cross-posting. this means you can keep posting from your preferred platform without using special apps.
-a silly little crossposting tool based on the mastodon streaming api.
+XPost tries to support as many features as possible. for example, when cross-posting from mastodon to bluesky, unsupported file types will be attached as links, and posts with mixed media or too many files will be split across multiple posts.
-this tool is very, very not production ready or something. use with caution.
+the tool may undergo breaking changes as new features are added, so proceed with caution when deploying.
 # Installation
+
+## Native
 first install `ffmpeg`, `ffprobe` and `libmagic`, make sure that `ffmpeg` is available on PATH! `ffmpeg` and `libmagic` are required to crosspost media.
···
 uv run main.py
 ```
+## Docker Compose
+
+the official image is available on [docker hub](https://hub.docker.com/r/melontini/xpost). an example `compose.yaml` is below; it assumes the data dir is `./data` and the env file is `./.config/docker.env`. add `:Z` to volume mounts for podman.
+
+```yaml
+services:
+  xpost:
+    image: melontini/xpost:latest
+    restart: unless-stopped
+    env_file: ./.config/docker.env
+    volumes:
+      - ./data:/app/data
+```
+# Settings
 the tool allows you to specify an input and multiple outputs to post to.
···
 ## Inputs
-### Bluesky PDS WebSocket
+all inputs have common options.
+
+```json5
+{
+  "options": {
+    "regex_filters": [ // posts matching any of the following regexes will be skipped
+      "(?i)\\b(?:test|hello|hi)\\b"
+    ]
+  }
+}
+```
-**this is meant for self-hosted PDSs that don't emmit a billion events per second.** a jetstream version will be available soon.
+### Bluesky Jetstream
-listens to repo operation events emmited by the PDS. handle becomes optional if you specify a DID.
+listens to repo operation events emitted by Jetstream. handle becomes optional if you specify a DID.
 ```json5
 {
-  "type": "bluesky-pds-wss",
+  "type": "bluesky-jetstream-wss",
   "handle": "env:BLUESKY_HANDLE", // handle (e.g. melontini.me)
   "did": "env:BLUESKY_DID", // use a DID instead of handle (avoids handle resolution)
-  "pds": "end:BLUESKY_PDS" // specify Your PDS directly (avoids DID doc lookup)
+  "jetstream": "wss://jetstream2.us-east.bsky.network/subscribe" // optional, change jetstream endpoint
 }
 ```
···
 ```json5
 {
   "type": "mastodon",
-  "token": "env:MASTODON_TOKEN", // Must be a mastodon token. get from something like phanpy + webtools. or https://getauth.thms.uk/?client_name=xpost&scopes=read:statuses%20write:statuses%20profile but doesn't work with all software
+  "token": "env:MASTODON_TOKEN", // must be a mastodon token. get one from something like phanpy + webtools, or https://getauth.thms.uk/?client_name=xpost&scopes=read%20write%20profile (doesn't work with all software)
   "instance": "env:MASTODON_INSTNACE", // mastodon api compatible instance
   "options": {
     "visibility": "public"
···
   "app_password": "env:BLUESKY_APP_PASSWORD", // https://bsky.app/settings/app-passwords
   "did": "env:BLUESKY_DID", // use a DID instead of handle (avoids handle resolution)
   "pds": "env:BLUESKY_PDS", // specify Your PDS directly (avoids DID doc lookup)
+  "bsky_appview": "env:BLUESKY_APPVIEW", // bypass suspensions by specifying a different appview (e.g. did:web:bsky.zeppelin.social)
   "options": {
     "encode_videos": true, // bluesky only accepts mp4 videos, try to convert if the video is not mp4
     "quote_gate": false, // block users from quoting the post
···
     ]
   }
 }
-```
+```
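for orientation, a complete settings file plausibly wires one input into a list of outputs along these lines (a sketch only: the `input`/`outputs` wrapper keys and the `bluesky` output type name are assumptions, not shown in the excerpts above). note that the output code reads `app-password` while the README snippet above says `app_password`; one of the two is presumably stale.

```json5
{
  "input": { // assumed wrapper key
    "type": "mastodon",
    "token": "env:MASTODON_TOKEN",
    "instance": "env:MASTODON_INSTANCE"
  },
  "outputs": [ // assumed wrapper key
    {
      "type": "bluesky", // assumed type name
      "handle": "env:BLUESKY_HANDLE",
      "app-password": "env:BLUESKY_APP_PASSWORD", // spelled as the output code reads it
      "options": { "encode_videos": true }
    }
  ]
}
```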
atproto2.py (-186)
···
-from typing import Any
-from atproto import client_utils, Client, AtUri, IdResolver
-from atproto_client import models
-from util import LOGGER
-
-def resolve_identity(
-    handle: str | None = None,
-    did: str | None = None,
-    pds: str | None = None):
-    """helper to try and resolve identity from provided parameters, a valid handle is enough"""
-
-    if did and pds:
-        return did, pds[:-1] if pds.endswith('/') else pds
-
-    resolver = IdResolver()
-    if not did:
-        if not handle:
-            raise Exception("ATP handle not specified!")
-        LOGGER.info("Resolving ATP identity for %s...", handle)
-        did = resolver.handle.resolve(handle)
-        if not did:
-            raise Exception("Failed to resolve DID!")
-
-    if not pds:
-        LOGGER.info("Resolving PDS from DID document...")
-        did_doc = resolver.did.resolve(did)
-        if not did_doc:
-            raise Exception("Failed to resolve DID doc for '%s'", did)
-        pds = did_doc.get_pds_endpoint()
-        if not pds:
-            raise Exception("Failed to resolve PDS!")
-
-    return did, pds[:-1] if pds.endswith('/') else pds
-
-class Client2(Client):
-    def __init__(self, base_url: str | None = None, *args: Any, **kwargs: Any) -> None:
-        super().__init__(base_url, *args, **kwargs)
-
-    def send_video(
-        self,
-        text: str | client_utils.TextBuilder,
-        video: bytes,
-        video_alt: str | None = None,
-        video_aspect_ratio: models.AppBskyEmbedDefs.AspectRatio | None = None,
-        reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
-        langs: list[str] | None = None,
-        facets: list[models.AppBskyRichtextFacet.Main] | None = None,
-        labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
-        time_iso: str | None = None
-    ) -> models.AppBskyFeedPost.CreateRecordResponse:
-        """same as send_video, but with labels"""
-
-        if video_alt is None:
-            video_alt = ''
-
-        upload = self.upload_blob(video)
-
-        return self.send_post(
-            text,
-            reply_to=reply_to,
-            embed=models.AppBskyEmbedVideo.Main(video=upload.blob, alt=video_alt, aspect_ratio=video_aspect_ratio),
-            langs=langs,
-            facets=facets,
-            labels=labels,
-            time_iso=time_iso
-        )
-
-    def send_images(
-        self,
-        text: str | client_utils.TextBuilder,
-        images: list[bytes],
-        image_alts: list[str] | None = None,
-        image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] | None = None,
-        reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
-        langs: list[str] | None = None,
-        facets: list[models.AppBskyRichtextFacet.Main] | None = None,
-        labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
-        time_iso: str | None = None
-    ) -> models.AppBskyFeedPost.CreateRecordResponse:
-        """same as send_images, but with labels"""
-
-        if image_alts is None:
-            image_alts = [''] * len(images)
-        else:
-            diff = len(images) - len(image_alts)
-            image_alts = image_alts + [''] * diff
-
-        if image_aspect_ratios is None:
-            aligned_image_aspect_ratios = [None] * len(images)
-        else:
-            diff = len(images) - len(image_aspect_ratios)
-            aligned_image_aspect_ratios = image_aspect_ratios + [None] * diff
-
-        uploads = [self.upload_blob(image) for image in images]
-
-        embed_images = [
-            models.AppBskyEmbedImages.Image(alt=alt, image=upload.blob, aspect_ratio=aspect_ratio)
-            for alt, upload, aspect_ratio in zip(image_alts, uploads, aligned_image_aspect_ratios)
-        ]
-
-        return self.send_post(
-            text,
-            reply_to=reply_to,
-            embed=models.AppBskyEmbedImages.Main(images=embed_images),
-            langs=langs,
-            facets=facets,
-            labels=labels,
-            time_iso=time_iso
-        )
-
-    def send_post(
-        self,
-        text: str | client_utils.TextBuilder,
-        reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
-        embed:
-            None |
-            models.AppBskyEmbedImages.Main |
-            models.AppBskyEmbedExternal.Main |
-            models.AppBskyEmbedRecord.Main |
-            models.AppBskyEmbedRecordWithMedia.Main |
-            models.AppBskyEmbedVideo.Main = None,
-        langs: list[str] | None = None,
-        facets: list[models.AppBskyRichtextFacet.Main] | None = None,
-        labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
-        time_iso: str | None = None
-    ) -> models.AppBskyFeedPost.CreateRecordResponse:
-        """same as send_post, but with labels"""
-
-        if isinstance(text, client_utils.TextBuilder):
-            facets = text.build_facets()
-            text = text.build_text()
-
-        repo = self.me and self.me.did
-        if not repo:
-            raise Exception("Client not logged in!")
-
-        if not langs:
-            langs = ['en']
-
-        record = models.AppBskyFeedPost.Record(
-            created_at=time_iso or self.get_current_time_iso(),
-            text=text,
-            reply=reply_to or None,
-            embed=embed or None,
-            langs=langs,
-            facets=facets or None,
-            labels=labels or None
-        )
-        return self.app.bsky.feed.post.create(repo, record)
-
-    def create_gates(self, thread_gate_opts: list[str], quote_gate: bool, post_uri: str, time_iso: str | None = None):
-        account = self.me
-        if not account:
-            raise Exception("Client not logged in!")
-
-        rkey = AtUri.from_str(post_uri).rkey
-        time_iso = time_iso or self.get_current_time_iso()
-
-        if 'everybody' not in thread_gate_opts:
-            allow = []
-            if thread_gate_opts:
-                if 'following' in thread_gate_opts:
-                    allow.append(models.AppBskyFeedThreadgate.FollowingRule())
-                if 'followers' in thread_gate_opts:
-                    allow.append(models.AppBskyFeedThreadgate.FollowerRule())
-                if 'mentioned' in thread_gate_opts:
-                    allow.append(models.AppBskyFeedThreadgate.MentionRule())
-
-            thread_gate = models.AppBskyFeedThreadgate.Record(
-                post=post_uri,
-                created_at=time_iso,
-                allow=allow
-            )
-
-            self.app.bsky.feed.threadgate.create(account.did, thread_gate, rkey)
-
-        if quote_gate:
-            post_gate = models.AppBskyFeedPostgate.Record(
-                post=post_uri,
-                created_at=time_iso,
-                embedding_rules=[
-                    models.AppBskyFeedPostgate.DisableRule()
-                ]
-            )
-
-            self.app.bsky.feed.postgate.create(account.did, post_gate, rkey)
bluesky/atproto2.py (+196)
···
+from typing import Any
+
+from atproto import AtUri, Client, IdResolver, client_utils
+from atproto_client import models
+
+from util.util import LOGGER
+
+
+def resolve_identity(
+    handle: str | None = None, did: str | None = None, pds: str | None = None
+):
+    """helper to try and resolve identity from provided parameters, a valid handle is enough"""
+
+    if did and pds:
+        return did, pds[:-1] if pds.endswith("/") else pds
+
+    resolver = IdResolver()
+    if not did:
+        if not handle:
+            raise Exception("ATP handle not specified!")
+        LOGGER.info("Resolving ATP identity for %s...", handle)
+        did = resolver.handle.resolve(handle)
+        if not did:
+            raise Exception("Failed to resolve DID!")
+
+    if not pds:
+        LOGGER.info("Resolving PDS from DID document...")
+        did_doc = resolver.did.resolve(did)
+        if not did_doc:
+            raise Exception("Failed to resolve DID doc for '%s'", did)
+        pds = did_doc.get_pds_endpoint()
+        if not pds:
+            raise Exception("Failed to resolve PDS!")
+
+    return did, pds[:-1] if pds.endswith("/") else pds
+
+
+class Client2(Client):
+    def __init__(self, base_url: str | None = None, *args: Any, **kwargs: Any) -> None:
+        super().__init__(base_url, *args, **kwargs)
+
+    def send_video(
+        self,
+        text: str | client_utils.TextBuilder,
+        video: bytes,
+        video_alt: str | None = None,
+        video_aspect_ratio: models.AppBskyEmbedDefs.AspectRatio | None = None,
+        reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
+        langs: list[str] | None = None,
+        facets: list[models.AppBskyRichtextFacet.Main] | None = None,
+        labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
+        time_iso: str | None = None,
+    ) -> models.AppBskyFeedPost.CreateRecordResponse:
+        """same as send_video, but with labels"""
+
+        if video_alt is None:
+            video_alt = ""
+
+        upload = self.upload_blob(video)
+
+        return self.send_post(
+            text,
+            reply_to=reply_to,
+            embed=models.AppBskyEmbedVideo.Main(
+                video=upload.blob, alt=video_alt, aspect_ratio=video_aspect_ratio
+            ),
+            langs=langs,
+            facets=facets,
+            labels=labels,
+            time_iso=time_iso,
+        )
+
+    def send_images(
+        self,
+        text: str | client_utils.TextBuilder,
+        images: list[bytes],
+        image_alts: list[str] | None = None,
+        image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] | None = None,
+        reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
+        langs: list[str] | None = None,
+        facets: list[models.AppBskyRichtextFacet.Main] | None = None,
+        labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
+        time_iso: str | None = None,
+    ) -> models.AppBskyFeedPost.CreateRecordResponse:
+        """same as send_images, but with labels"""
+
+        if image_alts is None:
+            image_alts = [""] * len(images)
+        else:
+            diff = len(images) - len(image_alts)
+            image_alts = image_alts + [""] * diff
+
+        if image_aspect_ratios is None:
+            aligned_image_aspect_ratios = [None] * len(images)
+        else:
+            diff = len(images) - len(image_aspect_ratios)
+            aligned_image_aspect_ratios = image_aspect_ratios + [None] * diff
+
+        uploads = [self.upload_blob(image) for image in images]
+
+        embed_images = [
+            models.AppBskyEmbedImages.Image(
+                alt=alt, image=upload.blob, aspect_ratio=aspect_ratio
+            )
+            for alt, upload, aspect_ratio in zip(
+                image_alts, uploads, aligned_image_aspect_ratios
+            )
+        ]
+
+        return self.send_post(
+            text,
+            reply_to=reply_to,
+            embed=models.AppBskyEmbedImages.Main(images=embed_images),
+            langs=langs,
+            facets=facets,
+            labels=labels,
+            time_iso=time_iso,
+        )
+
+    def send_post(
+        self,
+        text: str | client_utils.TextBuilder,
+        reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
+        embed: None
+        | models.AppBskyEmbedImages.Main
+        | models.AppBskyEmbedExternal.Main
+        | models.AppBskyEmbedRecord.Main
+        | models.AppBskyEmbedRecordWithMedia.Main
+        | models.AppBskyEmbedVideo.Main = None,
+        langs: list[str] | None = None,
+        facets: list[models.AppBskyRichtextFacet.Main] | None = None,
+        labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
+        time_iso: str | None = None,
+    ) -> models.AppBskyFeedPost.CreateRecordResponse:
+        """same as send_post, but with labels"""
+
+        if isinstance(text, client_utils.TextBuilder):
+            facets = text.build_facets()
+            text = text.build_text()
+
+        repo = self.me and self.me.did
+        if not repo:
+            raise Exception("Client not logged in!")
+
+        if not langs:
+            langs = ["en"]
+
+        record = models.AppBskyFeedPost.Record(
+            created_at=time_iso or self.get_current_time_iso(),
+            text=text,
+            reply=reply_to or None,
+            embed=embed or None,
+            langs=langs,
+            facets=facets or None,
+            labels=labels or None,
+        )
+        return self.app.bsky.feed.post.create(repo, record)
+
+    def create_gates(
+        self,
+        thread_gate_opts: list[str],
+        quote_gate: bool,
+        post_uri: str,
+        time_iso: str | None = None,
+    ):
+        account = self.me
+        if not account:
+            raise Exception("Client not logged in!")
+
+        rkey = AtUri.from_str(post_uri).rkey
+        time_iso = time_iso or self.get_current_time_iso()
+
+        if "everybody" not in thread_gate_opts:
+            allow = []
+            if thread_gate_opts:
+                if "following" in thread_gate_opts:
+                    allow.append(models.AppBskyFeedThreadgate.FollowingRule())
+                if "followers" in thread_gate_opts:
+                    allow.append(models.AppBskyFeedThreadgate.FollowerRule())
+                if "mentioned" in thread_gate_opts:
+                    allow.append(models.AppBskyFeedThreadgate.MentionRule())
+
+            thread_gate = models.AppBskyFeedThreadgate.Record(
+                post=post_uri, created_at=time_iso, allow=allow
+            )
+
+            self.app.bsky.feed.threadgate.create(account.did, thread_gate, rkey)
+
+        if quote_gate:
+            post_gate = models.AppBskyFeedPostgate.Record(
+                post=post_uri,
+                created_at=time_iso,
+                embedding_rules=[models.AppBskyFeedPostgate.DisableRule()],
+            )
+
+            self.app.bsky.feed.postgate.create(account.did, post_gate, rkey)
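a minimal usage sketch for the module above (assumes valid credentials; `login` and `upload_blob` come from the stock atproto `Client` base class):

```python
from bluesky.atproto2 import Client2, resolve_identity

# one network lookup for the DID and one for the PDS endpoint,
# both skipped when did/pds are supplied directly
did, pds = resolve_identity(handle="melontini.me")

client = Client2(pds)
client.login(did, "app-password-here")  # placeholder credential

# unlike the stock send_post, this one can attach self-labels
client.send_post("hello from xpost")
```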
bluesky/common.py (+199)
···
+import re
+
+from atproto import client_utils
+
+import cross
+from util.media import MediaInfo
+from util.util import canonical_label
+
+# only for lexicon reference
+SERVICE = "https://bsky.app"
+
+# TODO this is terrible and stupid
+ADULT_PATTERN = re.compile(
+    r"\b(sexual content|nsfw|erotic|adult only|18\+)\b", re.IGNORECASE
+)
+PORN_PATTERN = re.compile(r"\b(porn|yiff|hentai|pornographic|fetish)\b", re.IGNORECASE)
+
+
+class BlueskyPost(cross.Post):
+    def __init__(
+        self, record: dict, tokens: list[cross.Token], attachments: list[MediaInfo]
+    ) -> None:
+        super().__init__()
+        self.uri = record["$xpost.strongRef"]["uri"]
+        self.parent_uri = None
+        if record.get("reply"):
+            self.parent_uri = record["reply"]["parent"]["uri"]
+
+        self.tokens = tokens
+        self.timestamp = record["createdAt"]
+        labels = record.get("labels", {}).get("values")
+        self.spoiler = None
+        if labels:
+            self.spoiler = ", ".join(
+                [str(label["val"]).replace("-", " ") for label in labels]
+            )
+
+        self.attachments = attachments
+        self.languages = record.get("langs", [])
+
+    # at:// of the post record
+    def get_id(self) -> str:
+        return self.uri
+
+    def get_parent_id(self) -> str | None:
+        return self.parent_uri
+
+    def get_tokens(self) -> list[cross.Token]:
+        return self.tokens
+
+    def get_text_type(self) -> str:
+        return "text/plain"
+
+    def get_timestamp(self) -> str:
+        return self.timestamp
+
+    def get_attachments(self) -> list[MediaInfo]:
+        return self.attachments
+
+    def get_spoiler(self) -> str | None:
+        return self.spoiler
+
+    def get_languages(self) -> list[str]:
+        return self.languages
+
+    def is_sensitive(self) -> bool:
+        return self.spoiler is not None
+
+    def get_post_url(self) -> str | None:
+        did, _, post_id = str(self.uri[len("at://") :]).split("/")
+
+        return f"https://bsky.app/profile/{did}/post/{post_id}"
+
+
+def tokenize_post(post: dict) -> list[cross.Token]:
+    text: str = post.get("text", "")
+    if not text:
+        return []
+    ut8_text = text.encode(encoding="utf-8")
+
+    def decode(ut8: bytes) -> str:
+        return ut8.decode(encoding="utf-8")
+
+    facets: list[dict] = post.get("facets", [])
+    if not facets:
+        return [cross.TextToken(decode(ut8_text))]
+
+    slices: list[tuple[int, int, str, str]] = []
+
+    for facet in facets:
+        features: list[dict] = facet.get("features", [])
+        if not features:
+            continue
+
+        # we don't support overlapping facets/features
+        feature = features[0]
+        feature_type = feature["$type"]
+        index = facet["index"]
+        match feature_type:
+            case "app.bsky.richtext.facet#tag":
+                slices.append(
+                    (index["byteStart"], index["byteEnd"], "tag", feature["tag"])
+                )
+            case "app.bsky.richtext.facet#link":
+                slices.append(
+                    (index["byteStart"], index["byteEnd"], "link", feature["uri"])
+                )
+            case "app.bsky.richtext.facet#mention":
+                slices.append(
+                    (index["byteStart"], index["byteEnd"], "mention", feature["did"])
+                )
+
+    if not slices:
+        return [cross.TextToken(decode(ut8_text))]
+
+    slices.sort(key=lambda s: s[0])
+    unique: list[tuple[int, int, str, str]] = []
+    current_end = 0
+    for start, end, ttype, val in slices:
+        if start >= current_end:
+            unique.append((start, end, ttype, val))
+            current_end = end
+
+    if not unique:
+        return [cross.TextToken(decode(ut8_text))]
+
+    tokens: list[cross.Token] = []
+    prev = 0
+
+    for start, end, ttype, val in unique:
+        if start > prev:
+            # text between facets
+            tokens.append(cross.TextToken(decode(ut8_text[prev:start])))
+        # facet token
+        match ttype:
+            case "link":
+                label = decode(ut8_text[start:end])
+
+                # try to unflatten links
+                split = val.split("://", 1)
+                if len(split) > 1:
+                    if split[1].startswith(label):
+                        tokens.append(cross.LinkToken(val, ""))
+                        prev = end
+                        continue
+
+                    if label.endswith("...") and split[1].startswith(label[:-3]):
+                        tokens.append(cross.LinkToken(val, ""))
+                        prev = end
+                        continue
+
+                tokens.append(cross.LinkToken(val, label))
+            case "tag":
+                tag = decode(ut8_text[start:end])
+                tokens.append(cross.TagToken(tag[1:] if tag.startswith("#") else tag))
+            case "mention":
+                mention = decode(ut8_text[start:end])
+                tokens.append(
+                    cross.MentionToken(
+                        mention[1:] if mention.startswith("@") else mention, val
+                    )
+                )
+        prev = end
+
+    if prev < len(ut8_text):
+        tokens.append(cross.TextToken(decode(ut8_text[prev:])))
+
+    return tokens
+
+
+def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
+    builder = client_utils.TextBuilder()
+
+    def flatten_link(href: str):
+        split = href.split("://", 1)
+        if len(split) > 1:
+            href = split[1]
+
+        if len(href) > 32:
+            href = href[:32] + "..."
+
+        return href
+
+    for token in tokens:
+        if isinstance(token, cross.TextToken):
+            builder.text(token.text)
+        elif isinstance(token, cross.LinkToken):
+            if canonical_label(token.label, token.href):
+                builder.link(flatten_link(token.href), token.href)
+                continue
+
+            builder.link(token.label, token.href)
+        elif isinstance(token, cross.TagToken):
+            builder.tag("#" + token.tag, token.tag.lower())
+        else:
+            # fail on unsupported tokens
+            return None
+
+    return builder
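a worked example of the byte-offset handling in `tokenize_post` (a sketch; facet offsets index into the UTF-8 encoding of the text):

```python
# a record the way jetstream delivers it, with one link facet whose
# visible text was "flattened" by the client (scheme stripped)
post = {
    "text": "see example.com/p ok",
    "facets": [{
        "index": {"byteStart": 4, "byteEnd": 17},
        "features": [{
            "$type": "app.bsky.richtext.facet#link",
            "uri": "https://example.com/p",
        }],
    }],
}

# tokenize_post(post) yields roughly:
#   [TextToken("see "), LinkToken("https://example.com/p", ""), TextToken(" ok")]
# the label collapses to "" because the visible text is just the uri minus its
# scheme (the "unflatten" branch), so outputs can re-shorten it their own way
```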
bluesky/input.py (+203)
···
+import asyncio
+import json
+import re
+from typing import Any, Callable
+
+import websockets
+from atproto_client import models
+from atproto_client.models.utils import get_or_create as get_model_or_create
+
+import cross
+import util.database as database
+from bluesky.atproto2 import resolve_identity
+from bluesky.common import SERVICE, BlueskyPost, tokenize_post
+from util.database import DataBaseWorker
+from util.media import MediaInfo, download_media
+from util.util import LOGGER, as_envvar
+
+
+class BlueskyInputOptions:
+    def __init__(self, o: dict) -> None:
+        self.filters = [re.compile(f) for f in o.get("regex_filters", [])]
+
+
+class BlueskyInput(cross.Input):
+    def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+        self.options = BlueskyInputOptions(settings.get("options", {}))
+        did, pds = resolve_identity(
+            handle=as_envvar(settings.get("handle")),
+            did=as_envvar(settings.get("did")),
+            pds=as_envvar(settings.get("pds")),
+        )
+        self.pds = pds
+
+        # PDS is Not a service, the lexicon and rids are the same across pds
+        super().__init__(SERVICE, did, settings, db)
+
+    def _on_post(self, outputs: list[cross.Output], post: dict[str, Any]):
+        post_uri = post["$xpost.strongRef"]["uri"]
+        post_cid = post["$xpost.strongRef"]["cid"]
+
+        parent_uri = None
+        if post.get("reply"):
+            parent_uri = post["reply"]["parent"]["uri"]
+
+        embed = post.get("embed", {})
+        if embed.get("$type") in (
+            "app.bsky.embed.record",
+            "app.bsky.embed.recordWithMedia",
+        ):
+            did, collection, rid = str(embed["record"]["uri"][len("at://") :]).split(
+                "/"
+            )
+            if collection == "app.bsky.feed.post":
+                LOGGER.info("Skipping '%s'! Quote..", post_uri)
+                return
+
+        success = database.try_insert_post(
+            self.db, post_uri, parent_uri, self.user_id, self.service
+        )
+        if not success:
+            LOGGER.info("Skipping '%s' as parent post was not found in db!", post_uri)
+            return
+        database.store_data(
+            self.db, post_uri, self.user_id, self.service, {"cid": post_cid}
+        )
+
+        tokens = tokenize_post(post)
+        if not cross.test_filters(tokens, self.options.filters):
+            LOGGER.info("Skipping '%s'. Matched a filter!", post_uri)
+            return
+
+        LOGGER.info("Crossposting '%s'...", post_uri)
+
+        def get_blob_url(blob: str):
+            return f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.user_id}&cid={blob}"
+
+        attachments: list[MediaInfo] = []
+        if embed.get("$type") == "app.bsky.embed.images":
+            model = get_model_or_create(embed, model=models.AppBskyEmbedImages.Main)
+            assert isinstance(model, models.AppBskyEmbedImages.Main)
+
+            for image in model.images:
+                url = get_blob_url(image.image.cid.encode())
+                LOGGER.info("Downloading %s...", url)
+                io = download_media(url, image.alt)
+                if not io:
+                    LOGGER.error("Skipping '%s'. Failed to download media!", post_uri)
+                    return
+                attachments.append(io)
+        elif embed.get("$type") == "app.bsky.embed.video":
+            model = get_model_or_create(embed, model=models.AppBskyEmbedVideo.Main)
+            assert isinstance(model, models.AppBskyEmbedVideo.Main)
+            url = get_blob_url(model.video.cid.encode())
+            LOGGER.info("Downloading %s...", url)
+            io = download_media(url, model.alt if model.alt else "")
+            if not io:
+                LOGGER.error("Skipping '%s'. Failed to download media!", post_uri)
+                return
+            attachments.append(io)
+
+        cross_post = BlueskyPost(post, tokens, attachments)
+        for output in outputs:
+            output.accept_post(cross_post)
+
+    def _on_delete_post(self, outputs: list[cross.Output], post_id: str, repost: bool):
+        post = database.find_post(self.db, post_id, self.user_id, self.service)
+        if not post:
+            return
+
+        LOGGER.info("Deleting '%s'...", post_id)
+        if repost:
+            for output in outputs:
+                output.delete_repost(post_id)
+        else:
+            for output in outputs:
+                output.delete_post(post_id)
+        database.delete_post(self.db, post_id, self.user_id, self.service)
+
+    def _on_repost(self, outputs: list[cross.Output], post: dict[str, Any]):
+        post_uri = post["$xpost.strongRef"]["uri"]
+        post_cid = post["$xpost.strongRef"]["cid"]
+
+        reposted_uri = post["subject"]["uri"]
+
+        success = database.try_insert_repost(
+            self.db, post_uri, reposted_uri, self.user_id, self.service
+        )
+        if not success:
+            LOGGER.info("Skipping '%s' as reposted post was not found in db!", post_uri)
+            return
+        database.store_data(
+            self.db, post_uri, self.user_id, self.service, {"cid": post_cid}
+        )
+
+        LOGGER.info("Crossposting '%s'...", post_uri)
+        for output in outputs:
+            output.accept_repost(post_uri, reposted_uri)
+
+
+class BlueskyJetstreamInput(BlueskyInput):
+    def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+        super().__init__(settings, db)
+        self.jetstream = settings.get(
+            "jetstream", "wss://jetstream2.us-east.bsky.network/subscribe"
+        )
+
+    def __on_commit(self, outputs: list[cross.Output], msg: dict):
+        if msg.get("did") != self.user_id:
+            return
+
+        commit: dict = msg.get("commit", {})
+        if not commit:
+            return
+
+        commit_type = commit["operation"]
+        match commit_type:
+            case "create":
+                record = dict(commit.get("record", {}))
+                record["$xpost.strongRef"] = {
+                    "cid": commit["cid"],
+                    "uri": f"at://{self.user_id}/{commit['collection']}/{commit['rkey']}",
+                }
+
+                match commit["collection"]:
+                    case "app.bsky.feed.post":
+                        self._on_post(outputs, record)
+                    case "app.bsky.feed.repost":
+                        self._on_repost(outputs, record)
+            case "delete":
+                post_id: str = (
+                    f"at://{self.user_id}/{commit['collection']}/{commit['rkey']}"
+                )
+                match commit["collection"]:
+                    case "app.bsky.feed.post":
+                        self._on_delete_post(outputs, post_id, False)
+                    case "app.bsky.feed.repost":
+                        self._on_delete_post(outputs, post_id, True)
+
+    async def listen(
+        self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]
+    ):
+        uri = self.jetstream + "?"
+        uri += "wantedCollections=app.bsky.feed.post"
+        uri += "&wantedCollections=app.bsky.feed.repost"
+        uri += f"&wantedDids={self.user_id}"
+
+        async for ws in websockets.connect(
+            uri, extra_headers={"User-Agent": "XPost/0.0.3"}
+        ):
+            try:
+                LOGGER.info("Listening to %s...", self.jetstream)
+
+                async def listen_for_messages():
+                    async for msg in ws:
+                        # bind msg now: the loop variable moves on before
+                        # submit gets around to running the lambda
+                        submit(lambda msg=msg: self.__on_commit(outputs, json.loads(msg)))
+
+                listen = asyncio.create_task(listen_for_messages())
+
+                await asyncio.gather(listen)
+            except websockets.ConnectionClosedError as e:
+                LOGGER.error(e, stack_info=True, exc_info=True)
+                LOGGER.info("Reconnecting to %s...", self.jetstream)
+                continue
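for reference, with the default endpoint the subscription URL built in `listen` comes out as `wss://jetstream2.us-east.bsky.network/subscribe?wantedCollections=app.bsky.feed.post&wantedCollections=app.bsky.feed.repost&wantedDids=<your did>`, so jetstream only streams this one account's post and repost commits instead of the whole firehose.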
bluesky/output.py (+481)
···
+from atproto import Request, client_utils
+from atproto_client import models
+from httpx import Timeout
+
+import cross
+import misskey.mfm_util as mfm_util
+import util.database as database
+from bluesky.atproto2 import Client2, resolve_identity
+from bluesky.common import ADULT_PATTERN, PORN_PATTERN, SERVICE, tokens_to_richtext
+from util.database import DataBaseWorker
+from util.media import (
+    MediaInfo,
+    compress_image,
+    convert_to_mp4,
+    get_filename_from_url,
+    get_media_meta,
+)
+from util.util import LOGGER, as_envvar
+
+ALLOWED_GATES = ["mentioned", "following", "followers", "everybody"]
+
+
+class BlueskyOutputOptions:
+    def __init__(self, o: dict) -> None:
+        self.quote_gate: bool = False
+        self.thread_gate: list[str] = ["everybody"]
+        self.encode_videos: bool = True
+
+        quote_gate = o.get("quote_gate")
+        if quote_gate is not None:
+            self.quote_gate = bool(quote_gate)
+
+        thread_gate = o.get("thread_gate")
+        if thread_gate is not None:
+            if any([v not in ALLOWED_GATES for v in thread_gate]):
+                raise ValueError(
+                    f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {thread_gate}"
+                )
+            self.thread_gate = thread_gate
+
+        encode_videos = o.get("encode_videos")
+        if encode_videos is not None:
+            self.encode_videos = bool(encode_videos)
+
+
+class BlueskyOutput(cross.Output):
+    def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
+        super().__init__(input, settings, db)
+        self.options = BlueskyOutputOptions(settings.get("options") or {})
+
+        if not as_envvar(settings.get("app-password")):
+            raise Exception("Account app password not provided!")
+
+        did, pds = resolve_identity(
+            handle=as_envvar(settings.get("handle")),
+            did=as_envvar(settings.get("did")),
+            pds=as_envvar(settings.get("pds")),
+        )
+
+        reqs = Request(timeout=Timeout(None, connect=30.0))
+
+        self.bsky = Client2(pds, request=reqs)
+        self.bsky.configure_proxy_header(
+            service_type="bsky_appview",
+            did=as_envvar(settings.get("bsky_appview")) or "did:web:api.bsky.app",
+        )
+        self.bsky.login(did, as_envvar(settings.get("app-password")))
+
+    def __check_login(self):
+        login = self.bsky.me
+        if not login:
+            raise Exception("Client not logged in!")
+        return login
+
+    def _find_parent(self, parent_id: str):
+        login = self.__check_login()
+
+        thread_tuple = database.find_mapped_thread(
+            self.db,
+            parent_id,
+            self.input.user_id,
+            self.input.service,
+            login.did,
+            SERVICE,
+        )
+
+        if not thread_tuple:
+            LOGGER.error("Failed to find thread tuple in the database!")
+            return None
+
+        root_uri: str = thread_tuple[0]
+        reply_uri: str = thread_tuple[1]
+
+        root_cid = database.fetch_data(self.db, root_uri, login.did, SERVICE)["cid"]
+        reply_cid = database.fetch_data(self.db, reply_uri, login.did, SERVICE)["cid"]
+
+        root_record = models.AppBskyFeedPost.CreateRecordResponse(
+            uri=root_uri, cid=root_cid
+        )
+        reply_record = models.AppBskyFeedPost.CreateRecordResponse(
+            uri=reply_uri, cid=reply_cid
+        )
+
+        return (
+            models.create_strong_ref(root_record),
+            models.create_strong_ref(reply_record),
+            thread_tuple[2],
+            thread_tuple[3],
+        )
+
+    def _split_attachments(self, attachments: list[MediaInfo]):
+        sup_media: list[MediaInfo] = []
+        unsup_media: list[MediaInfo] = []
+
+        for a in attachments:
+            if a.mime.startswith("image/") or a.mime.startswith(
+                "video/"
+            ):  # TODO convert gifs to videos
+                sup_media.append(a)
+            else:
+                unsup_media.append(a)
+
+        return (sup_media, unsup_media)
+
+    def _split_media_per_post(
+        self, tokens: list[client_utils.TextBuilder], media: list[MediaInfo]
+    ):
+        posts: list[dict] = [{"tokens": tokens, "attachments": []} for tokens in tokens]
+        available_indices: list[int] = list(range(len(posts)))
+
+        current_image_post_idx: int | None = None
+
+        def make_blank_post() -> dict:
+            return {"tokens": [client_utils.TextBuilder().text("")], "attachments": []}
+
+        def pop_next_empty_index() -> int:
+            if available_indices:
+                return available_indices.pop(0)
+            else:
+                new_idx = len(posts)
+                posts.append(make_blank_post())
+                return new_idx
+
+        for att in media:
+            if att.mime.startswith("video/"):
+                current_image_post_idx = None
+                idx = pop_next_empty_index()
+                posts[idx]["attachments"].append(att)
+            elif att.mime.startswith("image/"):
+                if (
+                    current_image_post_idx is not None
+                    and len(posts[current_image_post_idx]["attachments"]) < 4
+                ):
+                    posts[current_image_post_idx]["attachments"].append(att)
+                else:
+                    idx = pop_next_empty_index()
+                    posts[idx]["attachments"].append(att)
+                    current_image_post_idx = idx
+
+        result: list[tuple[client_utils.TextBuilder, list[MediaInfo]]] = []
+        for p in posts:
+            result.append((p["tokens"], p["attachments"]))
+        return result
+
+    def accept_post(self, post: cross.Post):
+        login = self.__check_login()
+
+        parent_id = post.get_parent_id()
+
+        # used for db insertion
+        new_root_id = None
+        new_parent_id = None
+
+        root_ref = None
+        reply_ref = None
+        if parent_id:
+            parents = self._find_parent(parent_id)
+            if not parents:
+                return
+            root_ref, reply_ref, new_root_id, new_parent_id = parents
+
+        tokens = post.get_tokens().copy()
+
+        unique_labels: set[str] = set()
+        cw = post.get_spoiler()
+        if cw:
+            tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
+            unique_labels.add("graphic-media")
+
+            # from bsky.app, a post can only have one of those labels
+            if PORN_PATTERN.search(cw):
+                unique_labels.add("porn")
+            elif ADULT_PATTERN.search(cw):
+                unique_labels.add("sexual")
+
+        if post.is_sensitive():
+            unique_labels.add("graphic-media")
+
+        labels = (
+            models.ComAtprotoLabelDefs.SelfLabels(
+                values=[
+                    models.ComAtprotoLabelDefs.SelfLabel(val=label)
+                    for label in unique_labels
+                ]
+            )
+            if unique_labels
+            else None
+        )
+
+        sup_media, unsup_media = self._split_attachments(post.get_attachments())
+
+        if unsup_media:
+            if tokens:
+                tokens.append(cross.TextToken("\n"))
+            for i, attachment in enumerate(unsup_media):
+                tokens.append(
+                    cross.LinkToken(
+                        attachment.url, f"[{get_filename_from_url(attachment.url)}]"
+                    )
+                )
+                tokens.append(cross.TextToken(" "))
+
+        if post.get_text_type() == "text/x.misskeymarkdown":
+            tokens, status = mfm_util.strip_mfm(tokens)
+            post_url = post.get_post_url()
+            if status and post_url:
+                tokens.append(cross.TextToken("\n"))
+                tokens.append(
+                    cross.LinkToken(post_url, "[Post contains MFM, see original]")
+                )
+
+        split_tokens: list[list[cross.Token]] = cross.split_tokens(tokens, 300)
+        post_text: list[client_utils.TextBuilder] = []
+
+        # convert tokens into rich text. skip post if contains unsupported tokens
+        for block in split_tokens:
+            rich_text = tokens_to_richtext(block)
+
+            if not rich_text:
+                LOGGER.error(
+                    "Skipping '%s' as it contains invalid rich text types!",
+                    post.get_id(),
+                )
+                return
+            post_text.append(rich_text)
+
+        if not post_text:
+            post_text = [client_utils.TextBuilder().text("")]
+
+        for m in sup_media:
+            if m.mime.startswith("image/"):
+                if len(m.io) > 2_000_000:
+                    LOGGER.error(
+                        "Skipping post_id '%s', failed to download attachment! File too large.",
+                        post.get_id(),
+                    )
+                    return
+
+            if m.mime.startswith("video/"):
+                if m.mime != "video/mp4" and not self.options.encode_videos:
+                    LOGGER.info(
+                        "Video is not mp4, but encoding is disabled. Skipping '%s'...",
+                        post.get_id(),
+                    )
+                    return
+
+                if len(m.io) > 100_000_000:
+                    LOGGER.error(
+                        "Skipping post_id '%s', failed to download attachment! File too large?",
+                        post.get_id(),
+                    )
+                    return
+
+        created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
+        baked_media = self._split_media_per_post(post_text, sup_media)
+
+        for text, attachments in baked_media:
+            if not attachments:
+                if reply_ref and root_ref:
+                    new_post = self.bsky.send_post(
+                        text,
+                        reply_to=models.AppBskyFeedPost.ReplyRef(
+                            parent=reply_ref, root=root_ref
+                        ),
+                        labels=labels,
+                        time_iso=post.get_timestamp(),
+                    )
+                else:
+                    new_post = self.bsky.send_post(
+                        text, labels=labels, time_iso=post.get_timestamp()
+                    )
+                    root_ref = models.create_strong_ref(new_post)
+
+                self.bsky.create_gates(
+                    self.options.thread_gate,
+                    self.options.quote_gate,
+                    new_post.uri,
+                    time_iso=post.get_timestamp(),
+                )
+                reply_ref = models.create_strong_ref(new_post)
+                created_records.append(new_post)
+            else:
+                # if a single post is an image - everything else is an image
+                if attachments[0].mime.startswith("image/"):
+                    images: list[bytes] = []
+                    image_alts: list[str] = []
+                    image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []
+
+                    for attachment in attachments:
+                        image_io = compress_image(attachment.io, quality=100)
+                        metadata = get_media_meta(image_io)
+
+                        if len(image_io) > 1_000_000:
+                            LOGGER.info("Compressing %s...", attachment.name)
+                            image_io = compress_image(image_io)
+
+                        images.append(image_io)
+                        image_alts.append(attachment.alt)
+                        image_aspect_ratios.append(
+                            models.AppBskyEmbedDefs.AspectRatio(
+                                width=metadata["width"], height=metadata["height"]
+                            )
+                        )
+
+                    new_post = self.bsky.send_images(
+                        text=post_text[0],
+                        images=images,
+                        image_alts=image_alts,
+                        image_aspect_ratios=image_aspect_ratios,
+                        reply_to=models.AppBskyFeedPost.ReplyRef(
+                            parent=reply_ref, root=root_ref
+                        )
+                        if root_ref and reply_ref
+                        else None,
+                        labels=labels,
+                        time_iso=post.get_timestamp(),
+                    )
+                    if not root_ref:
+                        root_ref = models.create_strong_ref(new_post)
+
+                    self.bsky.create_gates(
+                        self.options.thread_gate,
+                        self.options.quote_gate,
+                        new_post.uri,
+                        time_iso=post.get_timestamp(),
+                    )
+                    reply_ref = models.create_strong_ref(new_post)
+                    created_records.append(new_post)
+                else:  # video is guaranteed to be one
+                    metadata = get_media_meta(attachments[0].io)
+                    if metadata["duration"] > 180:
+                        LOGGER.info(
+                            "Skipping post_id '%s', video attachment too long!",
+                            post.get_id(),
+                        )
+                        return
+
+                    video_io = attachments[0].io
+                    if attachments[0].mime != "video/mp4":
+                        LOGGER.info("Converting %s to mp4...", attachments[0].name)
+                        video_io = convert_to_mp4(video_io)
+
+                    aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
+                        width=metadata["width"], height=metadata["height"]
+                    )
+
+                    new_post = self.bsky.send_video(
+                        text=post_text[0],
+                        video=video_io,
+                        video_aspect_ratio=aspect_ratio,
+                        video_alt=attachments[0].alt,
+                        reply_to=models.AppBskyFeedPost.ReplyRef(
+                            parent=reply_ref, root=root_ref
+                        )
+                        if root_ref and reply_ref
+                        else None,
+                        labels=labels,
+                        time_iso=post.get_timestamp(),
+                    )
+                    if not root_ref:
+                        root_ref = models.create_strong_ref(new_post)
+
+                    self.bsky.create_gates(
+                        self.options.thread_gate,
+                        self.options.quote_gate,
+                        new_post.uri,
+                        time_iso=post.get_timestamp(),
+                    )
+                    reply_ref = models.create_strong_ref(new_post)
+                    created_records.append(new_post)
+
+        db_post = database.find_post(
+            self.db, post.get_id(), self.input.user_id, self.input.service
+        )
+        assert db_post, "ghghghhhhh"
+
+        if new_root_id is None or new_parent_id is None:
+            new_root_id = database.insert_post(
+                self.db, created_records[0].uri, login.did, SERVICE
+            )
+            database.store_data(
+                self.db,
+                created_records[0].uri,
+                login.did,
+                SERVICE,
+                {"cid": created_records[0].cid},
+            )
+
+            new_parent_id = new_root_id
+            database.insert_mapping(self.db, db_post["id"], new_parent_id)
+            created_records = created_records[1:]
+
+        for record in created_records:
+            new_parent_id = database.insert_reply(
+                self.db, record.uri, login.did, SERVICE, new_parent_id, new_root_id
+            )
+            database.store_data(
+                self.db, record.uri, login.did, SERVICE, {"cid": record.cid}
+            )
+            database.insert_mapping(self.db, db_post["id"], new_parent_id)
+
+    def delete_post(self, identifier: str):
+        login = self.__check_login()
+
+        post = database.find_post(
+            self.db, identifier, self.input.user_id, self.input.service
+        )
+        if not post:
+            return
+
+        mappings = database.find_mappings(self.db, post["id"], SERVICE, login.did)
+        for mapping in mappings[::-1]:
+            LOGGER.info("Deleting '%s'...", mapping[0])
+            self.bsky.delete_post(mapping[0])
+            database.delete_post(self.db, mapping[0], SERVICE, login.did)
+
+    def accept_repost(self, repost_id: str, reposted_id: str):
+        login, repost = self.__delete_repost(repost_id)
+        if not (login and repost):
+            return
+
+        reposted = database.find_post(
+            self.db, reposted_id, self.input.user_id, self.input.service
+        )
+        if not reposted:
+            return
+
+        # mappings of the reposted post
+        mappings = database.find_mappings(self.db, reposted["id"], SERVICE, login.did)
+        if mappings:
+            cid = database.fetch_data(self.db, mappings[0][0], login.did, SERVICE)[
+                "cid"
+            ]
+            rsp = self.bsky.repost(mappings[0][0], cid)
+
+            internal_id = database.insert_repost(
+                self.db, rsp.uri, reposted["id"], login.did, SERVICE
+            )
+            database.store_data(self.db, rsp.uri, login.did, SERVICE, {"cid": rsp.cid})
+            database.insert_mapping(self.db, repost["id"], internal_id)
+
+    def __delete_repost(
+        self, repost_id: str
+    ) -> tuple[models.AppBskyActorDefs.ProfileViewDetailed | None, dict | None]:
+        login = self.__check_login()
+
+        repost = database.find_post(
+            self.db, repost_id, self.input.user_id, self.input.service
+        )
+        if not repost:
+            return None, None
+
+        mappings = database.find_mappings(self.db, repost["id"], SERVICE, login.did)
+        if mappings:
+            LOGGER.info("Deleting '%s'...", mappings[0][0])
+            self.bsky.unrepost(mappings[0][0])
+            database.delete_post(self.db, mappings[0][0], login.did, SERVICE)
+        return login, repost
+
+    def delete_repost(self, repost_id: str):
+        self.__delete_repost(repost_id)
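a quick summary of the hard limits enforced above, which mirror bluesky's own: 300 characters of rich text per post (longer token streams become a thread), at most 4 images per post, images recompressed when over ~1 MB and rejected over 2 MB, videos rejected over 100 MB or 180 seconds, and non-mp4 video re-encoded only while `encode_videos` is enabled.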
-684
bluesky.py
···
-
from atproto import client_utils, Request, AsyncFirehoseSubscribeReposClient, CAR, CID
-
from atproto_client import models
-
from atproto_client.models.utils import get_or_create as get_model_or_create
-
from atproto_client.models.blob_ref import BlobRef
-
from atproto_firehose import models as firehose_models, parse_subscribe_repos_message as parse_firehose
-
from atproto2 import Client2, resolve_identity
-
from httpx import Timeout
-
import json
-
import cross
-
import database
-
from database import DataBaseWorker
-
import util
-
import media_util
-
from util import LOGGER
-
import re
-
from typing import Callable, Any
-
-
# only for lexicon reference
-
SERVICE = 'https://bsky.app'
-
-
# TODO this is terrible and stupid
-
ADULT_PATTERN = re.compile(r"\b(sexual content|nsfw|erotic|adult only|18\+)\b", re.IGNORECASE)
-
PORN_PATTERN = re.compile(r"\b(porn|yiff|hentai|pornographic|fetish)\b", re.IGNORECASE)
-
-
def tokenize_post(post: dict) -> list[cross.Token]:
-
text: str = post.get('text', '')
-
if not text:
-
return []
-
text = text.encode(encoding='utf-8').decode(encoding='utf-8')
-
-
facets: list[dict] = post.get('facets', [])
-
if not facets:
-
return [cross.TextToken(text)]
-
-
slices: list[tuple[int, int, str, str]] = []
-
-
for facet in facets:
-
features: list[dict] = facet.get('features', [])
-
if not features:
-
continue
-
-
# we don't support overlapping facets/features
-
feature = features[0]
-
feature_type = feature['$type']
-
index = facet['index']
-
if feature_type == 'app.bsky.richtext.facet#tag':
-
slices.append((index['byteStart'], index['byteEnd'], 'tag', feature['tag']))
-
elif feature_type == 'app.bsky.richtext.facet#link':
-
slices.append((index['byteStart'], index['byteEnd'], 'link', feature['uri']))
-
elif feature_type == 'app.bsky.richtext.facet#mention':
-
slices.append((index['byteStart'], index['byteEnd'], 'mention', feature['did']))
-
-
if not slices:
-
return [cross.TextToken(text)]
-
-
slices.sort(key=lambda s: s[0])
-
unique: list[tuple[int, int, str, str]] = []
-
current_end = 0
-
for start, end, ttype, val in slices:
-
if start >= current_end:
-
unique.append((start, end, ttype, val))
-
current_end = end
-
-
if not unique:
-
return [cross.TextToken(text)]
-
-
tokens: list[cross.Token] = []
-
prev = 0
-
-
for start, end, ttype, val in unique:
-
if start > prev:
-
# text between facets
-
tokens.append(cross.TextToken(text[prev:start]))
-
# facet token
-
if ttype == 'link':
-
label = text[start:end]
-
-
# try to unflatten links
-
split = val.split('://')
-
if len(split) > 1:
-
if split[1].startswith(label):
-
tokens.append(cross.LinkToken(val, ''))
-
elif label.endswith('...') and split[1].startswith(label[:-3]):
-
tokens.append(cross.LinkToken(val, ''))
-
else:
-
tokens.append(cross.LinkToken(val, label))
-
elif ttype == 'tag':
-
tokens.append(cross.TagToken(val))
-
elif ttype == 'mention':
-
tokens.append(cross.MentionToken(text[start:end], val))
-
prev = end
-
-
if prev < len(text):
-
tokens.append(cross.TextToken(text[prev:]))
-
-
for t in tokens:
-
print(t.__dict__)
-
-
return tokens
-
-
class BlueskyPost(cross.Post):
-
def __init__(self, post: dict, attachments: list[media_util.MediaInfo]) -> None:
-
super().__init__()
-
self.post = post
-
self.tokens = tokenize_post(post)
-
-
self.id = json.dumps(self.post['$xpost.strongRef'], sort_keys=True)
-
-
self.parent_id = None
-
if self.post.get('reply'):
-
self.parent_id = json.dumps(self.post['reply']['parent'], sort_keys=True)
-
-
labels = self.post.get('labels', {}).get('values')
-
self.cw = ''
-
if labels:
-
self.cw = ', '.join([str(label['val']).replace('-', ' ') for label in labels])
-
self.attachments = attachments
-
-
def get_tokens(self) -> list[cross.Token]:
-
return self.tokens
-
-
def get_parent_id(self) -> str | None:
-
return self.parent_id
-
-
def get_post_date_iso(self) -> str:
-
return self.post.get('createdAt') or super().get_post_date_iso()
-
-
def get_cw(self) -> str:
-
return self.cw or ''
-
-
def get_id(self) -> str:
-
return self.id
-
-
def get_languages(self) -> list[str]:
-
return self.post.get('langs', []) or []
-
-
def is_sensitive(self) -> bool:
-
return self.post.get('labels', {}).get('values') or False
-
-
def get_attachments(self) -> list[media_util.MediaInfo]:
-
return self.attachments
-
-
class BlueskyInput(cross.Input):
-
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
-
self.options = settings.get('options', {})
-
did, pds = resolve_identity(
-
handle=util.as_envvar(settings.get('handle')),
-
did=util.as_envvar(settings.get('did')),
-
pds=util.as_envvar(settings.get('pds'))
-
)
-
self.pds = pds
-
-
# PDS is Not a service, the lexicon and rids are the same across pds
-
super().__init__(SERVICE, did, settings, db)
-
-
def _on_post(self, outputs: list[cross.Output], post: dict[str, Any]):
-
post_ref = json.dumps(post['$xpost.strongRef'], sort_keys=True)
-
-
parent_ref = None
-
if post.get('reply'):
-
parent_ref = json.dumps(post['reply']['parent'], sort_keys=True)
-
-
success = database.try_insert_post(self.db, post_ref, parent_ref, self.user_id, self.service)
-
if not success:
-
LOGGER.info("Skipping '%s' as parent post was not found in db!", post_ref)
-
return
-
-
LOGGER.info("Crossposting '%s'...", post_ref)
-
-
def get_blob_url(blob: str):
-
return f'{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.user_id}&cid={blob}'
-
-
attachments: list[media_util.MediaInfo] = []
-
embed = post.get('embed', {})
-
if embed.get('$type') == 'app.bsky.embed.images':
-
model = get_model_or_create(embed, model=models.AppBskyEmbedImages.Main)
-
assert isinstance(model, models.AppBskyEmbedImages.Main)
-
-
for image in model.images:
-
url = get_blob_url(image.image.cid.encode())
-
LOGGER.info("Downloading %s...", url)
-
io = media_util.download_media(url, image.alt)
-
if not io:
-
LOGGER.error("Skipping '%s'. Failed to download media!", post_ref)
-
return
-
attachments.append(io)
-
elif embed.get('$type') == 'app.bsky.embed.video':
-
model = get_model_or_create(embed, model=models.AppBskyEmbedVideo.Main)
-
assert isinstance(model, models.AppBskyEmbedVideo.Main)
-
url = get_blob_url(model.video.cid.encode())
-
LOGGER.info("Downloading %s...", url)
-
io = media_util.download_media(url, model.alt if model.alt else '')
-
if not io:
-
LOGGER.error("Skipping '%s'. Failed to download media!", post_ref)
-
return
-
attachments.append(io)
-
-
cross_post = BlueskyPost(post, attachments)
-
for output in outputs:
-
output.accept_post(cross_post)
-
return
-
-
def _on_delete_post(self, outputs: list[cross.Output], post_id: dict):
-
identifier = json.dumps(post_id, sort_keys=True)
-
post = database.find_post(self.db, identifier, self.user_id, self.service)
-
if not post:
-
return
-
-
LOGGER.info("Deleting '%s'...", identifier)
-
for output in outputs:
-
output.delete_post(identifier)
-
database.delete_post(self.db, identifier, self.user_id, self.service)
-
-
class BlueskyPdsInput(BlueskyInput):
-
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
-
super().__init__(settings, db)
-
-
def __on_commit(self, outputs: list[cross.Output], message: firehose_models.MessageFrame):
-
blocks = message.body.get('blocks')
-
if not blocks:
-
return
-
-
parsed = parse_firehose(message)
-
if not isinstance(parsed, models.ComAtprotoSyncSubscribeRepos.Commit):
-
return
-
blocks = parsed.blocks
-
-
car = None
-
def get_lazy_repo() -> CAR:
-
nonlocal car, blocks
-
-
if isinstance(blocks, str):
-
blocks = blocks.encode()
-
assert blocks
-
-
if car:
-
return car
-
car = CAR.from_bytes(blocks)
-
return car
-
-
for op in parsed.ops:
-
if op.action == 'delete':
-
if not op.prev:
-
continue
-
-
if not op.path.startswith('app.bsky.feed.post'):
-
continue
-
-
self._on_delete_post(outputs, {
-
'cid': op.prev.encode(),
-
'uri': f'at://{parsed.repo}/{op.path}'
-
})
-
continue
-
-
if op.action != 'create':
-
continue
-
-
if not op.cid:
-
continue
-
-
record_data = get_lazy_repo().blocks.get(op.cid)
-
if not record_data:
-
continue
-
-
record_dict = dict(record_data)
-
record_dict['$xpost.strongRef'] = {
-
'cid': op.cid.encode(),
-
'uri': f'at://{parsed.repo}/{op.path}'
-
}
-
if record_dict['$type'] == 'app.bsky.feed.post':
-
self._on_post(outputs, record_dict)
-
-
-
async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
-
streaming: str = f"wss://{self.pds.split("://", 1)[1]}/xrpc"
-
-
client = AsyncFirehoseSubscribeReposClient(base_uri=streaming)
-
-
async def on_message(message: firehose_models.MessageFrame):
-
if message.header.t != '#commit':
-
return
-
-
if message.body.get('repo') != self.user_id:
-
return
-
-
if message.body.get('tooBig'):
-
LOGGER.error("#commit message is tooBig!")
-
return
-
-
submit(lambda: self.__on_commit(outputs, message))
-
return
-
-
LOGGER.info("Listening to %s...", streaming + '/com.atproto.sync.subscribeRepos')
-
await client.start(on_message)
-
-
ALLOWED_GATES = ['mentioned', 'following', 'followers', 'everybody']
-
-
class BlueskyOutputOptions:
-
def __init__(self, o: dict) -> None:
-
self.quote_gate: bool = False
-
self.thread_gate: list[str] = ['everybody']
-
self.encode_videos: bool = True
-
-
quote_gate = o.get('quote_gate')
-
if quote_gate is not None:
-
self.quote_gate = bool(quote_gate)
-
-
thread_gate = o.get('thread_gate')
-
if thread_gate is not None:
-
if any([v not in ALLOWED_GATES for v in thread_gate]):
-
raise ValueError(f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {thread_gate}")
-
self.thread_gate = thread_gate
-
-
encode_videos = o.get('encode_videos')
-
if encode_videos is not None:
-
self.encode_videos = bool(encode_videos)
-
-
class BlueskyOutput(cross.Output):
-
def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
-
super().__init__(input, settings, db)
-
self.options = BlueskyOutputOptions(settings.get('options') or {})
-
-
if not util.as_envvar(settings.get('app-password')):
-
raise Exception("Account app password not provided!")
-
-
did, pds = resolve_identity(
-
handle=util.as_envvar(settings.get('handle')),
-
did=util.as_envvar(settings.get('did')),
-
pds=util.as_envvar(settings.get('pds'))
-
)
-
-
reqs = Request(timeout=Timeout(None, connect=30.0))
-
-
self.bsky = Client2(pds, request=reqs)
-
self.bsky.login(did, util.as_envvar(settings.get('app-password')))
-
-
def _find_parent(self, parent_id: str):
-
login = self.bsky.me
-
if not login:
-
raise Exception("Client not logged in!")
-
-
thread_tuple = database.find_mapped_thread(
-
self.db,
-
parent_id,
-
self.input.user_id,
-
self.input.service,
-
login.did,
-
SERVICE
-
)
-
-
if not thread_tuple:
-
LOGGER.error("Failed to find thread tuple in the database!")
-
return None
-
-
root_ref = json.loads(thread_tuple[0])
-
reply_ref = json.loads(thread_tuple[1])
-
-
root_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(root_ref['uri']), cid=str(root_ref['cid']))
-
reply_record = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_ref['uri']), cid=str(reply_ref['cid']))
-
-
return (
-
models.create_strong_ref(root_record),
-
models.create_strong_ref(reply_record),
-
thread_tuple[2],
-
thread_tuple[3]
-
)
-
-
def _split_attachments(self, attachments: list[media_util.MediaInfo]):
-
sup_media: list[media_util.MediaInfo] = []
-
unsup_media: list[media_util.MediaInfo] = []
-
-
for a in attachments:
-
if a.mime.startswith('image/') or a.mime.startswith('video/'): # TODO convert gifs to videos
-
sup_media.append(a)
-
else:
-
unsup_media.append(a)
-
-
return (sup_media, unsup_media)
-
-
def _split_media_per_post(
-
self,
-
tokens: list[client_utils.TextBuilder],
-
media: list[media_util.MediaInfo]):
-
-
posts: list[dict] = [{"tokens": tokens, "attachments": []} for tokens in tokens]
-
available_indices: list[int] = list(range(len(posts)))
-
-
current_image_post_idx: int | None = None
-
-
def make_blank_post() -> dict:
-
return {
-
"tokens": [client_utils.TextBuilder().text('')],
-
"attachments": []
-
}
-
-
def pop_next_empty_index() -> int:
-
if available_indices:
-
return available_indices.pop(0)
-
else:
-
new_idx = len(posts)
-
posts.append(make_blank_post())
-
return new_idx
-
-
for att in media:
-
if att.mime.startswith('video/'):
-
current_image_post_idx = None
-
idx = pop_next_empty_index()
-
posts[idx]["attachments"].append(att)
-
elif att.mime.startswith('image/'):
-
if (
-
current_image_post_idx is not None
-
and len(posts[current_image_post_idx]["attachments"]) < 4
-
):
-
posts[current_image_post_idx]["attachments"].append(att)
-
else:
-
idx = pop_next_empty_index()
-
posts[idx]["attachments"].append(att)
-
current_image_post_idx = idx
-
-
result: list[tuple[client_utils.TextBuilder, list[media_util.MediaInfo]]] = []
-
for p in posts:
-
result.append((p["tokens"], p["attachments"]))
-
return result
-
-
def accept_post(self, post: cross.Post):
-
login = self.bsky.me
-
if not login:
-
raise Exception("Client not logged in!")
-
-
parent_id = post.get_parent_id()
-
-
# used for db insertion
-
new_root_id = None
-
new_parent_id = None
-
-
root_ref = None
-
reply_ref = None
-
if parent_id:
-
parents = self._find_parent(parent_id)
-
if not parents:
-
return
-
root_ref, reply_ref, new_root_id, new_parent_id = parents
-
-
tokens = post.get_tokens().copy()
-
-
unique_labels: set[str] = set()
-
cw = post.get_cw()
-
if cw:
-
tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
-
unique_labels.add('graphic-media')
-
-
# from bsky.app, a post can only have one of those labels
-
if PORN_PATTERN.search(cw):
-
unique_labels.add('porn')
-
elif ADULT_PATTERN.search(cw):
-
unique_labels.add('sexual')
-
-
if post.is_sensitive():
-
unique_labels.add('graphic-media')
-
-
labels = models.ComAtprotoLabelDefs.SelfLabels(values=[models.ComAtprotoLabelDefs.SelfLabel(val=label) for label in unique_labels])
-
-
sup_media, unsup_media = self._split_attachments(post.get_attachments())
-
-
if unsup_media:
-
if tokens:
-
tokens.append(cross.TextToken('\n'))
-
for i, attachment in enumerate(unsup_media):
-
tokens.append(cross.LinkToken(
-
attachment.url,
-
f"[{media_util.get_filename_from_url(attachment.url)}]"
-
))
-
tokens.append(cross.TextToken(' '))
-
-
-
split_tokens: list[list[cross.Token]] = cross.split_tokens(tokens, 300)
-
post_text: list[client_utils.TextBuilder] = []
-
-
# convert tokens into rich text. skip post if contains unsupported tokens
-
for block in split_tokens:
-
rich_text = tokens_to_richtext(block)
-
-
if not rich_text:
-
LOGGER.error("Skipping '%s' as it contains invalid rich text types!", post.get_id())
-
return
-
post_text.append(rich_text)
-
-
if not post_text:
-
post_text = [client_utils.TextBuilder().text('')]
-
-
for m in sup_media:
-
if m.mime.startswith('image/'):
-
if len(m.io) > 2_000_000:
-
LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large.", post.get_id())
-
return
-
-
if m.mime.startswith('video/'):
-
if m.mime != 'video/mp4' and not self.options.encode_videos:
-
LOGGER.info("Video is not mp4, but encoding is disabled. Skipping '%s'...", post.get_id())
-
return
-
-
if len(m.io) > 100_000_000:
-
LOGGER.error("Skipping post_id '%s', failed to download attachment! File too large?", post.get_id())
-
return
-
-
created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
-
baked_media = self._split_media_per_post(post_text, sup_media)
-
-
for text, attachments in baked_media:
-
if not attachments:
-
if reply_ref and root_ref:
-
new_post = self.bsky.send_post(text, reply_to=models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
), labels=labels, time_iso=post.get_post_date_iso())
-
else:
-
new_post = self.bsky.send_post(text, labels=labels, time_iso=post.get_post_date_iso())
-
root_ref = models.create_strong_ref(new_post)
-
-
self.bsky.create_gates(
-
self.options.thread_gate,
-
self.options.quote_gate,
-
new_post.uri,
-
time_iso=post.get_post_date_iso()
-
)
-
reply_ref = models.create_strong_ref(new_post)
-
created_records.append(new_post)
-
else:
-
# if a single post is an image - everything else is an image
-
if attachments[0].mime.startswith('image/'):
-
images: list[bytes] = []
-
image_alts: list[str] = []
-
image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []
-
-
for attachment in attachments:
-
image_io = media_util.compress_image(attachment.io, quality=100)
-
metadata = media_util.get_media_meta(image_io)
-
-
if len(image_io) > 1_000_000:
-
LOGGER.info("Compressing %s...", attachment.name)
-
image_io = media_util.compress_image(image_io)
-
-
images.append(image_io)
-
image_alts.append(attachment.alt)
-
image_aspect_ratios.append(models.AppBskyEmbedDefs.AspectRatio(
-
width=metadata['width'],
-
height=metadata['height']
-
))
-
-
new_post = self.bsky.send_images(
-
text=post_text[0],
-
images=images,
-
image_alts=image_alts,
-
image_aspect_ratios=image_aspect_ratios,
-
reply_to= models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
) if root_ref and reply_ref else None,
-
labels=labels,
-
time_iso=post.get_post_date_iso()
-
)
-
if not root_ref:
-
root_ref = models.create_strong_ref(new_post)
-
-
self.bsky.create_gates(
-
self.options.thread_gate,
-
self.options.quote_gate,
-
new_post.uri,
-
time_iso=post.get_post_date_iso()
-
)
-
reply_ref = models.create_strong_ref(new_post)
-
created_records.append(new_post)
-
else: # video is guaranteed to be one
-
metadata = media_util.get_media_meta(attachments[0].io)
-
if metadata['duration'] > 180:
-
LOGGER.info("Skipping post_id '%s', video attachment too long!", post.get_id())
-
return
-
-
video_io = attachments[0].io
-
if attachments[0].mime != 'video/mp4':
-
LOGGER.info("Converting %s to mp4...", attachments[0].name)
-
video_io = media_util.convert_to_mp4(video_io)
-
-
aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
-
width=metadata['width'],
-
height=metadata['height']
-
)
-
-
new_post = self.bsky.send_video(
-
text=post_text[0],
-
video=video_io,
-
video_aspect_ratio=aspect_ratio,
-
video_alt=attachments[0].alt,
-
reply_to= models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
) if root_ref and reply_ref else None,
-
labels=labels,
-
time_iso=post.get_post_date_iso()
-
)
-
if not root_ref:
-
root_ref = models.create_strong_ref(new_post)
-
-
self.bsky.create_gates(
-
self.options.thread_gate,
-
self.options.quote_gate,
-
new_post.uri,
-
time_iso=post.get_post_date_iso()
-
)
-
reply_ref = models.create_strong_ref(new_post)
-
created_records.append(new_post)
-
-
db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)
-
assert db_post, "ghghghhhhh"
-
-
db_identifiers = [json.dumps(cr.model_dump(), sort_keys=True) for cr in created_records]
-
-
if new_root_id is None or new_parent_id is None:
-
new_root_id = database.insert_post(
-
self.db,
-
db_identifiers[0],
-
login.did,
-
SERVICE
-
)
-
new_parent_id = new_root_id
-
database.insert_mapping(self.db, db_post['id'], new_parent_id)
-
db_identifiers = db_identifiers[1:]
-
-
for db_id in db_identifiers:
-
new_parent_id = database.insert_reply(
-
self.db,
-
db_id,
-
login.did,
-
SERVICE,
-
new_parent_id,
-
new_root_id
-
)
-
database.insert_mapping(self.db, db_post['id'], new_parent_id)
-
-
def delete_post(self, identifier: str):
-
login = self.bsky.me
-
if not login:
-
raise Exception("Client not logged in!")
-
-
post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)
-
if not post:
-
return
-
-
mappings = database.find_mappings(self.db, post['id'], SERVICE, login.did)
-
for mapping in mappings[::-1]:
-
LOGGER.info("Deleting '%s'...", mapping[0])
-
self.bsky.delete_post(json.loads(mapping[0])['uri'])
-
database.delete_post(self.db, mapping[0], SERVICE, login.did)
-
-
-
def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
-
builder = client_utils.TextBuilder()
-
-
def flatten_link(href: str):
-
split = href.split('://', 1)
-
if len(split) > 1:
-
href = split[1]
-
-
if len(href) > 32:
-
href = href[:32] + '...'
-
-
return href
-
-
for token in tokens:
-
if isinstance(token, cross.TextToken):
-
builder.text(token.text)
-
elif isinstance(token, cross.LinkToken):
-
if util.canonical_label(token.label, token.href):
-
builder.link(flatten_link(token.href), token.href)
-
continue
-
-
builder.link(token.label, token.href)
-
elif isinstance(token, cross.TagToken):
-
builder.tag('#' + token.tag, token.tag)
-
else:
-
# fail on unsupported tokens
-
return None
-
-
return builder
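for reference, the label-flattening rule in tokens_to_richtext can be tried standalone. this is an illustrative copy of flatten_link (same logic as above; the asserts are just examples, not tests from the repo):

# illustrative standalone copy of the flatten_link rule used by
# tokens_to_richtext: strip the scheme, then cap the label at 32 characters.
def flatten_link(href: str) -> str:
    split = href.split('://', 1)
    if len(split) > 1:
        href = split[1]
    if len(href) > 32:
        href = href[:32] + '...'
    return href

assert flatten_link('https://example.com/post/1') == 'example.com/post/1'
assert flatten_link('https://example.com/' + 'a' * 40) == 'example.com/' + 'a' * 20 + '...'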
+151 -232
cross.py
···
-
from typing import Callable, Any
-
from database import DataBaseWorker
-
from datetime import datetime, timezone
-
from media_util import MediaInfo
-
from util import LOGGER
-
import util
import re
+
from abc import ABC, abstractmethod
+
from datetime import datetime, timezone
+
from typing import Any, Callable
-
ALTERNATE = re.compile(r'\S+|\s+')
-
URL = re.compile(r'(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+', re.IGNORECASE)
-
MD_INLINE_LINK = re.compile(r"\[([^\]]+)\]\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)", re.IGNORECASE)
-
MD_AUTOLINK = re.compile(r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>", re.IGNORECASE)
-
HASHTAG = re.compile(r'(?<!\w)\#([\w]+)')
-
FEDIVERSE_HANDLE = re.compile(r'(?<![\w@])@([\w\.-]+)(?:@([\w\.-]+\.[\w\.-]+))?')
+
from util.database import DataBaseWorker
+
from util.media import MediaInfo
+
from util.util import LOGGER, canonical_label
+
+
ALTERNATE = re.compile(r"\S+|\s+")
+
# generic token
-
class Token():
+
class Token:
def __init__(self, type: str) -> None:
self.type = type
+
class TextToken(Token):
def __init__(self, text: str) -> None:
-
super().__init__('text')
+
super().__init__("text")
self.text = text
+
# token that represents a link to a website. e.g. [link](https://google.com/)
class LinkToken(Token):
def __init__(self, href: str, label: str) -> None:
-
super().__init__('link')
+
super().__init__("link")
self.href = href
self.label = label
-
-
# token that represents a hashtag. e.g. #SocialMedia
+
+
+
# token that represents a hashtag. e.g. #SocialMedia
class TagToken(Token):
def __init__(self, tag: str) -> None:
-
super().__init__('tag')
+
super().__init__("tag")
self.tag = tag
+
# token that represents a mention of a user.
class MentionToken(Token):
def __init__(self, username: str, uri: str) -> None:
-
super().__init__('mention')
+
super().__init__("mention")
self.username = username
self.uri = uri
-
-
class MediaMeta():
+
+
+
class MediaMeta:
def __init__(self, width: int, height: int, duration: float) -> None:
self.width = width
self.height = height
self.duration = duration
-
+
def get_width(self) -> int:
return self.width
-
+
def get_height(self) -> int:
return self.height
-
+
def get_duration(self) -> float:
return self.duration
-
-
class Post():
-
def __init__(self) -> None:
+
+
+
class Post(ABC):
+
@abstractmethod
+
def get_id(self) -> str:
+
return ""
+
+
@abstractmethod
+
def get_parent_id(self) -> str | None:
pass
-
+
+
@abstractmethod
def get_tokens(self) -> list[Token]:
-
return []
-
-
def get_parent_id(self) -> str | None:
-
return None
-
-
def get_post_date_iso(self) -> str:
-
return datetime.now(timezone.utc).isoformat()
-
+
pass
+
+
# returns input text type.
+
# text/plain, text/markdown, text/x.misskeymarkdown
+
@abstractmethod
+
def get_text_type(self) -> str:
+
pass
+
+
# post iso timestamp
+
@abstractmethod
+
def get_timestamp(self) -> str:
+
pass
+
def get_attachments(self) -> list[MediaInfo]:
return []
-
-
def get_id(self) -> str:
-
return ''
-
-
def get_cw(self) -> str:
-
return ''
-
+
+
def get_spoiler(self) -> str | None:
+
return None
+
def get_languages(self) -> list[str]:
return []
-
+
def is_sensitive(self) -> bool:
return False
+
def get_post_url(self) -> str | None:
+
return None
+
+
# generic input service.
# user and service for db queries
-
class Input():
-
def __init__(self, service: str, user_id: str, settings: dict, db: DataBaseWorker) -> None:
+
class Input:
+
def __init__(
+
self, service: str, user_id: str, settings: dict, db: DataBaseWorker
+
) -> None:
self.service = service
self.user_id = user_id
self.settings = settings
self.db = db
-
+
async def listen(self, outputs: list, handler: Callable[[Post], Any]):
pass
-
class Output():
+
+
class Output:
def __init__(self, input: Input, settings: dict, db: DataBaseWorker) -> None:
self.input = input
self.settings = settings
self.db = db
-
+
def accept_post(self, post: Post):
LOGGER.warning('Not Implemented.. "posted" %s', post.get_id())
-
+
def delete_post(self, identifier: str):
LOGGER.warning('Not Implemented.. "deleted" %s', identifier)
-
+
def accept_repost(self, repost_id: str, reposted_id: str):
LOGGER.warning('Not Implemented.. "reblogged" %s, %s', repost_id, reposted_id)
-
+
def delete_repost(self, repost_id: str):
LOGGER.warning('Not Implemented.. "removed reblog" %s', repost_id)
-
-
def accept_quote(self, quote: Post, quoted_id: str):
-
LOGGER.warning('Not Implemented.. "quoted" %s, %s', quote.get_id(), quoted_id)
-
-
def delete_quote(self, quote_id: str):
-
LOGGER.warning('Not Implemented.. "removed quote" %s', quote_id)
-
def tokenize_markdown(text: str, tags: list[str], handles: list[tuple[str, str]]) -> list[Token]:
-
if not text:
-
return []
-
-
index: int = 0
-
total: int = len(text)
-
buffer: list[str] = []
-
-
tokens: list[Token] = []
-
-
def flush():
-
nonlocal buffer
-
if buffer:
-
tokens.append(TextToken(''.join(buffer)))
-
buffer = []
-
-
while index < total:
-
if text[index] == '[':
-
md_inline = MD_INLINE_LINK.match(text, index)
-
if md_inline:
-
flush()
-
label = md_inline.group(1)
-
href = md_inline.group(2)
-
tokens.append(LinkToken(href, label))
-
index = md_inline.end()
-
continue
-
-
if text[index] == '<':
-
md_auto = MD_AUTOLINK.match(text, index)
-
if md_auto:
-
flush()
-
href = md_auto.group(1)
-
tokens.append(LinkToken(href, href))
-
index = md_auto.end()
-
continue
-
-
if text[index] == '#':
-
tag = HASHTAG.match(text, index)
-
if tag:
-
tag_text = tag.group(1)
-
if tag_text.lower() in tags:
-
flush()
-
tokens.append(TagToken(tag_text))
-
index = tag.end()
-
continue
-
-
if text[index] == '@':
-
handle = FEDIVERSE_HANDLE.match(text, index)
-
if handle:
-
handle_text = handle.group(0)
-
stripped_handle = handle_text.strip()
-
-
match = next(
-
(pair for pair in handles if stripped_handle in pair),
-
None
-
)
-
-
if match:
-
flush()
-
tokens.append(MentionToken(match[1], '')) # TODO: misskey doesn't provide a uri
-
index = handle.end()
-
continue
-
-
url = URL.match(text, index)
-
if url:
-
flush()
-
href = url.group(0)
-
tokens.append(LinkToken(href, href))
-
index = url.end()
-
continue
-
-
buffer.append(text[index])
-
index += 1
-
-
flush()
-
return tokens
-
def split_tokens(tokens: list[Token], max_chars: int, max_link_len: int = 35) -> list[list[Token]]:
-
def start_new_block():
-
nonlocal current_block, blocks, current_length
-
if current_block:
-
blocks.append(current_block)
-
current_block = []
-
current_length = 0
+
def test_filters(tokens: list[Token], filters: list[re.Pattern[str]]):
+
if not tokens or not filters:
+
return True
-
def append_text_to_block(text_segment):
-
nonlocal current_block
-
# if the last element in the current block is also text, just append to it
-
if current_block and isinstance(current_block[-1], TextToken):
-
current_block[-1].text += text_segment
-
else:
-
current_block.append(TextToken(text_segment))
-
-
blocks: list[list[Token]] = []
-
current_block: list[Token] = []
-
current_length: int = 0
+
markdown = ""
for token in tokens:
if isinstance(token, TextToken):
-
# split content into alternating "words" (\S+) and "whitespace" (\s+).
-
# this ensures every space/newline is treated as its own segment.
-
segments: list[str] = ALTERNATE.findall(token.text)
+
markdown += token.text
+
elif isinstance(token, LinkToken):
+
markdown += f"[{token.label}]({token.href})"
+
elif isinstance(token, TagToken):
+
markdown += "#" + token.tag
+
elif isinstance(token, MentionToken):
+
markdown += token.username
-
for seg in segments:
-
if seg.isspace():
-
# whitespace segment: we count it, and if it doesn't fully fit,
-
# split the whitespace across blocks to preserve exact spacing.
-
seg_len: int = len(seg)
-
while seg_len > 0:
-
space_left = max_chars - current_length
-
if space_left == 0:
-
start_new_block()
-
continue
+
for filter in filters:
+
if filter.search(markdown):
+
return False
-
take = min(space_left, seg_len)
-
part = seg[:take]
-
append_text_to_block(part)
+
return True
-
current_length += len(part)
-
seg = seg[take:]
-
seg_len -= take
-
if current_length == max_chars:
-
start_new_block()
+
def split_tokens(
+
tokens: list[Token], max_chars: int, max_link_len: int = 35
+
) -> list[list[Token]]:
+
def new_block():
+
nonlocal blocks, block, length
+
if block:
+
blocks.append(block)
+
block = []
+
length = 0
-
else:
-
# seg is a "word" (no whitespace inside).
-
word: str = seg
-
wlen: int = len(word)
+
def append_text(text_segment):
+
nonlocal block
+
# if the last element in the current block is also text, just append to it
+
if block and isinstance(block[-1], TextToken):
+
block[-1].text += text_segment
+
else:
+
block.append(TextToken(text_segment))
-
# if the word itself is longer than n, we must split it with hyphens.
-
if wlen > max_chars:
-
# first, if we're in the middle of a block, close it & start fresh.
-
if current_length > 0:
-
start_new_block()
+
blocks: list[list[Token]] = []
+
block: list[Token] = []
+
length = 0
-
remaining = word
-
# carve off (n-1)-sized chunks + "-" so each chunk is n chars.
-
while len(remaining) > (max_chars - 1):
-
chunk = remaining[: max_chars - 1] + '-'
-
append_text_to_block(chunk)
-
# that chunk fills the current block
-
start_new_block()
-
remaining = remaining[max_chars - 1 :]
+
for tk in tokens:
+
if isinstance(tk, TagToken):
+
tag_len = 1 + len(tk.tag) # (#) + tag
+
if length + tag_len > max_chars:
+
new_block() # create new block if the current one is too large
-
# now whatever remains is ≤ n characters
-
if remaining:
-
append_text_to_block(remaining)
-
current_length = len(remaining)
+
block.append(tk)
+
length += tag_len
+
elif isinstance(tk, LinkToken): # TODO labels should probably be split too
+
link_len = len(tk.label)
+
if canonical_label(
+
tk.label, tk.href
+
): # cut down the link if the label is canonical
+
link_len = min(link_len, max_link_len)
-
else:
-
# word fits fully within a block (≤ n).
-
if current_length + wlen <= max_chars:
-
append_text_to_block(word)
-
current_length += wlen
-
else:
-
# not enough space in current block → start a new one
-
start_new_block()
-
append_text_to_block(word)
-
current_length = wlen
+
if length + link_len > max_chars:
+
new_block()
+
block.append(tk)
+
length += link_len
+
elif isinstance(tk, TextToken):
+
segments: list[str] = ALTERNATE.findall(tk.text)
-
elif isinstance(token, LinkToken):
-
link_len = len(token.label)
-
if util.canonical_label(token.label, token.href):
-
link_len = min(link_len, max_link_len)
+
for seg in segments:
+
seg_len: int = len(seg)
+
if length + seg_len <= max_chars - (0 if seg.isspace() else 1):
+
append_text(seg)
+
length += seg_len
+
continue
-
if current_length + link_len <= max_chars:
-
current_block.append(token)
-
current_length += link_len
-
else:
-
start_new_block()
-
current_block.append(token)
-
current_length = link_len
+
if length > 0:
+
new_block()
-
elif isinstance(token, TagToken):
-
# we treat a hashtag like "#tagname" for counting.
-
hashtag_len = 1 + len(token.tag)
-
if current_length + hashtag_len <= max_chars:
-
current_block.append(token)
-
current_length += hashtag_len
-
else:
-
start_new_block()
-
current_block.append(token)
-
current_length = hashtag_len
+
if not seg.isspace():
+
while len(seg) > max_chars - 1:
+
chunk = seg[: max_chars - 1] + "-"
+
append_text(chunk)
+
new_block()
+
seg = seg[max_chars - 1 :]
+
else:
+
while len(seg) > max_chars:
+
chunk = seg[:max_chars]
+
append_text(chunk)
+
new_block()
+
seg = seg[max_chars:]
-
else:
-
# if you happen to have other types, just append them without affecting length.
-
current_block.append(token)
+
if seg:
+
append_text(seg)
+
length = len(seg)
+
else: # TODO fix mentions
+
block.append(tk)
-
# append any remaining tokens as the final block
-
if current_block:
-
blocks.append(current_block)
+
if block:
+
blocks.append(block)
-
return blocks
+
return blocks
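a usage sketch for the splitter (hypothetical, not part of the diff): the contract is that every emitted block renders within the character budget on its own.

# hypothetical usage of cross.split_tokens with the 300-char bluesky budget
from cross import TagToken, TextToken, split_tokens

tokens = [TextToken("hello " * 120), TagToken("xpost")]
for block in split_tokens(tokens, max_chars=300):
    rendered = "".join(
        t.text if isinstance(t, TextToken) else "#" + t.tag for t in block
    )
    # every block fits into a single post
    assert len(rendered) <= 300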
-191
database.py
···
-
import sqlite3
-
from concurrent.futures import Future
-
import threading
-
import queue
-
-
class DataBaseWorker():
-
def __init__(self, database: str) -> None:
-
super(DataBaseWorker, self).__init__()
-
self.database = database
-
self.queue = queue.Queue()
-
self.thread = threading.Thread(target=self._run, daemon=True)
-
self.shutdown_event = threading.Event()
-
self.conn = sqlite3.connect(self.database, check_same_thread=False)
-
self.lock = threading.Lock()
-
self.thread.start()
-
-
def _run(self):
-
while not self.shutdown_event.is_set():
-
try:
-
task, future = self.queue.get(timeout=1)
-
try:
-
with self.lock:
-
result = task(self.conn)
-
future.set_result(result)
-
except Exception as e:
-
future.set_exception(e)
-
finally:
-
self.queue.task_done()
-
except queue.Empty:
-
continue
-
-
def execute(self, sql: str, params = ()):
-
def task(conn: sqlite3.Connection):
-
cursor = conn.execute(sql, params)
-
conn.commit()
-
return cursor.fetchall()
-
-
future = Future()
-
self.queue.put((task, future))
-
return future.result()
-
-
def close(self):
-
self.shutdown_event.set()
-
self.thread.join()
-
with self.lock:
-
self.conn.close()
-
-
def try_insert_post(
-
db: DataBaseWorker,
-
post_id: str,
-
in_reply: str | None,
-
input_user: str,
-
input_service: str) -> bool:
-
root_id = None
-
parent_id = None
-
-
if in_reply:
-
parent_post = find_post(db, in_reply, input_user, input_service)
-
if not parent_post:
-
return False
-
-
root_id = parent_post['id']
-
parent_id = root_id
-
if parent_post['root_id']:
-
root_id = parent_post['root_id']
-
-
if root_id and parent_id:
-
insert_reply(db,post_id, input_user, input_service, parent_id, root_id)
-
else:
-
insert_post(db, post_id, input_user, input_service)
-
-
return True
-
-
-
def find_mapped_thread(
-
db: DataBaseWorker,
-
parent_id: str,
-
input_user: str,
-
input_service: str,
-
output_user: str,
-
output_service: str):
-
-
reply_data: dict | None = find_post(db, parent_id, input_user, input_service)
-
if not reply_data:
-
return None
-
-
reply_mappings: list[str] | None = find_mappings(db, reply_data['id'], output_service, output_user)
-
if not reply_mappings:
-
return None
-
-
reply_identifier: str = reply_mappings[-1]
-
root_identifier: str = reply_mappings[0]
-
if reply_data['root_id']:
-
root_data = find_post_by_id(db, reply_data['root_id'])
-
if not root_data:
-
return None
-
-
root_mappings = find_mappings(db, reply_data['root_id'], output_service, output_user)
-
if not root_mappings:
-
return None
-
root_identifier = root_mappings[0]
-
-
return (
-
root_identifier[0], # real ids
-
reply_identifier[0],
-
reply_data['root_id'], # db ids
-
reply_data['id']
-
)
-
-
-
def insert_post(db: DataBaseWorker, identifier: str, user_id: str, serivce: str) -> int:
-
db.execute(
-
"""
-
INSERT INTO posts (user_id, service, identifier)
-
VALUES (?, ?, ?);
-
""", (user_id, serivce, identifier))
-
return db.execute("SELECT last_insert_rowid();", ())[0][0]
-
-
def insert_reply(db: DataBaseWorker, identifier: str, user_id: str, serivce: str, parent: int, root: int) -> int:
-
db.execute(
-
"""
-
INSERT INTO posts (user_id, service, identifier, parent_id, root_id)
-
VALUES (?, ?, ?, ?, ?);
-
""", (user_id, serivce, identifier, parent, root))
-
return db.execute("SELECT last_insert_rowid();", ())[0][0]
-
-
def insert_mapping(db: DataBaseWorker, original: int, mapped: int):
-
db.execute("""
-
INSERT INTO mappings (original_post_id, mapped_post_id)
-
VALUES (?, ?);
-
""", (original, mapped))
-
-
def delete_post(db: DataBaseWorker, identifier: str, user_id: str, serivce: str):
-
db.execute(
-
"""
-
DELETE FROM posts
-
WHERE identifier = ?
-
AND service = ?
-
AND user_id = ?
-
""", (identifier, serivce, user_id))
-
-
-
def find_mappings(db: DataBaseWorker, original_post: int, service: str, user_id: str) -> list[str]:
-
return db.execute(
-
"""
-
SELECT p.identifier
-
FROM posts AS p
-
JOIN mappings AS m
-
ON p.id = m.mapped_post_id
-
WHERE m.original_post_id = ?
-
AND p.service = ?
-
AND p.user_id = ?
-
ORDER BY p.id;
-
""",
-
(original_post, service, user_id))
-
-
def find_post_by_id(db: DataBaseWorker, id: int) -> dict | None:
-
result = db.execute(
-
"""
-
SELECT user_id, service, identifier, parent_id, root_id
-
FROM posts
-
WHERE id = ?
-
""", (id,))
-
if not result:
-
return None
-
user_id, service, identifier, parent_id, root_id = result[0]
-
return {
-
'user_id': user_id,
-
'service': service,
-
'identifier': identifier,
-
'parent_id': parent_id,
-
'root_id': root_id
-
}
-
-
def find_post(db: DataBaseWorker, identifier: str, user_id: str, service: str) -> dict | None:
-
result = db.execute(
-
"""
-
SELECT id, parent_id, root_id
-
FROM posts
-
WHERE identifier = ?
-
AND user_id = ?
-
AND service = ?
-
""", (identifier, user_id, service))
-
if not result:
-
return None
-
id, parent_id, root_id = result[0]
-
return {
-
'id': id,
-
'parent_id': parent_id,
-
'root_id': root_id
-
}
+80 -55
main.py
···
-
from util import LOGGER
+
import asyncio
+
import json
import os
-
import json
-
import database
-
import mastodon, misskey, bluesky, cross
-
import asyncio, threading, queue, traceback
-
import util
+
import queue
+
import threading
+
import traceback
+
+
import cross
+
import util.database as database
+
from bluesky.input import BlueskyJetstreamInput
+
from bluesky.output import BlueskyOutput, BlueskyOutputOptions
+
from mastodon.input import MastodonInput, MastodonInputOptions
+
from mastodon.output import MastodonOutput
+
from misskey.input import MisskeyInput
+
from util.util import LOGGER, as_json
DEFAULT_SETTINGS: dict = {
-
'input': {
-
'type': 'mastodon-wss',
-
'instance': 'env:MASTODON_INSTANCE',
-
'token': 'env:MASTODON_TOKEN',
-
"options": mastodon.MastodonInputOptions({})
+
"input": {
+
"type": "mastodon-wss",
+
"instance": "env:MASTODON_INSTANCE",
+
"token": "env:MASTODON_TOKEN",
+
"options": MastodonInputOptions({}),
},
-
'outputs': [
+
"outputs": [
{
-
'type': 'bluesky',
-
'handle': 'env:BLUESKY_HANDLE',
-
'app-password': 'env:BLUESKY_APP_PASSWORD',
-
'options': bluesky.BlueskyOutputOptions({})
+
"type": "bluesky",
+
"handle": "env:BLUESKY_HANDLE",
+
"app-password": "env:BLUESKY_APP_PASSWORD",
+
"options": BlueskyOutputOptions({}),
}
-
]
+
],
}
INPUTS = {
-
"mastodon-wss": lambda settings, db: mastodon.MastodonInput(settings, db),
-
"misskey-wss": lambda settigs, db: misskey.MisskeyInput(settigs, db),
-
"bluesky-pds-wss": lambda settings, db: bluesky.BlueskyPdsInput(settings, db)
+
"mastodon-wss": lambda settings, db: MastodonInput(settings, db),
+
"misskey-wss": lambda settigs, db: MisskeyInput(settigs, db),
+
"bluesky-jetstream-wss": lambda settings, db: BlueskyJetstreamInput(settings, db),
}
OUTPUTS = {
-
"bluesky": lambda input, settings, db: bluesky.BlueskyOutput(input, settings, db),
-
"mastodon": lambda input, settings, db: mastodon.MastodonOutput(input, settings, db)
+
"bluesky": lambda input, settings, db: BlueskyOutput(input, settings, db),
+
"mastodon": lambda input, settings, db: MastodonOutput(input, settings, db),
}
+
def execute(data_dir):
if not os.path.exists(data_dir):
os.makedirs(data_dir)
-
-
settings_path = os.path.join(data_dir, 'settings.json')
-
database_path = os.path.join(data_dir, 'data.db')
-
+
+
settings_path = os.path.join(data_dir, "settings.json")
+
database_path = os.path.join(data_dir, "data.db")
+
if not os.path.exists(settings_path):
LOGGER.info("First launch detected! Creating %s and exiting!", settings_path)
-
-
with open(settings_path, 'w') as f:
-
f.write(util.as_json(DEFAULT_SETTINGS, indent=2))
+
+
with open(settings_path, "w") as f:
+
f.write(as_json(DEFAULT_SETTINGS, indent=2))
return 0
-
LOGGER.info('Loading settings...')
-
with open(settings_path, 'rb') as f:
+
LOGGER.info("Loading settings...")
+
with open(settings_path, "rb") as f:
settings = json.load(f)
-
-
LOGGER.info('Starting database worker...')
+
+
LOGGER.info("Starting database worker...")
db_worker = database.DataBaseWorker(os.path.abspath(database_path))
-
-
db_worker.execute('PRAGMA foreign_keys = ON;')
-
+
+
db_worker.execute("PRAGMA foreign_keys = ON;")
+
# create the posts table
# id - internal id of the post
# user_id - user id on the service (e.g. a724sknj5y9ydk0w)
···
id INTEGER PRIMARY KEY AUTOINCREMENT,
user_id TEXT NOT NULL,
service TEXT NOT NULL,
-
identifier TEXT NOT NULL UNIQUE,
+
identifier TEXT NOT NULL,
parent_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL,
root_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
);
"""
)
-
+
+
columns = db_worker.execute("PRAGMA table_info(posts)")
+
column_names = [col[1] for col in columns]
+
if "reposted_id" not in column_names:
+
db_worker.execute("""
+
ALTER TABLE posts
+
ADD COLUMN reposted_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
+
""")
+
if "extra_data" not in column_names:
+
db_worker.execute("""
+
ALTER TABLE posts
+
ADD COLUMN extra_data TEXT NULL
+
""")
+
# create the mappings table
# original_post_id - the post this was mapped from
# mapped_post_id - the post this was mapped to
···
);
"""
)
-
-
input_settings = settings.get('input')
+
+
input_settings = settings.get("input")
if not input_settings:
raise Exception("No input specified!")
-
outputs_settings = settings.get('outputs', [])
-
-
input = INPUTS[input_settings['type']](input_settings, db_worker)
-
+
outputs_settings = settings.get("outputs", [])
+
+
input = INPUTS[input_settings["type"]](input_settings, db_worker)
+
if not outputs_settings:
LOGGER.warning("No outputs specified! Check the config!")
-
+
outputs: list[cross.Output] = []
for output_settings in outputs_settings:
-
outputs.append(OUTPUTS[output_settings['type']](input, output_settings, db_worker))
-
-
LOGGER.info('Starting task worker...')
+
outputs.append(
+
OUTPUTS[output_settings["type"]](input, output_settings, db_worker)
+
)
+
+
LOGGER.info("Starting task worker...")
+
def worker(queue: queue.Queue):
while True:
task = queue.get()
if task is None:
break
-
+
try:
task()
except Exception as e:
···
traceback.print_exc()
finally:
queue.task_done()
-
+
task_queue = queue.Queue()
thread = threading.Thread(target=worker, args=(task_queue,), daemon=True)
thread.start()
-
-
LOGGER.info('Connecting to %s...', input.service)
+
+
LOGGER.info("Connecting to %s...", input.service)
try:
asyncio.run(input.listen(outputs, lambda x: task_queue.put(x)))
except KeyboardInterrupt:
LOGGER.info("Stopping...")
-
+
task_queue.join()
task_queue.put(None)
thread.join()
-
+
if __name__ == "__main__":
-
execute('./data')
+
execute("./data")
-193
markeddown.py
···
-
import re
-
from html.parser import HTMLParser
-
from html import unescape
-
-
### VIBECODED CODE ALERT!!! ###
-
-
class HTMLToMarkdownParser(HTMLParser):
-
def __init__(self):
-
super().__init__()
-
self.markdown = []
-
self.current_tag_stack = []
-
self.list_stack = []
-
self.in_pre = False
-
self.in_code = False
-
self.table_data = []
-
self.current_row = []
-
self.in_table = False
-
self.link_stack = []
-
self.preserve_spaces = False
-
-
def handle_starttag(self, tag, attrs):
-
attrs_dict = dict(attrs)
-
-
if tag == 'h1':
-
self.markdown.append('\n# ')
-
elif tag == 'h2':
-
self.markdown.append('\n## ')
-
elif tag == 'h3':
-
self.markdown.append('\n### ')
-
elif tag == 'h4':
-
self.markdown.append('\n#### ')
-
elif tag == 'h5':
-
self.markdown.append('\n##### ')
-
elif tag == 'h6':
-
self.markdown.append('\n###### ')
-
elif tag == 'p':
-
self.markdown.append('\n\n')
-
elif tag == 'br':
-
self.markdown.append(' \n')
-
elif tag == 'strong' or tag == 'b':
-
self.markdown.append('**')
-
elif tag == 'em' or tag == 'i':
-
self.markdown.append('*')
-
elif tag == 'code':
-
if not self.in_pre:
-
self.markdown.append('`')
-
self.in_code = True
-
elif tag == 'pre':
-
self.markdown.append('\n```\n')
-
self.in_pre = True
-
elif tag == 'blockquote':
-
self.markdown.append('\n> ')
-
elif tag == 'ul':
-
self.list_stack.append('ul')
-
self.markdown.append('\n')
-
elif tag == 'ol':
-
self.list_stack.append('ol')
-
self.markdown.append('\n')
-
elif tag == 'li':
-
indent = ' ' * (len(self.list_stack) - 1)
-
if self.list_stack and self.list_stack[-1] == 'ul':
-
self.markdown.append(f'{indent}- ')
-
elif self.list_stack and self.list_stack[-1] == 'ol':
-
self.markdown.append(f'{indent}1. ')
-
elif tag == 'a':
-
href = attrs_dict.get('href', '')
-
self.link_stack.append(href)
-
self.markdown.append('[')
-
elif tag == 'img':
-
src = attrs_dict.get('src', '')
-
alt = attrs_dict.get('alt', '')
-
title = attrs_dict.get('title', '')
-
if title:
-
self.markdown.append(f'![{alt}]({src} "{title}")')
-
else:
-
self.markdown.append(f'![{alt}]({src})')
-
elif tag == 'hr':
-
self.markdown.append('\n---\n')
-
elif tag == 'table':
-
self.in_table = True
-
self.table_data = []
-
elif tag == 'tr':
-
self.current_row = []
-
elif tag == 'th' or tag == 'td':
-
pass # Handle in handle_data
-
elif tag == 'del' or tag == 's':
-
self.markdown.append('~~')
-
-
self.current_tag_stack.append(tag)
-
-
def handle_endtag(self, tag):
-
if not self.current_tag_stack:
-
return
-
-
# Remove the tag from stack
-
if tag in self.current_tag_stack:
-
self.current_tag_stack.remove(tag)
-
-
if tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
-
self.markdown.append('\n')
-
elif tag == 'p':
-
self.markdown.append('\n')
-
elif tag == 'strong' or tag == 'b':
-
self.markdown.append('**')
-
elif tag == 'em' or tag == 'i':
-
self.markdown.append('*')
-
elif tag == 'code':
-
if not self.in_pre and self.in_code:
-
self.markdown.append('`')
-
self.in_code = False
-
elif tag == 'pre':
-
self.markdown.append('\n```\n')
-
self.in_pre = False
-
elif tag == 'blockquote':
-
self.markdown.append('\n')
-
elif tag == 'ul' or tag == 'ol':
-
if self.list_stack:
-
self.list_stack.pop()
-
self.markdown.append('\n')
-
elif tag == 'li':
-
self.markdown.append('\n')
-
elif tag == 'a':
-
if self.link_stack:
-
href = self.link_stack.pop()
-
self.markdown.append(f']({href})')
-
elif tag == 'table':
-
self.in_table = False
-
self._process_table()
-
elif tag == 'tr':
-
if self.in_table:
-
self.table_data.append(self.current_row[:])
-
self.current_row = []
-
elif tag == 'del' or tag == 's':
-
self.markdown.append('~~')
-
-
def handle_data(self, data):
-
# Clean up whitespace, but preserve intentional spacing
-
if self.in_pre:
-
self.markdown.append(data)
-
else:
-
# Check if we're in a table cell
-
if self.in_table and (not self.current_tag_stack or
-
self.current_tag_stack[-1] in ['td', 'th']):
-
self.current_row.append(data.strip())
-
else:
-
if hasattr(self, 'preserve_spaces') and self.preserve_spaces:
-
# Only normalize line breaks and tabs, preserve spaces
-
cleaned_data = re.sub(r'[\r\n\t]+', ' ', data)
-
# Remove leading/trailing whitespace only from the entire content
-
if cleaned_data.strip():
-
self.markdown.append(cleaned_data)
-
else:
-
# Default: Normalize all whitespace
-
cleaned_data = re.sub(r'\s+', ' ', data.strip())
-
if cleaned_data:
-
self.markdown.append(cleaned_data)
-
-
def _process_table(self):
-
if not self.table_data:
-
return
-
-
self.markdown.append('\n')
-
-
# Process header row if exists
-
if self.table_data:
-
header = self.table_data[0]
-
self.markdown.append('| ' + ' | '.join(header) + ' |\n')
-
self.markdown.append('| ' + ' | '.join(['---'] * len(header)) + ' |\n')
-
-
# Process data rows
-
for row in self.table_data[1:]:
-
# Pad row to match header length
-
while len(row) < len(header):
-
row.append('')
-
self.markdown.append('| ' + ' | '.join(row) + ' |\n')
-
-
self.markdown.append('\n')
-
-
def get_markdown(self):
-
return ''.join(self.markdown)
-
-
def reset(self):
-
"""Reset the parser state for reuse."""
-
super().reset()
-
self.markdown = []
-
self.current_tag_stack = []
-
self.list_stack = []
-
self.in_pre = False
-
self.in_code = False
-
self.table_data = []
-
self.current_row = []
-
self.in_table = False
-
self.link_stack = []
+52
mastodon/common.py
···
+
import cross
+
from util.media import MediaInfo
+
+
+
class MastodonPost(cross.Post):
+
def __init__(
+
self,
+
status: dict,
+
tokens: list[cross.Token],
+
media_attachments: list[MediaInfo],
+
) -> None:
+
super().__init__()
+
self.id = status["id"]
+
self.parent_id = status.get("in_reply_to_id")
+
self.tokens = tokens
+
self.content_type = status.get("content_type", "text/plain")
+
self.timestamp = status["created_at"]
+
self.media_attachments = media_attachments
+
self.spoiler = status.get("spoiler_text")
+
self.language = [status["language"]] if status.get("language") else []
+
self.sensitive = status.get("sensitive", False)
+
self.url = status.get("url")
+
+
def get_id(self) -> str:
+
return self.id
+
+
def get_parent_id(self) -> str | None:
+
return self.parent_id
+
+
def get_tokens(self) -> list[cross.Token]:
+
return self.tokens
+
+
def get_text_type(self) -> str:
+
return self.content_type
+
+
def get_timestamp(self) -> str:
+
return self.timestamp
+
+
def get_attachments(self) -> list[MediaInfo]:
+
return self.media_attachments
+
+
def get_spoiler(self) -> str | None:
+
return self.spoiler
+
+
def get_languages(self) -> list[str]:
+
return self.language
+
+
def is_sensitive(self) -> bool:
+
return self.sensitive or (self.spoiler is not None and self.spoiler != "")
+
+
def get_post_url(self) -> str | None:
+
return self.url
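to show how a status payload maps onto the cross.Post interface, here is a trimmed, hypothetical status (all field values invented):

from mastodon.common import MastodonPost

status = {
    "id": "113000000000000001",
    "created_at": "2025-01-01T00:00:00.000Z",
    "language": "en",
    "sensitive": False,
    "url": "https://example.social/@alice/113000000000000001",
}
post = MastodonPost(status, tokens=[], media_attachments=[])
assert post.get_text_type() == "text/plain"  # no content_type falls back to plain
assert post.get_languages() == ["en"]
assert not post.is_sensitive()  # no spoiler text and not flagged sensitive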
+225
mastodon/input.py
···
+
import asyncio
+
import json
+
import re
+
from typing import Any, Callable
+
+
import requests
+
import websockets
+
+
import cross
+
import util.database as database
+
import util.html_util as html_util
+
import util.md_util as md_util
+
from mastodon.common import MastodonPost
+
from util.database import DataBaseWorker
+
from util.media import MediaInfo, download_media
+
from util.util import LOGGER, as_envvar
+
+
ALLOWED_VISIBILITY = ["public", "unlisted"]
+
MARKDOWNY = ["text/x.misskeymarkdown", "text/markdown", "text/plain"]
+
+
+
class MastodonInputOptions:
+
def __init__(self, o: dict) -> None:
+
self.allowed_visibility = ALLOWED_VISIBILITY
+
self.filters = [re.compile(f) for f in o.get("regex_filters", [])]
+
+
allowed_visibility = o.get("allowed_visibility")
+
if allowed_visibility is not None:
+
if any([v not in ALLOWED_VISIBILITY for v in allowed_visibility]):
+
raise ValueError(
+
f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}"
+
)
+
self.allowed_visibility = allowed_visibility
+
+
+
class MastodonInput(cross.Input):
+
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+
self.options = MastodonInputOptions(settings.get("options", {}))
+
self.token = as_envvar(settings.get("token")) or (_ for _ in ()).throw(
+
ValueError("'token' is required")
+
)
+
instance: str = as_envvar(settings.get("instance")) or (_ for _ in ()).throw(
+
ValueError("'instance' is required")
+
)
+
+
service = instance[:-1] if instance.endswith("/") else instance
+
+
LOGGER.info("Verifying %s credentails...", service)
+
response = requests.get(
+
f"{service}/api/v1/accounts/verify_credentials",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
if response.status_code != 200:
+
LOGGER.error("Failed to validate user credentials!")
+
response.raise_for_status()
+
return
+
+
super().__init__(service, response.json()["id"], settings, db)
+
self.streaming = self._get_streaming_url()
+
+
if not self.streaming:
+
raise Exception("Instance %s does not support streaming!", service)
+
+
def _get_streaming_url(self):
+
response = requests.get(f"{self.service}/api/v1/instance")
+
response.raise_for_status()
+
data: dict = response.json()
+
return (data.get("urls") or {}).get("streaming_api")
+
+
def __to_tokens(self, status: dict):
+
content_type = status.get("content_type", "text/plain")
+
raw_text = status.get("text")
+
+
tags: list[str] = []
+
for tag in status.get("tags", []):
+
tags.append(tag["name"])
+
+
mentions: list[tuple[str, str]] = []
+
for mention in status.get("mentions", []):
+
mentions.append(("@" + mention["username"], "@" + mention["acct"]))
+
+
if raw_text and content_type in MARKDOWNY:
+
return md_util.tokenize_markdown(raw_text, tags, mentions)
+
+
akkoma_ext: dict | None = status.get("akkoma", {}).get("source")
+
if akkoma_ext:
+
if akkoma_ext.get("mediaType") in MARKDOWNY:
+
return md_util.tokenize_markdown(akkoma_ext["content"], tags, mentions)
+
+
tokenizer = html_util.HTMLPostTokenizer()
+
tokenizer.mentions = mentions
+
tokenizer.tags = tags
+
tokenizer.feed(status.get("content", ""))
+
return tokenizer.get_tokens()
+
+
def _on_create_post(self, outputs: list[cross.Output], status: dict):
+
# skip events from other users
+
if (status.get("account") or {})["id"] != self.user_id:
+
return
+
+
if status.get("visibility") not in self.options.allowed_visibility:
+
# Skip f/o and direct posts
+
LOGGER.info(
+
"Skipping '%s'! '%s' visibility..",
+
status["id"],
+
status.get("visibility"),
+
)
+
return
+
+
# TODO polls not supported on bsky. maybe 3rd party? skip for now
+
# we don't handle reblogs. possible with bridgy(?) and self
+
# we don't handle quotes.
+
if status.get("poll"):
+
LOGGER.info("Skipping '%s'! Contains a poll..", status["id"])
+
return
+
+
if status.get("quote_id") or status.get("quote"):
+
LOGGER.info("Skipping '%s'! Quote..", status["id"])
+
return
+
+
reblog: dict | None = status.get("reblog")
+
if reblog:
+
if (reblog.get("account") or {})["id"] != self.user_id:
+
LOGGER.info("Skipping '%s'! Reblog of other user..", status["id"])
+
return
+
+
success = database.try_insert_repost(
+
self.db, status["id"], reblog["id"], self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info(
+
"Skipping '%s' as reblogged post was not found in db!", status["id"]
+
)
+
return
+
+
for output in outputs:
+
output.accept_repost(status["id"], reblog["id"])
+
return
+
+
in_reply: str | None = status.get("in_reply_to_id")
+
in_reply_to: str | None = status.get("in_reply_to_account_id")
+
if in_reply_to and in_reply_to != self.user_id:
+
# We don't support replies.
+
LOGGER.info("Skipping '%s'! Reply to other user..", status["id"])
+
return
+
+
success = database.try_insert_post(
+
self.db, status["id"], in_reply, self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info(
+
"Skipping '%s' as parent post was not found in db!", status["id"]
+
)
+
return
+
+
tokens = self.__to_tokens(status)
+
if not cross.test_filters(tokens, self.options.filters):
+
LOGGER.info("Skipping '%s'. Matched a filter!", status["id"])
+
return
+
+
LOGGER.info("Crossposting '%s'...", status["id"])
+
+
media_attachments: list[MediaInfo] = []
+
for attachment in status.get("media_attachments", []):
+
LOGGER.info("Downloading %s...", attachment["url"])
+
info = download_media(
+
attachment["url"], attachment.get("description") or ""
+
)
+
if not info:
+
LOGGER.error("Skipping '%s'. Failed to download media!", status["id"])
+
return
+
media_attachments.append(info)
+
+
cross_post = MastodonPost(status, tokens, media_attachments)
+
for output in outputs:
+
output.accept_post(cross_post)
+
+
def _on_delete_post(self, outputs: list[cross.Output], identifier: str):
+
post = database.find_post(self.db, identifier, self.user_id, self.service)
+
if not post:
+
return
+
+
LOGGER.info("Deleting '%s'...", identifier)
+
if post["reposted_id"]:
+
for output in outputs:
+
output.delete_repost(identifier)
+
else:
+
for output in outputs:
+
output.delete_post(identifier)
+
+
database.delete_post(self.db, identifier, self.user_id, self.service)
+
+
def _on_post(self, outputs: list[cross.Output], event: str, payload: str):
+
match event:
+
case "update":
+
self._on_create_post(outputs, json.loads(payload))
+
case "delete":
+
self._on_delete_post(outputs, payload)
+
+
async def listen(
+
self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]
+
):
+
uri = f"{self.streaming}/api/v1/streaming?stream=user&access_token={self.token}"
+
+
async for ws in websockets.connect(
+
uri, extra_headers={"User-Agent": "XPost/0.0.3"}
+
):
+
try:
+
LOGGER.info("Listening to %s...", self.streaming)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
data = json.loads(msg)
+
event: str = data.get("event")
+
payload: str = data.get("payload")
+
+
submit(lambda event=str(event), payload=str(payload): self._on_post(outputs, event, payload))
+
+
listen = asyncio.create_task(listen_for_messages())
+
+
await asyncio.gather(listen)
+
except websockets.ConnectionClosedError as e:
+
LOGGER.error(e, stack_info=True, exc_info=True)
+
LOGGER.info("Reconnecting to %s...", self.streaming)
+
continue
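the regex_filters option feeds cross.test_filters, which matches patterns against a markdown rendering of the whole post. a hypothetical filter setup:

import re

import cross

filters = [re.compile(r"#nobot\b")]
tokens = [cross.TextToken("good morning "), cross.TagToken("nobot")]

# the TagToken renders as "#nobot", so this post would be skipped
assert cross.test_filters(tokens, filters) is False
assert cross.test_filters([cross.TextToken("good morning")], filters) is True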
+448
mastodon/output.py
···
+
import time
+
+
import requests
+
+
import cross
+
import misskey.mfm_util as mfm_util
+
import util.database as database
+
from util.database import DataBaseWorker
+
from util.media import MediaInfo
+
from util.util import LOGGER, as_envvar, canonical_label
+
+
POSSIBLE_MIMES = [
+
"audio/ogg",
+
"audio/mp3",
+
"image/webp",
+
"image/jpeg",
+
"image/png",
+
"video/mp4",
+
"video/quicktime",
+
"video/webm",
+
]
+
+
TEXT_MIMES = ["text/x.misskeymarkdown", "text/markdown", "text/plain"]
+
+
ALLOWED_POSTING_VISIBILITY = ["public", "unlisted", "private"]
+
+
+
class MastodonOutputOptions:
+
def __init__(self, o: dict) -> None:
+
self.visibility = "public"
+
+
visibility = o.get("visibility")
+
if visibility is not None:
+
if visibility not in ALLOWED_POSTING_VISIBILITY:
+
raise ValueError(
+
f"'visibility' only accepts {', '.join(ALLOWED_POSTING_VISIBILITY)}, got: {visibility}"
+
)
+
self.visibility = visibility
+
+
+
class MastodonOutput(cross.Output):
+
def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
+
super().__init__(input, settings, db)
+
self.options = settings.get("options") or {}
+
self.token = as_envvar(settings.get("token")) or (_ for _ in ()).throw(
+
ValueError("'token' is required")
+
)
+
instance: str = as_envvar(settings.get("instance")) or (_ for _ in ()).throw(
+
ValueError("'instance' is required")
+
)
+
+
self.service = instance[:-1] if instance.endswith("/") else instance
+
+
LOGGER.info("Verifying %s credentails...", self.service)
+
response = requests.get(
+
f"{self.service}/api/v1/accounts/verify_credentials",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
if response.status_code != 200:
+
LOGGER.error("Failed to validate user credentials!")
+
response.raise_for_status()
+
return
+
self.user_id: str = response.json()["id"]
+
+
LOGGER.info("Getting %s configuration...", self.service)
+
response = requests.get(
+
f"{self.service}/api/v1/instance",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
if response.status_code != 200:
+
LOGGER.error("Failed to get instance info!")
+
response.raise_for_status()
+
return
+
+
instance_info: dict = response.json()
+
configuration: dict = instance_info["configuration"]
+
+
statuses_config: dict = configuration.get("statuses", {})
+
self.max_characters: int = statuses_config.get("max_characters", 500)
+
self.max_media_attachments: int = statuses_config.get(
+
"max_media_attachments", 4
+
)
+
self.characters_reserved_per_url: int = statuses_config.get(
+
"characters_reserved_per_url", 23
+
)
+
+
media_config: dict = configuration.get("media_attachments", {})
+
self.image_size_limit: int = media_config.get("image_size_limit", 16777216)
+
self.video_size_limit: int = media_config.get("video_size_limit", 103809024)
+
self.supported_mime_types: list[str] = media_config.get(
+
"supported_mime_types", POSSIBLE_MIMES
+
)
+
+
# *oma: max post chars
+
max_toot_chars = instance_info.get("max_toot_chars")
+
if max_toot_chars:
+
self.max_characters: int = max_toot_chars
+
+
# *oma: max upload limit
+
upload_limit = instance_info.get("upload_limit")
+
if upload_limit:
+
self.image_size_limit: int = upload_limit
+
self.video_size_limit: int = upload_limit
+
+
# chuckya: supported text types
+
chuckya_text_mimes: list[str] = statuses_config.get("supported_mime_types", [])
+
self.text_format = next(
+
(mime for mime in TEXT_MIMES if mime in chuckya_text_mimes), "text/plain"
+
)
+
+
# *oma ext: supported text types
+
pleroma = instance_info.get("pleroma")
+
if pleroma:
+
post_formats: list[str] = pleroma.get("metadata", {}).get(
+
"post_formats", []
+
)
+
self.text_format = next(
+
(mime for mime in TEXT_MIMES if mime in post_formats), self.text_format
+
)
+
+
def upload_media(self, attachments: list[MediaInfo]) -> list[str] | None:
+
for a in attachments:
+
if a.mime.startswith("image/") and len(a.io) > self.image_size_limit:
+
return None
+
+
if a.mime.startswith("video/") and len(a.io) > self.video_size_limit:
+
return None
+
+
if not a.mime.startswith("image/") and not a.mime.startswith("video/"):
+
if len(a.io) > 7_000_000:
+
return None
+
+
uploads: list[dict] = []
+
for a in attachments:
+
data = {}
+
if a.alt:
+
data["description"] = a.alt
+
+
req = requests.post(
+
f"{self.service}/api/v2/media",
+
headers={"Authorization": f"Bearer {self.token}"},
+
files={"file": (a.name, a.io, a.mime)},
+
data=data,
+
)
+
+
if req.status_code == 200:
+
LOGGER.info("Uploaded %s! (%s)", a.name, req.json()["id"])
+
uploads.append({"done": True, "id": req.json()["id"]})
+
elif req.status_code == 202:
+
LOGGER.info("Waiting for %s to process!", a.name)
+
uploads.append({"done": False, "id": req.json()["id"]})
+
else:
+
LOGGER.error("Failed to upload %s! %s", a.name, req.text)
+
req.raise_for_status()
+
+
while any([not val["done"] for val in uploads]):
+
LOGGER.info("Waiting for media to process...")
+
time.sleep(3)
+
for media in uploads:
+
if media["done"]:
+
continue
+
+
reqs = requests.get(
+
f"{self.service}/api/v1/media/{media['id']}",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
+
if reqs.status_code == 206:
+
continue
+
+
if reqs.status_code == 200:
+
media["done"] = True
+
continue
+
reqs.raise_for_status()
+
+
return [val["id"] for val in uploads]
+
+
def token_to_string(self, tokens: list[cross.Token]) -> str | None:
+
p_text: str = ""
+
+
for token in tokens:
+
if isinstance(token, cross.TextToken):
+
p_text += token.text
+
elif isinstance(token, cross.TagToken):
+
p_text += "#" + token.tag
+
elif isinstance(token, cross.LinkToken):
+
if canonical_label(token.label, token.href):
+
p_text += token.href
+
else:
+
if self.text_format == "text/plain":
+
p_text += f"{token.label} ({token.href})"
+
elif self.text_format in {
+
"text/x.misskeymarkdown",
+
"text/markdown",
+
}:
+
p_text += f"[{token.label}]({token.href})"
+
else:
+
return None
+
+
return p_text
+
+
def split_tokens_media(self, tokens: list[cross.Token], media: list[MediaInfo]):
+
split_tokens = cross.split_tokens(
+
tokens, self.max_characters, self.characters_reserved_per_url
+
)
+
post_text: list[str] = []
+
+
for block in split_tokens:
+
baked_text = self.token_to_string(block)
+
+
if baked_text is None:
+
return None
+
post_text.append(baked_text)
+
+
if not post_text:
+
post_text = [""]
+
+
posts: list[dict] = [
+
{"text": post_text, "attachments": []} for post_text in post_text
+
]
+
available_indices: list[int] = list(range(len(posts)))
+
+
current_image_post_idx: int | None = None
+
+
def make_blank_post() -> dict:
+
return {"text": "", "attachments": []}
+
+
def pop_next_empty_index() -> int:
+
if available_indices:
+
return available_indices.pop(0)
+
else:
+
new_idx = len(posts)
+
posts.append(make_blank_post())
+
return new_idx
+
+
for att in media:
+
if (
+
current_image_post_idx is not None
+
and len(posts[current_image_post_idx]["attachments"])
+
< self.max_media_attachments
+
):
+
posts[current_image_post_idx]["attachments"].append(att)
+
else:
+
idx = pop_next_empty_index()
+
posts[idx]["attachments"].append(att)
+
current_image_post_idx = idx
+
+
result: list[tuple[str, list[MediaInfo]]] = []
+
+
for p in posts:
+
result.append((p["text"], p["attachments"]))
+
+
return result
+
+
def accept_post(self, post: cross.Post):
+
parent_id = post.get_parent_id()
+
+
new_root_id: int | None = None
+
new_parent_id: int | None = None
+
+
reply_ref: str | None = None
+
if parent_id:
+
thread_tuple = database.find_mapped_thread(
+
self.db,
+
parent_id,
+
self.input.user_id,
+
self.input.service,
+
self.user_id,
+
self.service,
+
)
+
+
if not thread_tuple:
+
LOGGER.error("Failed to find thread tuple in the database!")
+
return None
+
+
_, reply_ref, new_root_id, new_parent_id = thread_tuple
+
+
lang: str
+
if post.get_languages():
+
lang = post.get_languages()[0]
+
else:
+
lang = "en"
+
+
post_tokens = post.get_tokens()
+
if post.get_text_type() == "text/x.misskeymarkdown":
+
post_tokens, status = mfm_util.strip_mfm(post_tokens)
+
post_url = post.get_post_url()
+
if status and post_url:
+
post_tokens.append(cross.TextToken("\n"))
+
post_tokens.append(
+
cross.LinkToken(post_url, "[Post contains MFM, see original]")
+
)
+
+
raw_statuses = self.split_tokens_media(post_tokens, post.get_attachments())
+
if not raw_statuses:
+
LOGGER.error("Failed to split post into statuses?")
+
return None
+
baked_statuses = []
+
+
for status, raw_media in raw_statuses:
+
media: list[str] | None = None
+
if raw_media:
+
media = self.upload_media(raw_media)
+
if not media:
+
LOGGER.error("Failed to upload attachments!")
+
return None
+
baked_statuses.append((status, media))
+
continue
+
baked_statuses.append((status, []))
+
+
created_statuses: list[str] = []
+
+
for status, media in baked_statuses:
+
payload = {
+
"status": status,
+
"media_ids": media or [],
+
"spoiler_text": post.get_spoiler() or "",
+
"visibility": self.options.get("visibility", "public"),
+
"content_type": self.text_format,
+
"language": lang,
+
}
+
+
if media:
+
payload["sensitive"] = post.is_sensitive()
+
+
if post.get_spoiler():
+
payload["sensitive"] = True
+
+
if not status:
+
payload["status"] = "๐Ÿ–ผ๏ธ"
+
+
if reply_ref:
+
payload["in_reply_to_id"] = reply_ref
+
+
reqs = requests.post(
+
f"{self.service}/api/v1/statuses",
+
headers={
+
"Authorization": f"Bearer {self.token}",
+
"Content-Type": "application/json",
+
},
+
json=payload,
+
)
+
+
if reqs.status_code != 200:
+
LOGGER.error(
+
"Failed to post status! %s - %s", reqs.status_code, reqs.text
+
)
+
reqs.raise_for_status()
+
+
reply_ref = reqs.json()["id"]
+
LOGGER.info("Created new status %s!", reply_ref)
+
+
created_statuses.append(reqs.json()["id"])
+
+
db_post = database.find_post(
+
self.db, post.get_id(), self.input.user_id, self.input.service
+
)
+
assert db_post, "ghghghhhhh"
+
+
if new_root_id is None or new_parent_id is None:
+
new_root_id = database.insert_post(
+
self.db, created_statuses[0], self.user_id, self.service
+
)
+
new_parent_id = new_root_id
+
database.insert_mapping(self.db, db_post["id"], new_parent_id)
+
created_statuses = created_statuses[1:]
+
+
for db_id in created_statuses:
+
new_parent_id = database.insert_reply(
+
self.db, db_id, self.user_id, self.service, new_parent_id, new_root_id
+
)
+
database.insert_mapping(self.db, db_post["id"], new_parent_id)
+
+
def delete_post(self, identifier: str):
+
post = database.find_post(
+
self.db, identifier, self.input.user_id, self.input.service
+
)
+
if not post:
+
return
+
+
mappings = database.find_mappings(
+
self.db, post["id"], self.service, self.user_id
+
)
+
for mapping in mappings[::-1]:
+
LOGGER.info("Deleting '%s'...", mapping[0])
+
requests.delete(
+
f"{self.service}/api/v1/statuses/{mapping[0]}",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
database.delete_post(self.db, mapping[0], self.service, self.user_id)
+
+
def accept_repost(self, repost_id: str, reposted_id: str):
+
repost = self.__delete_repost(repost_id)
+
if not repost:
+
return None
+
+
reposted = database.find_post(
+
self.db, reposted_id, self.input.user_id, self.input.service
+
)
+
if not reposted:
+
return
+
+
mappings = database.find_mappings(
+
self.db, reposted["id"], self.service, self.user_id
+
)
+
if mappings:
+
rsp = requests.post(
+
f"{self.service}/api/v1/statuses/{mappings[0][0]}/reblog",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
+
if rsp.status_code != 200:
+
LOGGER.error(
+
"Failed to boost status! status_code: %s, msg: %s",
+
rsp.status_code,
+
rsp.content,
+
)
+
return
+
+
internal_id = database.insert_repost(
+
self.db, rsp.json()["id"], reposted["id"], self.user_id, self.service
+
)
+
database.insert_mapping(self.db, repost["id"], internal_id)
+
+
def __delete_repost(self, repost_id: str) -> dict | None:
+
repost = database.find_post(
+
self.db, repost_id, self.input.user_id, self.input.service
+
)
+
if not repost:
+
return None
+
+
mappings = database.find_mappings(
+
self.db, repost["id"], self.service, self.user_id
+
)
+
reposted_mappings = database.find_mappings(
+
self.db, repost["reposted_id"], self.service, self.user_id
+
)
+
if mappings and reposted_mappings:
+
LOGGER.info("Deleting '%s'...", mappings[0][0])
+
requests.post(
+
f"{self.service}/api/v1/statuses/{reposted_mappings[0][0]}/unreblog",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
database.delete_post(self.db, mappings[0][0], self.user_id, self.service)
+
return repost
+
+
def delete_repost(self, repost_id: str):
+
self.__delete_repost(repost_id)
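the limit resolution above layers *oma-style top-level fields over the standard statuses configuration. condensed into one function for illustration (not a helper in the repo):

def resolve_max_characters(instance_info: dict) -> int:
    statuses = instance_info.get("configuration", {}).get("statuses", {})
    max_characters = statuses.get("max_characters", 500)
    # pleroma/akkoma expose the real limit as a top-level max_toot_chars
    max_toot_chars = instance_info.get("max_toot_chars")
    if max_toot_chars:
        max_characters = max_toot_chars
    return max_characters

assert resolve_max_characters({}) == 500
assert resolve_max_characters({"max_toot_chars": 5000}) == 5000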
-602
mastodon.py
···
-
from util import LOGGER
-
import requests, websockets
-
import util, media_util, json, cross
-
import database
-
from database import DataBaseWorker
-
from typing import Callable, Any
-
import asyncio, time
-
-
from bs4 import BeautifulSoup, Tag
-
from bs4.element import NavigableString
-
import markeddown
-
from html import unescape
-
-
POSSIBLE_MIMES = [
-
'audio/ogg',
-
'audio/mp3',
-
'image/webp',
-
'image/jpeg',
-
'image/png',
-
'video/mp4',
-
'video/quicktime',
-
'video/webm'
-
]
-
-
md_parser = markeddown.HTMLToMarkdownParser()
-
md_parser.preserve_spaces = True
-
-
def tokenize_post(status: dict) -> list[cross.Token]:
-
soup = BeautifulSoup(status['content'], "html.parser")
-
tokens: list[cross.Token] = []
-
-
tags: list[dict] = status.get('tags', [])
-
mentions: list[dict] = status.get('mentions', [])
-
-
def mdd(html):
-
md_parser.feed(unescape(html))
-
md = md_parser.get_markdown()
-
md_parser.reset()
-
return md
-
-
def recurse(node) -> None:
-
if isinstance(node, NavigableString):
-
tokens.append(cross.TextToken(str(node)))
-
return
-
-
if isinstance(node, Tag):
-
if node.name.lower() == "a":
-
href = node.get("href", "")
-
inner_html = "".join(str(c) for c in node.contents)
-
link_text_md = mdd(inner_html)
-
-
if link_text_md.startswith('@'):
-
as_mention = link_text_md[1:]
-
for block in mentions:
-
if href == block.get('url'):
-
tokens.append(cross.MentionToken(block['acct'], block['url']))
-
return
-
elif as_mention == block.get('acct') or as_mention == block.get('username'):
-
tokens.append(cross.MentionToken(block['acct'], block['url']))
-
return
-
-
if link_text_md.startswith('#'):
-
as_tag = link_text_md[1:].lower()
-
if any(as_tag == block.get('name') for block in tags):
-
tokens.append(cross.TagToken(link_text_md[1:]))
-
return
-
-
# idk if we can safely convert this to string
-
tokens.append(cross.LinkToken(str(href), link_text_md))
-
return
-
-
if node.find("a") is not None:
-
for child in node.contents:
-
recurse(child)
-
return
-
-
serialized = str(node)
-
markdownified = mdd(serialized)
-
if markdownified:
-
tokens.append(cross.TextToken(markdownified))
-
return
-
return
-
-
for child in soup.contents:
-
recurse(child)
-
-
return tokens
-
-
MARKDOWNY = ['text/x.misskeymarkdown', 'text/markdown', 'text/plain']
-
-
class MastodonPost(cross.Post):
-
def __init__(self, status: dict, media_attachments: list[media_util.MediaInfo]) -> None:
-
super().__init__()
-
self.status = status
-
self.media_attachments = media_attachments
-
self.tokens = self.__to_tokens()
-
-
-
def __to_tokens(self):
-
content_type = self.status.get('content_type', 'text/plain')
-
raw_text = self.status.get('text')
-
-
tags: list[str] = []
-
for tag in self.status.get('tags', []):
-
tags.append(tag['name'])
-
-
mentions: list[tuple[str, str]] = []
-
for mention in self.status.get('mentions', []):
-
mentions.append(('@' + mention['username'], '@' + mention['acct']))
-
-
if raw_text and content_type in MARKDOWNY:
-
return cross.tokenize_markdown(raw_text, tags, mentions)
-
-
pleroma_ext: dict | None = self.status.get('pleroma', {}).get('content')
-
if pleroma_ext:
-
for ctype in MARKDOWNY:
-
if ctype not in pleroma_ext:
-
continue
-
-
return cross.tokenize_markdown(pleroma_ext[ctype], tags, mentions)
-
-
return tokenize_post(self.status)
-
-
-
def get_tokens(self) -> list[cross.Token]:
-
return self.tokens
-
-
def get_parent_id(self) -> str | None:
-
return self.status.get('in_reply_to_id')
-
-
def get_post_date_iso(self) -> str:
-
date = self.status.get('created_at')
-
return date or super().get_post_date_iso()
-
-
def get_cw(self) -> str:
-
return self.status.get('spoiler_text') or ''
-
-
def get_id(self) -> str:
-
return self.status['id']
-
-
def get_languages(self) -> list[str]:
-
if self.status.get('language'):
-
return [self.status['language']]
-
return []
-
-
def is_sensitive(self) -> bool:
-
return self.status.get('sensitive', False)
-
-
def get_attachments(self) -> list[media_util.MediaInfo]:
-
return self.media_attachments
-
-
ALLOWED_VISIBILITY = ['public', 'unlisted']
-
-
class MastodonInputOptions():
-
def __init__(self, o: dict) -> None:
-
self.allowed_visibility = ALLOWED_VISIBILITY
-
-
allowed_visibility = o.get('allowed_visibility')
-
if allowed_visibility is not None:
-
if any([v not in ALLOWED_VISIBILITY for v in allowed_visibility]):
-
raise ValueError(f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}")
-
self.allowed_visibility = allowed_visibility
-
-
class MastodonInput(cross.Input):
-
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
-
self.options = MastodonInputOptions(settings.get('options', {}))
-
self.token = util.as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))
-
instance: str = util.as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))
-
-
service = instance[:-1] if instance.endswith('/') else instance
-
-
LOGGER.info("Verifying %s credentails...", service)
-
responce = requests.get(f"{service}/api/v1/accounts/verify_credentials", headers={
-
'Authorization': f'Bearer {self.token}'
-
})
-
if responce.status_code != 200:
-
LOGGER.error("Failed to validate user credentials!")
-
responce.raise_for_status()
-
return
-
-
super().__init__(service, responce.json()["id"], settings, db)
-
self.streaming = self._get_streaming_url()
-
-
if not self.streaming:
-
raise Exception("Instance %s does not support streaming!", service)
-
-
def _get_streaming_url(self):
-
response = requests.get(f"{self.service}/api/v1/instance")
-
response.raise_for_status()
-
data: dict = response.json()
-
return (data.get('urls') or {}).get('streaming_api')
-
-
def _on_create_post(self, outputs: list[cross.Output], status: dict):
-
# skip events from other users
-
if (status.get('account') or {})['id'] != self.user_id:
-
return
-
-
if status.get('reblog') or (status.get('quote_id') or status.get('quote')) or status.get('poll'):
-
# TODO polls not supported on bsky. maybe 3rd party? skip for now
-
# we don't handle reblogs. possible with bridgy(?) and self
-
# we don't handle quotes.
-
LOGGER.info("Skipping '%s'! Reblog, quote or poll..", status['id'])
-
return
-
-
in_reply: str | None = status.get('in_reply_to_id')
-
in_reply_to: str | None = status.get('in_reply_to_account_id')
-
if in_reply_to and in_reply_to != self.user_id:
-
# We don't support replies.
-
LOGGER.info("Skipping '%s'! Reply to other user..", status['id'])
-
return
-
-
if status.get('visibility') not in self.options.allowed_visibility:
-
# Skip f/o and direct posts
-
LOGGER.info("Skipping '%s'! '%s' visibility..", status['id'], status.get('visibility'))
-
return
-
-
success = database.try_insert_post(self.db, status['id'], in_reply, self.user_id, self.service)
-
if not success:
-
LOGGER.info("Skipping '%s' as parent post was not found in db!", status['id'])
-
return
-
-
LOGGER.info("Crossposting '%s'...", status['id'])
-
-
media_attachments: list[media_util.MediaInfo] = []
-
for attachment in status.get('media_attachments', []):
-
LOGGER.info("Downloading %s...", attachment['url'])
-
info = media_util.download_media(attachment['url'], attachment.get('description') or '')
-
if not info:
-
LOGGER.error("Skipping '%s'. Failed to download media!", status['id'])
-
return
-
media_attachments.append(info)
-
-
cross_post = MastodonPost(status, media_attachments)
-
for output in outputs:
-
output.accept_post(cross_post)
-
-
def _on_delete_post(self, outputs: list[cross.Output], identifier: str):
-
post = database.find_post(self.db, identifier, self.user_id, self.service)
-
if not post:
-
return
-
-
LOGGER.info("Deleting '%s'...", identifier)
-
for output in outputs:
-
output.delete_post(identifier)
-
database.delete_post(self.db, identifier, self.user_id, self.service)
-
-
def _on_post(self, outputs: list[cross.Output], event: str, payload: str):
-
if event == 'update':
-
self._on_create_post(outputs, json.loads(payload))
-
elif event == 'delete':
-
self._on_delete_post(outputs, payload)
-
-
async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
-
uri = f"{self.streaming}/api/v1/streaming?stream=user&access_token={self.token}"
-
-
async for ws in websockets.connect(uri, extra_headers={"User-Agent": "XPost/0.0.3"}):
-
try:
-
LOGGER.info("Listening to %s...", self.streaming)
-
-
async def listen_for_messages():
-
async for msg in ws:
-
data = json.loads(msg)
-
event: str = data.get('event')
-
payload: str = data.get('payload')
-
-
submit(lambda: self._on_post(outputs, str(event), str(payload)))
-
-
listen = asyncio.create_task(listen_for_messages())
-
-
await asyncio.gather(listen)
-
except websockets.ConnectionClosedError as e:
-
LOGGER.error(e, stack_info=True, exc_info=True)
-
LOGGER.info("Reconnecting to %s...", self.streaming)
-
continue
-
-
ALLOWED_POSTING_VISIBILITY = ['public', 'unlisted', 'private']
-
-
class MastodonOutputOptions():
-
def __init__(self, o: dict) -> None:
-
self.visibility = 'public'
-
-
visibility = o.get('visibility')
-
if visibility is not None:
-
if visibility not in ALLOWED_POSTING_VISIBILITY:
-
raise ValueError(f"'visibility' only accepts {', '.join(ALLOWED_POSTING_VISIBILITY)}, got: {visibility}")
-
self.visibility = visibility
-
-
class MastodonOutput(cross.Output):
-
def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
-
super().__init__(input, settings, db)
-
self.options = settings.get('options') or {}
-
self.token = util.as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))
-
instance: str = util.as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))
-
-
self.service = instance[:-1] if instance.endswith('/') else instance
-
-
LOGGER.info("Verifying %s credentails...", self.service)
-
responce = requests.get(f"{self.service}/api/v1/accounts/verify_credentials", headers={
-
'Authorization': f'Bearer {self.token}'
-
})
-
if responce.status_code != 200:
-
LOGGER.error("Failed to validate user credentials!")
-
responce.raise_for_status()
-
return
-
self.user_id: str = responce.json()["id"]
-
-
LOGGER.info("Getting %s configuration...", self.service)
-
responce = requests.get(f"{self.service}/api/v1/instance", headers={
-
'Authorization': f'Bearer {self.token}'
-
})
-
if responce.status_code != 200:
-
LOGGER.error("Failed to get instance info!")
-
responce.raise_for_status()
-
return
-
-
instance_info: dict = responce.json()
-
configuration: dict = instance_info['configuration']
-
-
statuses_config: dict = configuration.get('statuses', {})
-
self.max_characters: int = statuses_config.get('max_characters', 500)
-
self.max_media_attachments: int = statuses_config.get('max_media_attachments', 4)
-
self.characters_reserved_per_url: int = statuses_config.get('characters_reserved_per_url', 23)
-
-
media_config: dict = configuration.get('media_attachments', {})
-
self.image_size_limit: int = media_config.get('image_size_limit', 16777216)
-
self.video_size_limit: int = media_config.get('video_size_limit', 103809024)
-
self.supported_mime_types: list[str] = media_config.get('supported_mime_types', POSSIBLE_MIMES)
-
-
# *oma: max post chars
-
max_toot_chars = instance_info.get('max_toot_chars')
-
if max_toot_chars:
-
self.max_characters: int = max_toot_chars
-
-
# *oma: max upload limit
-
upload_limit = instance_info.get('upload_limit')
-
if upload_limit:
-
self.image_size_limit: int = upload_limit
-
self.video_size_limit: int = upload_limit
-
-
# *oma ext: supported text types
-
self.text_format = 'text/plain'
-
pleroma = instance_info.get('pleroma')
-
if pleroma:
-
post_formats: list[str] = pleroma.get('metadata', {}).get('post_formats', [])
-
if 'text/x.misskeymarkdown' in post_formats:
-
self.text_format = 'text/x.misskeymarkdown'
-
elif 'text/markdown' in post_formats:
-
self.text_format = 'text/markdown'
-
-
def upload_media(self, attachments: list[media_util.MediaInfo]) -> list[str] | None:
-
for a in attachments:
-
if a.mime.startswith('image/') and len(a.io) > self.image_size_limit:
-
return None
-
-
if a.mime.startswith('video/') and len(a.io) > self.video_size_limit:
-
return None
-
-
if not a.mime.startswith('image/') and not a.mime.startswith('video/'):
-
if len(a.io) > 7_000_000:
-
return None
-
-
uploads: list[dict] = []
-
for a in attachments:
-
data = {}
-
if a.alt:
-
data['description'] = a.alt
-
-
req = requests.post(f"{self.service}/api/v2/media", headers= {
-
'Authorization': f'Bearer {self.token}'
-
}, files={'file': (a.name, a.io, a.mime)}, data=data)
-
-
if req.status_code == 200:
-
LOGGER.info("Uploaded %s! (%s)", a.name, req.json()['id'])
-
uploads.append({
-
'done': True,
-
'id': req.json()['id']
-
})
-
elif req.status_code == 202:
-
LOGGER.info("Waiting for %s to process!", a.name)
-
uploads.append({
-
'done': False,
-
'id': req.json()['id']
-
})
-
else:
-
LOGGER.error("Failed to upload %s! %s", a.name, req.text)
-
req.raise_for_status()
-
-
while any([not val['done'] for val in uploads]):
-
LOGGER.info("Waiting for media to process...")
-
time.sleep(3)
-
for media in uploads:
-
if media['done']:
-
continue
-
-
reqs = requests.get(f'{self.service}/api/v1/media/{media['id']}', headers={
-
'Authorization': f'Bearer {self.token}'
-
})
-
-
if reqs.status_code == 206:
-
continue
-
-
if reqs.status_code == 200:
-
media['done'] = True
-
continue
-
reqs.raise_for_status()
-
-
return [val['id'] for val in uploads]
-
-
def token_to_string(self, tokens: list[cross.Token]) -> str | None:
-
p_text: str = ''
-
-
for token in tokens:
-
if isinstance(token, cross.TextToken):
-
p_text += token.text
-
elif isinstance(token, cross.TagToken):
-
p_text += '#' + token.tag
-
elif isinstance(token, cross.LinkToken):
-
if util.canonical_label(token.label, token.href):
-
p_text += token.href
-
else:
-
if self.text_format == 'text/plain':
-
p_text += f'{token.label}: {token.href}'
-
elif self.text_format in {'text/x.misskeymarkdown', 'text/markdown'}:
-
p_text += f'[{token.label}]({token.href})'
-
else:
-
return None
-
-
return p_text
-
-
def split_tokens_media(self, tokens: list[cross.Token], media: list[media_util.MediaInfo]):
-
split_tokens = cross.split_tokens(tokens, self.max_characters, self.characters_reserved_per_url)
-
post_text: list[str] = []
-
-
for block in split_tokens:
-
baked_text = self.token_to_string(block)
-
-
if baked_text is None:
-
return None
-
post_text.append(baked_text)
-
-
if not post_text:
-
post_text = ['']
-
-
posts: list[dict] = [{"text": post_text, "attachments": []} for post_text in post_text]
-
available_indices: list[int] = list(range(len(posts)))
-
-
current_image_post_idx: int | None = None
-
-
def make_blank_post() -> dict:
-
return {
-
"text": '',
-
"attachments": []
-
}
-
-
def pop_next_empty_index() -> int:
-
if available_indices:
-
return available_indices.pop(0)
-
else:
-
new_idx = len(posts)
-
posts.append(make_blank_post())
-
return new_idx
-
-
for att in media:
-
if (
-
current_image_post_idx is not None
-
and len(posts[current_image_post_idx]["attachments"]) < self.max_media_attachments
-
):
-
posts[current_image_post_idx]["attachments"].append(att)
-
else:
-
idx = pop_next_empty_index()
-
posts[idx]["attachments"].append(att)
-
current_image_post_idx = idx
-
-
result: list[tuple[str, list[media_util.MediaInfo]]] = []
-
-
for p in posts:
-
result.append((p['text'], p["attachments"]))
-
-
return result
-
-
def accept_post(self, post: cross.Post):
-
parent_id = post.get_parent_id()
-
-
new_root_id: int | None = None
-
new_parent_id: int | None = None
-
-
reply_ref: str | None = None
-
if parent_id:
-
thread_tuple = database.find_mapped_thread(
-
self.db,
-
parent_id,
-
self.input.user_id,
-
self.input.service,
-
self.user_id,
-
self.service
-
)
-
-
if not thread_tuple:
-
LOGGER.error("Failed to find thread tuple in the database!")
-
return None
-
-
_, reply_ref, new_root_id, new_parent_id = thread_tuple
-
-
lang: str
-
if post.get_languages():
-
lang = post.get_languages()[0]
-
else:
-
lang = 'en'
-
-
raw_statuses = self.split_tokens_media(post.get_tokens(), post.get_attachments())
-
if not raw_statuses:
-
LOGGER.error("Failed to split post into statuses?")
-
return None
-
baked_statuses = []
-
-
for status, raw_media in raw_statuses:
-
media: list[str] | None = None
-
if raw_media:
-
media = self.upload_media(raw_media)
-
if not media:
-
LOGGER.error("Failed to upload attachments!")
-
return None
-
baked_statuses.append((status, media))
-
continue
-
baked_statuses.append((status,[]))
-
-
created_statuses: list[str] = []
-
-
for status, media in baked_statuses:
-
payload = {
-
'status': status,
-
'media_ids': media or [],
-
'spoiler_text': post.get_cw(),
-
'visibility': self.options.get('visibility', 'public'),
-
'content_type': self.text_format,
-
'language': lang
-
}
-
-
if media:
-
payload['sensitive'] = post.is_sensitive()
-
-
if post.get_cw():
-
payload['sensitive'] = True
-
-
if not status:
-
payload['status'] = '๐Ÿ–ผ๏ธ'
-
-
if reply_ref:
-
payload['in_reply_to_id'] = reply_ref
-
-
reqs = requests.post(f'{self.service}/api/v1/statuses', headers={
-
'Authorization': f'Bearer {self.token}',
-
'Content-Type': 'application/json'
-
}, json=payload)
-
-
if reqs.status_code != 200:
-
LOGGER.info("Failed to post status! %s - %s", reqs.status_code, reqs.text)
-
reqs.raise_for_status()
-
-
reply_ref = reqs.json()['id']
-
LOGGER.info("Created new status %s!", reply_ref)
-
-
created_statuses.append(reqs.json()['id'])
-
-
db_post = database.find_post(self.db, post.get_id(), self.input.user_id, self.input.service)
-
assert db_post, "ghghghhhhh"
-
-
if new_root_id is None or new_parent_id is None:
-
new_root_id = database.insert_post(
-
self.db,
-
created_statuses[0],
-
self.user_id,
-
self.service
-
)
-
new_parent_id = new_root_id
-
database.insert_mapping(self.db, db_post['id'], new_parent_id)
-
created_statuses = created_statuses[1:]
-
-
for db_id in created_statuses:
-
new_parent_id = database.insert_reply(
-
self.db,
-
db_id,
-
self.user_id,
-
self.service,
-
new_parent_id,
-
new_root_id
-
)
-
database.insert_mapping(self.db, db_post['id'], new_parent_id)
-
-
def delete_post(self, identifier: str):
-
post = database.find_post(self.db, identifier, self.input.user_id, self.input.service)
-
if not post:
-
return
-
-
mappings = database.find_mappings(self.db, post['id'], self.service, self.user_id)
-
for mapping in mappings[::-1]:
-
LOGGER.info("Deleting '%s'...", mapping[0])
-
requests.delete(f'{self.service}/api/v1/statuses/{mapping[0]}', headers={
-
'Authorization': f'Bearer {self.token}'
-
})
-
database.delete_post(self.db, mapping[0], self.service, self.user_id)
-
-143
media_util.py
···
-
import requests
-
import subprocess
-
import json
-
import re, urllib.parse, os
-
from util import LOGGER
-
import magic
-
-
FILENAME = re.compile(r'filename="?([^\";]*)"?')
-
MAGIC = magic.Magic(mime=True)
-
-
class MediaInfo():
-
def __init__(self, url: str, name: str, mime: str, alt: str, io: bytes) -> None:
-
self.url = url
-
self.name = name
-
self.mime = mime
-
self.alt = alt
-
self.io = io
-
-
def download_media(url: str, alt: str) -> MediaInfo | None:
-
name = get_filename_from_url(url)
-
io = download_blob(url, max_bytes=100_000_000)
-
if not io:
-
LOGGER.error("Failed to download media attachment! %s", url)
-
return None
-
mime = MAGIC.from_buffer(io)
-
if not mime:
-
mime = 'application/octet-stream'
-
return MediaInfo(url, name, mime, alt, io)
-
-
def get_filename_from_url(url):
-
try:
-
response = requests.head(url, allow_redirects=True)
-
disposition = response.headers.get('Content-Disposition')
-
if disposition:
-
filename = FILENAME.findall(disposition)
-
if filename:
-
return filename[0]
-
except requests.RequestException:
-
pass
-
-
parsed_url = urllib.parse.urlparse(url)
-
base_name = os.path.basename(parsed_url.path)
-
-
# hardcoded fix to return the cid for pds
-
if base_name == 'com.atproto.sync.getBlob':
-
qs = urllib.parse.parse_qs(parsed_url.query)
-
if qs and qs.get('cid'):
-
return qs['cid'][0]
-
-
return base_name
-
-
def probe_bytes(bytes: bytes) -> dict:
-
cmd = [
-
'ffprobe',
-
'-v', 'error',
-
'-show_format',
-
'-show_streams',
-
'-print_format', 'json',
-
'pipe:0'
-
]
-
proc = subprocess.run(cmd, input=bytes, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-
if proc.returncode != 0:
-
raise RuntimeError(f"ffprobe failed: {proc.stderr.decode()}")
-
-
return json.loads(proc.stdout)
-
-
def convert_to_mp4(video_bytes: bytes) -> bytes:
-
cmd = [
-
'ffmpeg',
-
'-i', 'pipe:0',
-
'-c:v', 'libx264',
-
'-crf', '30',
-
'-preset', 'slow',
-
'-c:a', 'aac',
-
'-b:a', '128k',
-
'-movflags', 'frag_keyframe+empty_moov+default_base_moof',
-
'-f', 'mp4',
-
'pipe:1'
-
]
-
-
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
out_bytes, err = proc.communicate(input=video_bytes)
-
-
if proc.returncode != 0:
-
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
-
-
return out_bytes
-
-
def compress_image(image_bytes: bytes, quality: int = 90):
-
cmd = [
-
'ffmpeg',
-
'-f', 'image2pipe',
-
'-i', 'pipe:0',
-
'-c:v', 'webp',
-
'-q:v', str(quality),
-
'-f', 'image2pipe',
-
'pipe:1'
-
]
-
-
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
out_bytes, err = proc.communicate(input=image_bytes)
-
-
if proc.returncode != 0:
-
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
-
-
return out_bytes
-
-
def download_blob(url: str, max_bytes: int = 5_000_000) -> bytes | None:
-
response = requests.get(url, stream=True, timeout=20)
-
if response.status_code != 200:
-
LOGGER.info("Failed to download %s! %s", url, response.text)
-
return None
-
-
downloaded_bytes = b""
-
current_size = 0
-
-
for chunk in response.iter_content(chunk_size=8192):
-
if not chunk:
-
continue
-
-
current_size += len(chunk)
-
if current_size > max_bytes:
-
response.close()
-
return None
-
-
downloaded_bytes += chunk
-
-
return downloaded_bytes
-
-
-
def get_media_meta(bytes: bytes):
-
probe = probe_bytes(bytes)
-
streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
-
if not streams:
-
raise ValueError("No video stream found")
-
-
media = streams[0]
-
return {
-
'width': int(media['width']),
-
'height': int(media['height']),
-
'duration': float(media.get('duration', probe['format'].get('duration', -1)))
-
}
+54
misskey/common.py
···
+
import cross
+
from util.media import MediaInfo
+
+
+
class MisskeyPost(cross.Post):
+
def __init__(
+
self,
+
instance_url: str,
+
note: dict,
+
tokens: list[cross.Token],
+
files: list[MediaInfo],
+
) -> None:
+
super().__init__()
+
self.note = note
+
self.id = note["id"]
+
self.parent_id = note.get("replyId")
+
self.tokens = tokens
+
self.timestamp = note["createdAt"]
+
self.media_attachments = files
+
self.spoiler = note.get("cw")
+
self.sensitive = any(
+
[a.get("isSensitive", False) for a in note.get("files", [])]
+
)
+
self.url = instance_url + "/notes/" + note["id"]
+
+
def get_id(self) -> str:
+
return self.id
+
+
def get_parent_id(self) -> str | None:
+
return self.parent_id
+
+
def get_tokens(self) -> list[cross.Token]:
+
return self.tokens
+
+
def get_text_type(self) -> str:
+
return "text/x.misskeymarkdown"
+
+
def get_timestamp(self) -> str:
+
return self.timestamp
+
+
def get_attachments(self) -> list[MediaInfo]:
+
return self.media_attachments
+
+
def get_spoiler(self) -> str | None:
+
return self.spoiler
+
+
def get_languages(self) -> list[str]:
+
return []
+
+
def is_sensitive(self) -> bool:
+
return self.sensitive or (self.spoiler is not None and self.spoiler != "")
+
+
def get_post_url(self) -> str | None:
+
return self.url
+202
misskey/input.py
···
+
import asyncio
+
import json
+
import re
+
import uuid
+
from typing import Any, Callable
+
+
import requests
+
import websockets
+
+
import cross
+
import util.database as database
+
import util.md_util as md_util
+
from misskey.common import MisskeyPost
+
from util.media import MediaInfo, download_media
+
from util.util import LOGGER, as_envvar
+
+
ALLOWED_VISIBILITY = ["public", "home"]
+
+
+
class MisskeyInputOptions:
+
def __init__(self, o: dict) -> None:
+
self.allowed_visibility = ALLOWED_VISIBILITY
+
self.filters = [re.compile(f) for f in o.get("regex_filters", [])]
+
+
allowed_visibility = o.get("allowed_visibility")
+
if allowed_visibility is not None:
+
if any([v not in ALLOWED_VISIBILITY for v in allowed_visibility]):
+
raise ValueError(
+
f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}"
+
)
+
self.allowed_visibility = allowed_visibility
+
+
+
class MisskeyInput(cross.Input):
+
def __init__(self, settings: dict, db: cross.DataBaseWorker) -> None:
+
self.options = MisskeyInputOptions(settings.get("options", {}))
+
self.token = as_envvar(settings.get("token")) or (_ for _ in ()).throw(
+
ValueError("'token' is required")
+
)
+
instance: str = as_envvar(settings.get("instance")) or (_ for _ in ()).throw(
+
ValueError("'instance' is required")
+
)
+
+
service = instance[:-1] if instance.endswith("/") else instance
+
+
LOGGER.info("Verifying %s credentails...", service)
+
response = requests.post(
+
f"{service}/api/i",
+
json={"i": self.token},
+
headers={"Content-Type": "application/json"},
+
)
+
if response.status_code != 200:
+
LOGGER.error("Failed to validate user credentials!")
+
response.raise_for_status()
+
return
+
+
super().__init__(service, response.json()["id"], settings, db)
+
+
def _on_note(self, outputs: list[cross.Output], note: dict):
+
if note["userId"] != self.user_id:
+
return
+
+
if note.get("visibility") not in self.options.allowed_visibility:
+
LOGGER.info(
+
"Skipping '%s'! '%s' visibility..", note["id"], note.get("visibility")
+
)
+
return
+
+
# TODO polls not supported on bsky. maybe 3rd party? skip for now
+
# we don't handle reblogs. possible with bridgy(?) and self
+
if note.get("poll"):
+
LOGGER.info("Skipping '%s'! Contains a poll..", note["id"])
+
return
+
+
renote: dict | None = note.get("renote")
+
if renote:
+
if note.get("text") is not None:
+
LOGGER.info("Skipping '%s'! Quote..", note["id"])
+
return
+
+
if renote.get("userId") != self.user_id:
+
LOGGER.info("Skipping '%s'! Reblog of other user..", note["id"])
+
return
+
+
success = database.try_insert_repost(
+
self.db, note["id"], renote["id"], self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info(
+
"Skipping '%s' as renoted note was not found in db!", note["id"]
+
)
+
return
+
+
for output in outputs:
+
output.accept_repost(note["id"], renote["id"])
+
return
+
+
reply_id: str | None = note.get("replyId")
+
if reply_id:
+
if note.get("reply", {}).get("userId") != self.user_id:
+
LOGGER.info("Skipping '%s'! Reply to other user..", note["id"])
+
return
+
+
success = database.try_insert_post(
+
self.db, note["id"], reply_id, self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info("Skipping '%s' as parent note was not found in db!", note["id"])
+
return
+
+
mention_handles: dict = note.get("mentionHandles") or {}
+
tags: list[str] = note.get("tags") or []
+
+
handles: list[tuple[str, str]] = []
+
for value in mention_handles.values():
+
handles.append((value, value))
+
+
tokens = md_util.tokenize_markdown(note.get("text", ""), tags, handles)
+
if not cross.test_filters(tokens, self.options.filters):
+
LOGGER.info("Skipping '%s'. Matched a filter!", note["id"])
+
return
+
+
LOGGER.info("Crossposting '%s'...", note["id"])
+
+
media_attachments: list[MediaInfo] = []
+
for attachment in note.get("files", []):
+
LOGGER.info("Downloading %s...", attachment["url"])
+
info = download_media(attachment["url"], attachment.get("comment") or "")
+
if not info:
+
LOGGER.error("Skipping '%s'. Failed to download media!", note["id"])
+
return
+
media_attachments.append(info)
+
+
cross_post = MisskeyPost(self.service, note, tokens, media_attachments)
+
for output in outputs:
+
output.accept_post(cross_post)
+
+
def _on_delete(self, outputs: list[cross.Output], note: dict):
+
# TODO handle deletes
+
pass
+
+
def _on_message(self, outputs: list[cross.Output], data: dict):
+
if data["type"] == "channel":
+
type: str = data["body"]["type"]
+
if type == "note" or type == "reply":
+
note_body = data["body"]["body"]
+
self._on_note(outputs, note_body)
+
return
+
+
pass
+
+
async def _send_keepalive(self, ws: websockets.WebSocketClientProtocol):
+
while ws.open:
+
try:
+
await asyncio.sleep(120)
+
if ws.open:
+
await ws.send("h")
+
LOGGER.debug("Sent keepalive h..")
+
else:
+
LOGGER.info("WebSocket is closed, stopping keepalive task.")
+
break
+
except Exception as e:
+
LOGGER.error(f"Error sending keepalive: {e}")
+
break
+
+
async def _subscribe_to_home(self, ws: websockets.WebSocketClientProtocol):
+
await ws.send(
+
json.dumps(
+
{
+
"type": "connect",
+
"body": {"channel": "homeTimeline", "id": str(uuid.uuid4())},
+
}
+
)
+
)
+
LOGGER.info("Subscribed to 'homeTimeline' channel...")
+
+
async def listen(
+
self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]
+
):
+
streaming: str = f"wss://{self.service.split('://', 1)[1]}"
+
url: str = f"{streaming}/streaming?i={self.token}"
+
+
async for ws in websockets.connect(
+
url, extra_headers={"User-Agent": "XPost/0.0.3"}
+
):
+
try:
+
LOGGER.info("Listening to %s...", streaming)
+
await self._subscribe_to_home(ws)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
# TODO listen to deletes somehow
+
submit(lambda: self._on_message(outputs, json.loads(msg)))
+
+
keepalive = asyncio.create_task(self._send_keepalive(ws))
+
listen = asyncio.create_task(listen_for_messages())
+
+
await asyncio.gather(keepalive, listen)
+
except websockets.ConnectionClosedError as e:
+
LOGGER.error(e, stack_info=True, exc_info=True)
+
LOGGER.info("Reconnecting to %s...", streaming)
+
continue
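+
+
# Editor's note (illustrative): iterating `websockets.connect(...)` as an async
+
# iterator transparently retries the connection, so the handler above only
+
# needs to log the error and `continue` into the next reconnect attempt.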
+38
misskey/mfm_util.py
···
+
import re
+
+
import cross
+
+
MFM_PATTERN = re.compile(r"\$\[([^\[\]]+)\]")
+
+
+
def strip_mfm(tokens: list[cross.Token]) -> tuple[list[cross.Token], bool]:
+
modified = False
+
+
for tk in tokens:
+
if isinstance(tk, cross.TextToken):
+
original = tk.text
+
cleaned = __strip_mfm(original)
+
if cleaned != original:
+
modified = True
+
tk.text = cleaned
+
+
elif isinstance(tk, cross.LinkToken):
+
original = tk.label
+
cleaned = __strip_mfm(original)
+
if cleaned != original:
+
modified = True
+
tk.label = cleaned
+
+
return tokens, modified
+
+
+
def __strip_mfm(text: str) -> str:
+
def match_contents(match: re.Match[str]):
+
content = match.group(1).strip()
+
parts = content.split(" ", 1)
+
return parts[1] if len(parts) > 1 else ""
+
+
while MFM_PATTERN.search(text):
+
text = MFM_PATTERN.sub(match_contents, text)
+
+
return text
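+
+
# Editor's illustrative example (not part of the commit): nested MFM
+
# decorations collapse to their inner text, e.g.
+
#   tokens, changed = strip_mfm([cross.TextToken("$[x2 $[sparkle hi]] there")])
+
#   tokens[0].text == "hi there" and changed is True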
-183
misskey.py
···
-
import cross, media_util, util, database
-
from util import LOGGER
-
import requests, websockets
-
from typing import Callable, Any
-
import asyncio
-
import json, uuid
-
-
-
-
class MisskeyPost(cross.Post):
-
def __init__(self, note: dict, files: list[media_util.MediaInfo]) -> None:
-
super().__init__()
-
self.note = note
-
self.sensitive = any([a.get('isSensitive', False) for a in note.get('files', [])])
-
self.media_attachments = files
-
-
mention_handles: dict = note.get('mentionHandles') or {}
-
tags: list[str] = note.get('tags') or []
-
-
handles: list[tuple[str, str]] = []
-
for key, value in mention_handles.items():
-
handles.append((value, value))
-
-
self.tokens = cross.tokenize_markdown(note.get('text', ''), tags, handles)
-
-
def get_tokens(self) -> list[cross.Token]:
-
return self.tokens
-
-
def get_parent_id(self) -> str | None:
-
return self.note.get('replyId')
-
-
def get_post_date_iso(self) -> str:
-
date = self.note.get('createdAt')
-
return date or super().get_post_date_iso()
-
-
def get_attachments(self) -> list[media_util.MediaInfo]:
-
return self.media_attachments
-
-
def get_id(self) -> str:
-
return self.note['id']
-
-
def get_cw(self) -> str:
-
return self.note.get('cw') or ''
-
-
def get_languages(self) -> list[str]:
-
return []
-
-
def is_sensitive(self) -> bool:
-
return self.sensitive
-
-
ALLOWED_VISIBILITY = ['public', 'home']
-
-
class MisskeyInputOptions():
-
def __init__(self, o: dict) -> None:
-
self.allowed_visibility = ALLOWED_VISIBILITY
-
-
allowed_visibility = o.get('allowed_visibility')
-
if allowed_visibility is not None:
-
if any([v not in ALLOWED_VISIBILITY for v in allowed_visibility]):
-
raise ValueError(f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}")
-
self.allowed_visibility = allowed_visibility
-
-
class MisskeyInput(cross.Input):
-
def __init__(self, settings: dict, db: cross.DataBaseWorker) -> None:
-
self.options = MisskeyInputOptions(settings.get('options', {}))
-
self.token = util.as_envvar(settings.get('token')) or (_ for _ in ()).throw(ValueError("'token' is required"))
-
instance: str = util.as_envvar(settings.get('instance')) or (_ for _ in ()).throw(ValueError("'instance' is required"))
-
-
service = instance[:-1] if instance.endswith('/') else instance
-
-
LOGGER.info("Verifying %s credentails...", service)
-
responce = requests.post(f"{instance}/api/i", json={ 'i': self.token }, headers={
-
"Content-Type": "application/json"
-
})
-
if responce.status_code != 200:
-
LOGGER.error("Failed to validate user credentials!")
-
responce.raise_for_status()
-
return
-
-
super().__init__(service, responce.json()["id"], settings, db)
-
-
def _on_note(self, outputs: list[cross.Output], note: dict):
-
if note['userId'] != self.user_id:
-
return
-
-
if note.get('renoteId') or note.get('poll'):
-
# TODO polls not supported on bsky. maybe 3rd party? skip for now
-
# we don't handle reblogs. possible with bridgy(?) and self
-
LOGGER.info("Skipping '%s'! Renote or poll..", note['id'])
-
return
-
-
reply_id: str | None = note.get('replyId')
-
if reply_id:
-
if note.get('reply', {}).get('userId') != self.user_id:
-
LOGGER.info("Skipping '%s'! Reply to other user..", note['id'])
-
return
-
-
if note.get('visibility') not in self.options.allowed_visibility:
-
LOGGER.info("Skipping '%s'! '%s' visibility..", note['id'], note.get('visibility'))
-
return
-
-
success = database.try_insert_post(self.db, note['id'], reply_id, self.user_id, self.service)
-
if not success:
-
LOGGER.info("Skipping '%s' as parent note was not found in db!", note['id'])
-
return
-
-
LOGGER.info("Crossposting '%s'...", note['id'])
-
-
media_attachments: list[media_util.MediaInfo] = []
-
for attachment in note.get('files', []):
-
LOGGER.info("Downloading %s...", attachment['url'])
-
info = media_util.download_media(attachment['url'], attachment.get('comment') or '')
-
if not info:
-
LOGGER.error("Skipping '%s'. Failed to download media!", note['id'])
-
return
-
media_attachments.append(info)
-
-
cross_post = MisskeyPost(note, media_attachments)
-
for output in outputs:
-
output.accept_post(cross_post)
-
-
def _on_delete(self, outputs: list[cross.Output], note: dict):
-
# TODO handle deletes
-
pass
-
-
def _on_message(self, outputs: list[cross.Output], data: dict):
-
-
if data['type'] == 'channel':
-
type: str = data['body']['type']
-
if type == 'note' or type == 'reply':
-
note_body = data['body']['body']
-
self._on_note(outputs, note_body)
-
return
-
-
pass
-
-
async def _send_keepalive(self, ws: websockets.WebSocketClientProtocol):
-
while ws.open:
-
try:
-
await asyncio.sleep(120)
-
if ws.open:
-
await ws.send("h")
-
LOGGER.debug("Sent keepalive h..")
-
else:
-
LOGGER.info("WebSocket is closed, stopping keepalive task.")
-
break
-
except Exception as e:
-
LOGGER.error(f"Error sending keepalive: {e}")
-
break
-
-
async def _subscribe_to_home(self, ws: websockets.WebSocketClientProtocol):
-
await ws.send(json.dumps({
-
"type": "connect",
-
"body": {
-
"channel": "homeTimeline",
-
"id": str(uuid.uuid4())
-
}
-
}))
-
LOGGER.info("Subscribed to 'homeTimeline' channel...")
-
-
-
async def listen(self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]):
-
streaming: str = f"wss://{self.service.split("://", 1)[1]}"
-
url: str = f"{streaming}/streaming?i={self.token}"
-
-
async for ws in websockets.connect(url, extra_headers={"User-Agent": "XPost/0.0.3"}):
-
try:
-
LOGGER.info("Listening to %s...", streaming)
-
await self._subscribe_to_home(ws)
-
-
async def listen_for_messages():
-
async for msg in ws:
-
# TODO listen to deletes somehow
-
submit(lambda: self._on_message(outputs, json.loads(msg)))
-
-
keepalive = asyncio.create_task(self._send_keepalive(ws))
-
listen = asyncio.create_task(listen_for_messages())
-
-
await asyncio.gather(keepalive, listen)
-
except websockets.ConnectionClosedError as e:
-
LOGGER.error(e, stack_info=True, exc_info=True)
-
LOGGER.info("Reconnecting to %s...", streaming)
-
continue
-1
pyproject.toml
···
requires-python = ">=3.12"
dependencies = [
"atproto>=0.0.61",
-
"bs4>=0.0.2",
"click>=8.2.1",
"python-magic>=0.4.27",
"requests>=2.32.3",
+290
util/database.py
···
+
import json
+
import queue
+
import sqlite3
+
import threading
+
from concurrent.futures import Future
+
+
+
class DataBaseWorker:
+
def __init__(self, database: str) -> None:
+
super().__init__()
+
self.database = database
+
self.queue = queue.Queue()
+
self.thread = threading.Thread(target=self._run, daemon=True)
+
self.shutdown_event = threading.Event()
+
self.conn = sqlite3.connect(self.database, check_same_thread=False)
+
self.lock = threading.Lock()
+
self.thread.start()
+
+
def _run(self):
+
while not self.shutdown_event.is_set():
+
try:
+
task, future = self.queue.get(timeout=1)
+
try:
+
with self.lock:
+
result = task(self.conn)
+
future.set_result(result)
+
except Exception as e:
+
future.set_exception(e)
+
finally:
+
self.queue.task_done()
+
except queue.Empty:
+
continue
+
+
def execute(self, sql: str, params=()):
+
def task(conn: sqlite3.Connection):
+
cursor = conn.execute(sql, params)
+
conn.commit()
+
return cursor.fetchall()
+
+
future = Future()
+
self.queue.put((task, future))
+
return future.result()
+
+
def close(self):
+
self.shutdown_event.set()
+
self.thread.join()
+
with self.lock:
+
self.conn.close()
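+
+
# Editor's illustrative usage (path hypothetical): every statement is funneled
+
# through the single worker thread, so any thread may call execute() safely:
+
#   db = DataBaseWorker("data/xpost.db")
+
#   rows = db.execute("SELECT id FROM posts WHERE service = ?", ("https://example.social",))
+
#   db.close()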
+
+
+
def try_insert_repost(
+
db: DataBaseWorker,
+
post_id: str,
+
reposted_id: str,
+
input_user: str,
+
input_service: str,
+
) -> bool:
+
reposted = find_post(db, reposted_id, input_user, input_service)
+
if not reposted:
+
return False
+
+
insert_repost(db, post_id, reposted["id"], input_user, input_service)
+
return True
+
+
+
def try_insert_post(
+
db: DataBaseWorker,
+
post_id: str,
+
in_reply: str | None,
+
input_user: str,
+
input_service: str,
+
) -> bool:
+
root_id = None
+
parent_id = None
+
+
if in_reply:
+
parent_post = find_post(db, in_reply, input_user, input_service)
+
if not parent_post:
+
return False
+
+
root_id = parent_post["id"]
+
parent_id = root_id
+
if parent_post["root_id"]:
+
root_id = parent_post["root_id"]
+
+
if root_id and parent_id:
+
insert_reply(db, post_id, input_user, input_service, parent_id, root_id)
+
else:
+
insert_post(db, post_id, input_user, input_service)
+
+
return True
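+
+
# Editor's note (illustrative): a reply is only stored once its parent is known,
+
# and root_id always points at the head of the thread. For a chain A <- B <- C:
+
#   A: root=NULL, parent=NULL; B: root=A, parent=A; C: root=A, parent=B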
+
+
+
def insert_repost(
+
db: DataBaseWorker, identifier: str, reposted_id: int, user_id: str, service: str
+
) -> int:
+
db.execute(
+
"""
+
INSERT INTO posts (user_id, service, identifier, reposted_id)
+
VALUES (?, ?, ?, ?);
+
""",
+
(user_id, service, identifier, reposted_id),
+
)
+
return db.execute("SELECT last_insert_rowid();", ())[0][0]
+
+
+
def insert_post(db: DataBaseWorker, identifier: str, user_id: str, service: str) -> int:
+
db.execute(
+
"""
+
INSERT INTO posts (user_id, service, identifier)
+
VALUES (?, ?, ?);
+
""",
+
(user_id, service, identifier),
+
)
+
return db.execute("SELECT last_insert_rowid();", ())[0][0]
+
+
+
def insert_reply(
+
db: DataBaseWorker,
+
identifier: str,
+
user_id: str,
+
service: str,
+
parent: int,
+
root: int,
+
) -> int:
+
db.execute(
+
"""
+
INSERT INTO posts (user_id, service, identifier, parent_id, root_id)
+
VALUES (?, ?, ?, ?, ?);
+
""",
+
(user_id, service, identifier, parent, root),
+
)
+
return db.execute("SELECT last_insert_rowid();", ())[0][0]
+
+
+
def insert_mapping(db: DataBaseWorker, original: int, mapped: int):
+
db.execute(
+
"""
+
INSERT INTO mappings (original_post_id, mapped_post_id)
+
VALUES (?, ?);
+
""",
+
(original, mapped),
+
)
+
+
+
def delete_post(db: DataBaseWorker, identifier: str, user_id: str, service: str):
+
db.execute(
+
"""
+
DELETE FROM posts
+
WHERE identifier = ?
+
AND service = ?
+
AND user_id = ?
+
""",
+
(identifier, service, user_id),
+
)
+
+
+
def fetch_data(db: DataBaseWorker, identifier: str, user_id: str, service: str) -> dict:
+
result = db.execute(
+
"""
+
SELECT extra_data
+
FROM posts
+
WHERE identifier = ?
+
AND user_id = ?
+
AND service = ?
+
""",
+
(identifier, user_id, service),
+
)
+
if not result or not result[0][0]:
+
return {}
+
return json.loads(result[0][0])
+
+
+
def store_data(
+
db: DataBaseWorker, identifier: str, user_id: str, service: str, extra_data: dict
+
) -> None:
+
db.execute(
+
"""
+
UPDATE posts
+
SET extra_data = ?
+
WHERE identifier = ?
+
AND user_id = ?
+
AND service = ?
+
""",
+
(json.dumps(extra_data), identifier, user_id, service),
+
)
+
+
+
def find_mappings(
+
db: DataBaseWorker, original_post: int, service: str, user_id: str
+
) -> list[tuple]:
+
return db.execute(
+
"""
+
SELECT p.identifier
+
FROM posts AS p
+
JOIN mappings AS m
+
ON p.id = m.mapped_post_id
+
WHERE m.original_post_id = ?
+
AND p.service = ?
+
AND p.user_id = ?
+
ORDER BY p.id;
+
""",
+
(original_post, service, user_id),
+
)
+
+
+
def find_post_by_id(db: DataBaseWorker, id: int) -> dict | None:
+
result = db.execute(
+
"""
+
SELECT user_id, service, identifier, parent_id, root_id, reposted_id
+
FROM posts
+
WHERE id = ?
+
""",
+
(id,),
+
)
+
if not result:
+
return None
+
user_id, service, identifier, parent_id, root_id, reposted_id = result[0]
+
return {
+
"user_id": user_id,
+
"service": service,
+
"identifier": identifier,
+
"parent_id": parent_id,
+
"root_id": root_id,
+
"reposted_id": reposted_id,
+
}
+
+
+
def find_post(
+
db: DataBaseWorker, identifier: str, user_id: str, service: str
+
) -> dict | None:
+
result = db.execute(
+
"""
+
SELECT id, parent_id, root_id, reposted_id
+
FROM posts
+
WHERE identifier = ?
+
AND user_id = ?
+
AND service = ?
+
""",
+
(identifier, user_id, service),
+
)
+
if not result:
+
return None
+
id, parent_id, root_id, reposted_id = result[0]
+
return {
+
"id": id,
+
"parent_id": parent_id,
+
"root_id": root_id,
+
"reposted_id": reposted_id,
+
}
+
+
+
def find_mapped_thread(
+
db: DataBaseWorker,
+
parent_id: str,
+
input_user: str,
+
input_service: str,
+
output_user: str,
+
output_service: str,
+
):
+
reply_data: dict | None = find_post(db, parent_id, input_user, input_service)
+
if not reply_data:
+
return None
+
+
reply_mappings: list[tuple] | None = find_mappings(
+
db, reply_data["id"], output_service, output_user
+
)
+
if not reply_mappings:
+
return None
+
+
reply_identifier: tuple = reply_mappings[-1]
+
root_identifier: tuple = reply_mappings[0]
+
if reply_data["root_id"]:
+
root_data = find_post_by_id(db, reply_data["root_id"])
+
if not root_data:
+
return None
+
+
root_mappings = find_mappings(
+
db, reply_data["root_id"], output_service, output_user
+
)
+
if not root_mappings:
+
return None
+
root_identifier = root_mappings[0]
+
+
return (
+
root_identifier[0], # real ids
+
reply_identifier[0],
+
reply_data["root_id"], # db ids
+
reply_data["id"],
+
)
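+
+
# Editor's note (illustrative): the returned tuple is (root identifier on the
+
# output service, identifier of the last mapped segment to reply under,
+
# db root id, db parent id); split posts thread under their final segment.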
+172
util/html_util.py
···
+
from html.parser import HTMLParser
+
+
import cross
+
+
+
class HTMLPostTokenizer(HTMLParser):
+
def __init__(self) -> None:
+
super().__init__()
+
self.tokens: list[cross.Token] = []
+
+
self.mentions: list[tuple[str, str]] = []
+
self.tags: list[str] = []
+
+
self.in_pre = False
+
self.in_code = False
+
+
self.current_tag_stack = []
+
self.list_stack = []
+
+
self.anchor_stack = []
+
self.anchor_data = []
+
+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+
attrs_dict = dict(attrs)
+
+
def append_newline():
+
if self.tokens:
+
last_token = self.tokens[-1]
+
if isinstance(
+
last_token, cross.TextToken
+
) and not last_token.text.endswith("\n"):
+
self.tokens.append(cross.TextToken("\n"))
+
+
match tag:
+
case "br":
+
self.tokens.append(cross.TextToken(" \n"))
+
case "a":
+
href = attrs_dict.get("href", "")
+
self.anchor_stack.append(href)
+
case "strong", "b":
+
self.tokens.append(cross.TextToken("**"))
+
case "em", "i":
+
self.tokens.append(cross.TextToken("*"))
+
case "del", "s":
+
self.tokens.append(cross.TextToken("~~"))
+
case "code":
+
if not self.in_pre:
+
self.tokens.append(cross.TextToken("`"))
+
self.in_code = True
+
case "pre":
+
append_newline()
+
self.tokens.append(cross.TextToken("```\n"))
+
self.in_pre = True
+
case "blockquote":
+
append_newline()
+
self.tokens.append(cross.TextToken("> "))
+
case "ul", "ol":
+
self.list_stack.append(tag)
+
append_newline()
+
case "li":
+
indent = " " * (len(self.list_stack) - 1)
+
if self.list_stack and self.list_stack[-1] == "ul":
+
self.tokens.append(cross.TextToken(f"{indent}- "))
+
elif self.list_stack and self.list_stack[-1] == "ol":
+
self.tokens.append(cross.TextToken(f"{indent}1. "))
+
case _:
+
if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
+
level = int(tag[1])
+
self.tokens.append(cross.TextToken("\n" + "#" * level + " "))
+
+
self.current_tag_stack.append(tag)
+
+
def handle_data(self, data: str) -> None:
+
if self.anchor_stack:
+
self.anchor_data.append(data)
+
else:
+
self.tokens.append(cross.TextToken(data))
+
+
def handle_endtag(self, tag: str) -> None:
+
if not self.current_tag_stack:
+
return
+
+
if tag in self.current_tag_stack:
+
self.current_tag_stack.remove(tag)
+
+
match tag:
+
case "p":
+
self.tokens.append(cross.TextToken("\n\n"))
+
case "a":
+
href = self.anchor_stack.pop()
+
anchor_data = "".join(self.anchor_data)
+
self.anchor_data = []
+
+
if anchor_data.startswith("#"):
+
as_tag = anchor_data[1:].lower()
+
if any(as_tag == block for block in self.tags):
+
self.tokens.append(cross.TagToken(anchor_data[1:]))
+
elif anchor_data.startswith("@"):
+
match = next(
+
(pair for pair in self.mentions if anchor_data in pair), None
+
)
+
+
if match:
+
self.tokens.append(cross.MentionToken(match[1], ""))
+
else:
+
self.tokens.append(cross.LinkToken(href, anchor_data))
+
case "strong", "b":
+
self.tokens.append(cross.TextToken("**"))
+
case "em", "i":
+
self.tokens.append(cross.TextToken("*"))
+
case "del", "s":
+
self.tokens.append(cross.TextToken("~~"))
+
case "code":
+
if not self.in_pre and self.in_code:
+
self.tokens.append(cross.TextToken("`"))
+
self.in_code = False
+
case "pre":
+
self.tokens.append(cross.TextToken("\n```\n"))
+
self.in_pre = False
+
case "blockquote":
+
self.tokens.append(cross.TextToken("\n"))
+
case "ul", "ol":
+
if self.list_stack:
+
self.list_stack.pop()
+
self.tokens.append(cross.TextToken("\n"))
+
case "li":
+
self.tokens.append(cross.TextToken("\n"))
+
case _:
+
if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
+
self.tokens.append(cross.TextToken("\n"))
+
+
def get_tokens(self) -> list[cross.Token]:
+
if not self.tokens:
+
return []
+
+
combined: list[cross.Token] = []
+
buffer: list[str] = []
+
+
def flush_buffer():
+
if buffer:
+
merged = "".join(buffer)
+
combined.append(cross.TextToken(text=merged))
+
buffer.clear()
+
+
for token in self.tokens:
+
if isinstance(token, cross.TextToken):
+
buffer.append(token.text)
+
else:
+
flush_buffer()
+
combined.append(token)
+
+
flush_buffer()
+
+
if combined and isinstance(combined[-1], cross.TextToken):
+
if combined[-1].text.endswith("\n\n"):
+
combined[-1] = cross.TextToken(combined[-1].text[:-2])
+
return combined
+
+
def reset(self):
+
"""Reset the parser state for reuse."""
+
super().reset()
+
self.tokens = []
+
+
self.mentions = []
+
self.tags = []
+
+
self.in_pre = False
+
self.in_code = False
+
+
self.current_tag_stack = []
+
self.anchor_stack = []
+
self.list_stack = []
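+
+
# Editor's illustrative usage (values hypothetical): assign tags/mentions
+
# before feeding, then collect the merged token stream:
+
#   t = HTMLPostTokenizer()
+
#   t.tags = ["fedi"]
+
#   t.mentions = []
+
#   t.feed('<p>hello <a href="https://example.social/tags/fedi">#fedi</a></p>')
+
#   t.get_tokens()  # ~ [TextToken("hello "), TagToken("fedi"), ...]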
+123
util/md_util.py
···
+
import re
+
+
import cross
+
import util.html_util as html_util
+
import util.util as util
+
+
URL = re.compile(r"(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+", re.IGNORECASE)
+
MD_INLINE_LINK = re.compile(
+
r"\[([^\]]+)\]\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)",
+
re.IGNORECASE,
+
)
+
MD_AUTOLINK = re.compile(
+
r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>", re.IGNORECASE
+
)
+
HASHTAG = re.compile(r"(?<!\w)\#([\w]+)")
+
FEDIVERSE_HANDLE = re.compile(r"(?<![\w@])@([\w\.-]+)(?:@([\w\.-]+\.[\w\.-]+))?")
+
+
+
def tokenize_markdown(
+
text: str, tags: list[str], handles: list[tuple[str, str]]
+
) -> list[cross.Token]:
+
if not text:
+
return []
+
+
tokenizer = html_util.HTMLPostTokenizer()
+
tokenizer.mentions = handles
+
tokenizer.tags = tags
+
tokenizer.feed(text)
+
html_tokens = tokenizer.get_tokens()
+
+
tokens: list[cross.Token] = []
+
+
for tk in html_tokens:
+
if isinstance(tk, cross.TextToken):
+
tokens.extend(__tokenize_md(tk.text, tags, handles))
+
elif isinstance(tk, cross.LinkToken):
+
if not tk.label or util.canonical_label(tk.label, tk.href):
+
tokens.append(tk)
+
continue
+
+
tokens.extend(__tokenize_md(f"[{tk.label}]({tk.href})", tags, handles))
+
else:
+
tokens.append(tk)
+
+
return tokens
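+
+
# Editor's illustrative example: plain text runs are re-scanned for links,
+
# hashtags and handles after the HTML pass, so
+
#   tokenize_markdown("hi #fedi", ["fedi"], [])
+
# yields roughly [TextToken("hi "), TagToken("fedi")].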
+
+
+
def __tokenize_md(
+
text: str, tags: list[str], handles: list[tuple[str, str]]
+
) -> list[cross.Token]:
+
index: int = 0
+
total: int = len(text)
+
buffer: list[str] = []
+
+
tokens: list[cross.Token] = []
+
+
def flush():
+
nonlocal buffer
+
if buffer:
+
tokens.append(cross.TextToken("".join(buffer)))
+
buffer = []
+
+
while index < total:
+
if text[index] == "[":
+
md_inline = MD_INLINE_LINK.match(text, index)
+
if md_inline:
+
flush()
+
label = md_inline.group(1)
+
href = md_inline.group(2)
+
tokens.append(cross.LinkToken(href, label))
+
index = md_inline.end()
+
continue
+
+
if text[index] == "<":
+
md_auto = MD_AUTOLINK.match(text, index)
+
if md_auto:
+
flush()
+
href = md_auto.group(1)
+
tokens.append(cross.LinkToken(href, href))
+
index = md_auto.end()
+
continue
+
+
if text[index] == "#":
+
tag = HASHTAG.match(text, index)
+
if tag:
+
tag_text = tag.group(1)
+
if tag_text.lower() in tags:
+
flush()
+
tokens.append(cross.TagToken(tag_text))
+
index = tag.end()
+
continue
+
+
if text[index] == "@":
+
handle = FEDIVERSE_HANDLE.match(text, index)
+
if handle:
+
handle_text = handle.group(0)
+
stripped_handle = handle_text.strip()
+
+
match = next(
+
(pair for pair in handles if stripped_handle in pair), None
+
)
+
+
if match:
+
flush()
+
tokens.append(
+
cross.MentionToken(match[1], "")
+
) # TODO: misskey doesnโ€™t provide a uri
+
index = handle.end()
+
continue
+
+
url = URL.match(text, index)
+
if url:
+
flush()
+
href = url.group(0)
+
tokens.append(cross.LinkToken(href, href))
+
index = url.end()
+
continue
+
+
buffer.append(text[index])
+
index += 1
+
+
flush()
+
return tokens
+160
util/media.py
···
+
import json
+
import os
+
import re
+
import subprocess
+
import urllib.parse
+
+
import magic
+
import requests
+
+
from util.util import LOGGER
+
+
FILENAME = re.compile(r'filename="?([^\";]*)"?')
+
MAGIC = magic.Magic(mime=True)
+
+
+
class MediaInfo:
+
def __init__(self, url: str, name: str, mime: str, alt: str, io: bytes) -> None:
+
self.url = url
+
self.name = name
+
self.mime = mime
+
self.alt = alt
+
self.io = io
+
+
+
def download_media(url: str, alt: str) -> MediaInfo | None:
+
name = get_filename_from_url(url)
+
io = download_blob(url, max_bytes=100_000_000)
+
if not io:
+
LOGGER.error("Failed to download media attachment! %s", url)
+
return None
+
mime = MAGIC.from_buffer(io)
+
if not mime:
+
mime = "application/octet-stream"
+
return MediaInfo(url, name, mime, alt, io)
+
+
+
def get_filename_from_url(url):
+
try:
+
response = requests.head(url, allow_redirects=True)
+
disposition = response.headers.get("Content-Disposition")
+
if disposition:
+
filename = FILENAME.findall(disposition)
+
if filename:
+
return filename[0]
+
except requests.RequestException:
+
pass
+
+
parsed_url = urllib.parse.urlparse(url)
+
base_name = os.path.basename(parsed_url.path)
+
+
# hardcoded fix to return the cid for pds
+
if base_name == "com.atproto.sync.getBlob":
+
qs = urllib.parse.parse_qs(parsed_url.query)
+
if qs and qs.get("cid"):
+
return qs["cid"][0]
+
+
return base_name
+
+
+
def probe_bytes(data: bytes) -> dict:
+
cmd = [
+
"ffprobe",
+
"-v", "error",
+
"-show_format",
+
"-show_streams",
+
"-print_format", "json",
+
"pipe:0",
+
]
+
proc = subprocess.run(
+
cmd, input=data, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffprobe failed: {proc.stderr.decode()}")
+
+
return json.loads(proc.stdout)
+
+
+
def convert_to_mp4(video_bytes: bytes) -> bytes:
+
cmd = [
+
"ffmpeg",
+
"-i", "pipe:0",
+
"-c:v", "libx264",
+
"-crf", "30",
+
"-preset", "slow",
+
"-c:a", "aac",
+
"-b:a", "128k",
+
"-movflags", "frag_keyframe+empty_moov+default_base_moof",
+
"-f", "mp4",
+
"pipe:1",
+
]
+
+
proc = subprocess.Popen(
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
out_bytes, err = proc.communicate(input=video_bytes)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
+
+
return out_bytes
+
+
+
def compress_image(image_bytes: bytes, quality: int = 90):
+
cmd = [
+
"ffmpeg",
+
"-f", "image2pipe",
+
"-i", "pipe:0",
+
"-c:v", "webp",
+
"-q:v", str(quality),
+
"-f", "image2pipe",
+
"pipe:1",
+
]
+
+
proc = subprocess.Popen(
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
out_bytes, err = proc.communicate(input=image_bytes)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
+
+
return out_bytes
+
+
+
def download_blob(url: str, max_bytes: int = 5_000_000) -> bytes | None:
+
response = requests.get(url, stream=True, timeout=20)
+
if response.status_code != 200:
+
LOGGER.info("Failed to download %s! %s", url, response.text)
+
return None
+
+
downloaded_bytes = b""
+
current_size = 0
+
+
for chunk in response.iter_content(chunk_size=8192):
+
if not chunk:
+
continue
+
+
current_size += len(chunk)
+
if current_size > max_bytes:
+
response.close()
+
return None
+
+
downloaded_bytes += chunk
+
+
return downloaded_bytes
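+
+
# Editor's note (URL hypothetical): downloads stream in 8 KiB chunks and abort
+
# once the running total passes max_bytes, e.g.
+
#   download_blob("https://example.social/media/big.mp4", max_bytes=10_000_000)
+
# returns None for anything larger than ~10 MB.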
+
+
+
def get_media_meta(data: bytes):
+
probe = probe_bytes(data)
+
streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
+
if not streams:
+
raise ValueError("No video stream found")
+
+
media = streams[0]
+
return {
+
"width": int(media["width"]),
+
"height": int(media["height"]),
+
"duration": float(media.get("duration", probe["format"].get("duration", -1))),
+
}
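+
+
# Editor's illustrative example (numbers hypothetical):
+
#   get_media_meta(open("clip.mp4", "rb").read())
+
#   # ~ {"width": 1280, "height": 720, "duration": 12.48}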
+43
util/util.py
···
+
import json
+
import logging
+
import os
+
import sys
+
+
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
LOGGER = logging.getLogger("XPost")
+
+
+
def as_json(obj, indent=None, sort_keys=False) -> str:
+
return json.dumps(
+
obj.__dict__ if not isinstance(obj, dict) else obj,
+
default=lambda o: o.__json__() if hasattr(o, "__json__") else o.__dict__,
+
indent=indent,
+
sort_keys=sort_keys,
+
)
+
+
+
def canonical_label(label: str | None, href: str):
+
if not label or label == href:
+
return True
+
+
split = href.split("://", 1)
+
if len(split) > 1:
+
if split[1] == label:
+
return True
+
+
return False
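+
+
# Editor's illustrative examples:
+
#   canonical_label("example.com/post", "https://example.com/post")  # True
+
#   canonical_label("my site", "https://example.com")                # False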
+
+
+
def safe_get(obj: dict, key: str, default):
+
val = obj.get(key, default)
+
return val if val else default
+
+
+
def as_envvar(text: str | None) -> str | None:
+
if not text:
+
return None
+
+
if text.startswith("env:"):
+
return os.environ.get(text[4:], "")
+
+
return text
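+
+
# Editor's illustrative example (variable name hypothetical):
+
#   as_envvar("env:MASTODON_TOKEN")  # -> value of $MASTODON_TOKEN, or "" if unset
+
#   as_envvar("raw-token")           # -> "raw-token"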
-36
util.py
···
-
import logging, sys, os
-
import json
-
-
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
-
LOGGER = logging.getLogger("XPost")
-
-
def as_json(obj, indent=None,sort_keys=False) -> str:
-
return json.dumps(
-
obj.__dict__ if not isinstance(obj, dict) else obj,
-
default=lambda o: o.__json__() if hasattr(o, '__json__') else o.__dict__,
-
indent=indent,
-
sort_keys=sort_keys)
-
-
def canonical_label(label: str | None, href: str):
-
if not label or label == href:
-
return True
-
-
split = href.split('://', 1)
-
if len(split) > 1:
-
if split[1] == label:
-
return True
-
-
return False
-
-
def safe_get(obj: dict, key: str, default):
-
val = obj.get(key, default)
-
return val if val else default
-
-
def as_envvar(text: str | None) -> str | None:
-
if not text:
-
return None
-
-
if text.startswith('env:'):
-
return os.environ.get(text[4:], '')
-
-
return text
-36
uv.lock
···
]
[[package]]
-
name = "beautifulsoup4"
-
version = "4.13.4"
-
source = { registry = "https://pypi.org/simple" }
-
dependencies = [
-
{ name = "soupsieve" },
-
{ name = "typing-extensions" },
-
]
-
sdist = { url = "https://files.pythonhosted.org/packages/d8/e4/0c4c39e18fd76d6a628d4dd8da40543d136ce2d1752bd6eeeab0791f4d6b/beautifulsoup4-4.13.4.tar.gz", hash = "sha256:dbb3c4e1ceae6aefebdaf2423247260cd062430a410e38c66f2baa50a8437195", size = 621067, upload-time = "2025-04-15T17:05:13.836Z" }
-
wheels = [
-
{ url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" },
-
]
-
-
[[package]]
-
name = "bs4"
-
version = "0.0.2"
-
source = { registry = "https://pypi.org/simple" }
-
dependencies = [
-
{ name = "beautifulsoup4" },
-
]
-
sdist = { url = "https://files.pythonhosted.org/packages/c9/aa/4acaf814ff901145da37332e05bb510452ebed97bc9602695059dd46ef39/bs4-0.0.2.tar.gz", hash = "sha256:a48685c58f50fe127722417bae83fe6badf500d54b55f7e39ffe43b798653925", size = 698, upload-time = "2024-01-17T18:15:47.371Z" }
-
wheels = [
-
{ url = "https://files.pythonhosted.org/packages/51/bb/bf7aab772a159614954d84aa832c129624ba6c32faa559dfb200a534e50b/bs4-0.0.2-py2.py3-none-any.whl", hash = "sha256:abf8742c0805ef7f662dce4b51cca104cffe52b835238afc169142ab9b3fbccc", size = 1189, upload-time = "2024-01-17T18:15:48.613Z" },
-
]
-
-
[[package]]
name = "certifi"
version = "2025.4.26"
source = { registry = "https://pypi.org/simple" }
···
]
[[package]]
-
name = "soupsieve"
-
version = "2.7"
-
source = { registry = "https://pypi.org/simple" }
-
sdist = { url = "https://files.pythonhosted.org/packages/3f/f4/4a80cd6ef364b2e8b65b15816a843c0980f7a5a2b4dc701fc574952aa19f/soupsieve-2.7.tar.gz", hash = "sha256:ad282f9b6926286d2ead4750552c8a6142bc4c783fd66b0293547c8fe6ae126a", size = 103418, upload-time = "2025-04-20T18:50:08.518Z" }
-
wheels = [
-
{ url = "https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677, upload-time = "2025-04-20T18:50:07.196Z" },
-
]
-
-
[[package]]
name = "typing-extensions"
version = "4.14.0"
source = { registry = "https://pypi.org/simple" }
···
source = { virtual = "." }
dependencies = [
{ name = "atproto" },
-
{ name = "bs4" },
{ name = "click" },
{ name = "python-magic" },
{ name = "requests" },
···
[package.metadata]
requires-dist = [
{ name = "atproto", specifier = ">=0.0.61" },
-
{ name = "bs4", specifier = ">=0.0.2" },
{ name = "click", specifier = ">=8.2.1" },
{ name = "python-magic", specifier = ">=0.4.27" },
{ name = "requests", specifier = ">=2.32.3" },