social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

+8
.dockerignore
···
+
.env
+
.env.*
+
.gitignore
+
.DS_Store
+
*.swp
+
*~
+
__pycache__/
+
.venv
+2
.gitignore
···
# Virtual environments
.venv
+
.idea/
+
.vscode/
data/
+50
.tangled/workflows/build-images.yml
···
+
when:
+
- event: ["push", "manual"]
+
branch: master
+
+
engine: nixery
+
+
dependencies:
+
nixpkgs:
+
- kaniko
+
- regctl
+
+
environment:
+
GHCR_USER: "zenfyrdev"
+
+
steps:
+
- name: create auth configs
+
command: |
+
mkdir -p $HOME/.docker $HOME/.regctl
+
+
cat > $HOME/.docker/config.json <<EOF
+
{"auths": {"ghcr.io": {"auth": "$(echo -n "$GHCR_USER:$GHCR_PAT" | base64 -w0)"}}}
+
EOF
+
+
cat > $HOME/.regctl/config.json <<EOF
+
{"hosts": {"ghcr.io": {"user": "$GHCR_USER","pass": "$GHCR_PAT"}}}
+
EOF
+
+
- name: build amd64
+
command: |
+
executor \
+
--context=dir://. \
+
--dockerfile=Containerfile \
+
--verbosity=info \
+
--destination=ghcr.io/$GHCR_USER/xpost:amd64-latest \
+
--custom-platform=linux/amd64
+
+
- name: build arm64
+
command: |
+
executor \
+
--context=dir://. \
+
--dockerfile=Containerfile \
+
--verbosity=info \
+
--destination=ghcr.io/$GHCR_USER/xpost:arm64-latest \
+
--custom-platform=linux/arm64
+
+
- name: tag latest artifact
+
command: |
+
regctl index create ghcr.io/$GHCR_USER/xpost:latest \
+
--ref ghcr.io/$GHCR_USER/xpost:amd64-latest --platform linux/amd64 \
+
--ref ghcr.io/$GHCR_USER/xpost:arm64-latest --platform linux/arm64
+41
Containerfile
···
+
FROM python:3.12-alpine
+
COPY --from=ghcr.io/astral-sh/uv:0.7.12 /uv /uvx /bin/
+
+
# Install build tools & runtime dependencies
+
RUN apk add --no-cache \
+
ffmpeg \
+
file \
+
libmagic
+
+
RUN mkdir -p /app/data
+
WORKDIR /app
+
+
# switch to a non-root user
+
RUN adduser -D -u 1000 app && \
+
chown -R app:app /app
+
USER app
+
+
# Enable bytecode compilation
+
ENV UV_COMPILE_BYTECODE=1
+
+
# Copy from the cache instead of linking since it's a mounted volume
+
ENV UV_LINK_MODE=copy
+
+
# Install the project's dependencies using the lockfile and settings
+
COPY ./uv.lock ./pyproject.toml /app/
+
RUN --mount=type=cache,target=/home/app/.cache/uv,uid=1000,gid=1000 \
+
uv sync --locked --no-install-project --no-dev
+
+
# Define app data volume
+
VOLUME /app/data
+
+
# Then, add the rest of the project source code and install it
+
COPY . /app
+
RUN --mount=type=cache,target=/home/app/.cache/uv,uid=1000,gid=1000 \
+
uv sync --locked --no-dev
+
+
# Place executables in the environment at the front of the path
+
ENV PATH="/app/.venv/bin:$PATH"
+
+
# Set entrypoint to run the app using uv
+
ENTRYPOINT ["uv", "run", "main.py"]
+21
LICENSE
···
+
MIT License
+
+
Copyright (c) 2025
+
+
Permission is hereby granted, free of charge, to any person obtaining a copy
+
of this software and associated documentation files (the "Software"), to deal
+
in the Software without restriction, including without limitation the rights
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+
copies of the Software, and to permit persons to whom the Software is
+
furnished to do so, subject to the following conditions:
+
+
The above copyright notice and this permission notice shall be included in all
+
copies or substantial portions of the Software.
+
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+
SOFTWARE.
+137 -20
README.md
···
# XPost
-
> put more readme here uhhh
+
XPost is a social media cross-posting tool that differs from others by using streaming APIs to allow instant, zero-input cross-posting. this means you can continue posting on your preferred platform without using special apps.
-
a silly little crossposting tool based on the mastodon streaming api.
+
XPost tries to support as many features as possible. for example, when cross-posting from mastodon to bluesky, unsupported file types will be attached as links, and posts with mixed media or too many attachments will be split across multiple posts.
-
this tool is very, very not production ready or something. use with caution.
+
the tool may undergo breaking changes as new features are added, so proceed with caution when deploying.
# Installation
-
first install `ffmpeg` with `ffprobe`, make sure they are available on PATH! `ffmpeg` is required to crosspost media.
+
## Native
+
+
first install `ffmpeg`, `ffprobe` and `libmagic`, and make sure they are available on PATH! `ffmpeg` and `libmagic` are required to crosspost media.
then get [uv](https://github.com/astral-sh/uv) and sync the project
···
uv sync
```
-
print help message:
+
generate `settings.json` on first launch:
```
-
uv run main.py run --help
+
uv run main.py
+
```
+
+
## Docker Compose
+
+
the official image is available on [docker hub](https://hub.docker.com/r/melontini/xpost). example `compose.yaml`, assuming the data dir is `./data` and the env file is `./.config/docker.env`. add `:Z` to volume mounts for podman.
+
+
```yaml
+
services:
+
xpost:
+
image: melontini/xpost:latest
+
restart: unless-stopped
+
env_file: ./.config/docker.env
+
volumes:
+
- ./data:/app/data
```
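then pull and start it:
```
docker compose up -d
```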
# Settings
-
## Bluesky
+
the tool allows you to specify an input and multiple outputs to post to.
+
+
some options accept an envvar syntax:
+
+
```json
+
{
+
"token": "env:TOKEN"
+
}
+
```
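values prefixed with `env:` are resolved from the environment when the tool starts, so secrets can stay out of the config file. for example, with the block above:
```
TOKEN=xxxxxxxx uv run main.py
```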
+
+
## Inputs
-
in the bluesky block, you can configure who is allowed to reply to and quote the new posts.
+
all inputs have common options.
-
`quote_gate`:
+
```json5
+
{
+
"options": {
+
"regex_filters": [ //posts matching any of the following regexes will be skipped
+
"(?i)\\b(?:test|hello|hi)\\b"
+
]
+
}
+
}
+
```
-
prevent users from quoting the post. default: `false`
+
### Bluesky Jetstream `bluesky-jetstream-wss`
-
`thread_gate`:
+
listens to repo operation events emitted by Jetstream. the handle becomes optional if you specify a DID.
-
prevent users from replying to the post. leave empty to prevent replies completely.
+
```json5
+
{
+
"type": "bluesky-jetstream-wss",
+
"handle": "env:BLUESKY_HANDLE", // handle (e.g. melontini.me)
+
"did": "env:BLUESKY_DID", // use a DID instead of handle (avoids handle resolution)
+
"jetstream": "wss://jetstream2.us-east.bsky.network/subscribe" //optional, change jetstream endpoint
+
}
+
```
-
accepted values:
-
- `following` followed users.
-
- `followers` users following the account.
-
- `mentioned` users mentioned in the post.
-
- `everybody` everybody is allowed to reply to the post. all other options will be skipped.
+
### Mastodon WebSocket `mastodon-wss`
+
listens to the user's home timeline for new posts and crossposts only the public/unlisted ones authored by the user.
-
# Supported Software
+
```json5
+
{
+
"type": "mastodon-wss", // type
+
"instance": "env:MASTODON_INSTANCE", // mastodon api compatible instance
+
"token": "env:MASTODON_TOKEN", // Must be a mastodon token. get from something like phanpy + webtools. or https://getauth.thms.uk/?client_name=xpost&scopes=read:statuses%20write:statuses%20profile but doesn't work with all software
+
"options": {
+
"allowed_visibility": [
+
"public",
+
"unlisted"
+
]
+
}
+
}
+
```
any instance implementing `/api/v1/instance`, `/api/v1/accounts/verify_credentials` and `/api/v1/streaming?stream` will work fine.
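to check an instance by hand, you can hit the first two endpoints directly (`example.social` is a placeholder):
```
curl https://example.social/api/v1/instance
curl -H "Authorization: Bearer $MASTODON_TOKEN" https://example.social/api/v1/accounts/verify_credentials
```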
confirmed supported:
- Mastodon
- Iceshrimp.NET
-
- Sharkey
- Akkoma
confirmed unsupported:
-
- Mitra
+
- Mitra
+
- Sharkey
+
+
### Misskey WebSocket `misskey-wss`
+
+
listens to the homeTimeline channel for new posts and crossposts only the public/home ones authored by the user.
+
+
**IMPORTANT**: Misskey WSS does not support deletes, so you must delete posts manually. if you know how to listen to all note events, i would appreciate your help.
+
+
```json5
+
{
+
"type": "misskey-wss", // type
+
"instance": "env:MISSKEY_INSTANCE", // misskey instance
+
"token": "env:MISSKEY_TOKEN", // access token with the `View your account information` scope
+
"options": {
+
"allowed_visibility": [
+
"public",
+
"home"
+
]
+
}
+
}
+
```
+
+
the Misskey API is not very good, and this hasn't been tested on vanilla misskey.
+
+
confirmed supported:
+
- Sharkey
+
+
## Outputs
+
+
### Mastodon API
+
+
no remarks.
+
+
```json5
+
{
+
"type": "mastodon",
+
"token": "env:MASTODON_TOKEN", // Must be a mastodon token. get from something like phanpy + webtools. or https://getauth.thms.uk/?client_name=xpost&scopes=read%20write%20profile but doesn't work with all software
+
"instance": "env:MASTODON_INSTNACE", // mastodon api compatible instance
+
"options": {
+
"visibility": "public"
+
}
+
}
+
```
+
+
### Bluesky
+
+
in the bluesky block, you can configure who is allowed to reply to and quote the new posts. the handle becomes optional if you specify a DID.
+
+
```json5
+
{
+
"type": "bluesky", // type
+
"handle": "env:BLUESKY_HANDLE", // handle (e.g. melontini.me)
+
"app_password": "env:BLUESKY_APP_PASSWORD", // https://bsky.app/settings/app-passwords
+
"did": "env:BLUESKY_DID", // use a DID instead of handle (avoids handle resolution)
+
"pds": "env:BLUESKY_PDS", // specify Your PDS directly (avoids DID doc lookup)
+
"bsky_appview": "env:BLUESKY_APPVIEW", // bypass suspensions by specifying a different appview (e.g. did:web:bsky.zeppelin.social)
+
"options": {
+
"encode_videos": true, // bluesky only accepts mp4 videos, try to convert if the video is not mp4
+
"quote_gate": false, // block users from quoting the post
+
"thread_gate": [ // block replies. leave empty to disable replies
+
"mentioned",
+
"following",
+
"followers",
+
"everybody" // allow everybody to reply (ignores other options)
+
]
+
}
+
}
+
```
+196
bluesky/atproto2.py
···
+
from typing import Any
+
+
from atproto import AtUri, Client, IdResolver, client_utils
+
from atproto_client import models
+
+
from util.util import LOGGER
+
+
+
def resolve_identity(
+
handle: str | None = None, did: str | None = None, pds: str | None = None
+
):
+
"""helper to try and resolve identity from provided parameters, a valid handle is enough"""
+
+
if did and pds:
+
return did, pds[:-1] if pds.endswith("/") else pds
+
+
resolver = IdResolver()
+
if not did:
+
if not handle:
+
raise Exception("ATP handle not specified!")
+
LOGGER.info("Resolving ATP identity for %s...", handle)
+
did = resolver.handle.resolve(handle)
+
if not did:
+
raise Exception("Failed to resolve DID!")
+
+
if not pds:
+
LOGGER.info("Resolving PDS from DID document...")
+
did_doc = resolver.did.resolve(did)
+
if not did_doc:
+
raise Exception("Failed to resolve DID doc for '%s'", did)
+
pds = did_doc.get_pds_endpoint()
+
if not pds:
+
raise Exception("Failed to resolve PDS!")
+
+
return did, pds[:-1] if pds.endswith("/") else pds
+
+
+
class Client2(Client):
+
def __init__(self, base_url: str | None = None, *args: Any, **kwargs: Any) -> None:
+
super().__init__(base_url, *args, **kwargs)
+
+
def send_video(
+
self,
+
text: str | client_utils.TextBuilder,
+
video: bytes,
+
video_alt: str | None = None,
+
video_aspect_ratio: models.AppBskyEmbedDefs.AspectRatio | None = None,
+
reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
+
langs: list[str] | None = None,
+
facets: list[models.AppBskyRichtextFacet.Main] | None = None,
+
labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
+
time_iso: str | None = None,
+
) -> models.AppBskyFeedPost.CreateRecordResponse:
+
"""same as send_video, but with labels"""
+
+
if video_alt is None:
+
video_alt = ""
+
+
upload = self.upload_blob(video)
+
+
return self.send_post(
+
text,
+
reply_to=reply_to,
+
embed=models.AppBskyEmbedVideo.Main(
+
video=upload.blob, alt=video_alt, aspect_ratio=video_aspect_ratio
+
),
+
langs=langs,
+
facets=facets,
+
labels=labels,
+
time_iso=time_iso,
+
)
+
+
def send_images(
+
self,
+
text: str | client_utils.TextBuilder,
+
images: list[bytes],
+
image_alts: list[str] | None = None,
+
image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] | None = None,
+
reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
+
langs: list[str] | None = None,
+
facets: list[models.AppBskyRichtextFacet.Main] | None = None,
+
labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
+
time_iso: str | None = None,
+
) -> models.AppBskyFeedPost.CreateRecordResponse:
+
"""same as send_images, but with labels"""
+
+
if image_alts is None:
+
image_alts = [""] * len(images)
+
else:
+
diff = len(images) - len(image_alts)
+
image_alts = image_alts + [""] * diff
+
+
if image_aspect_ratios is None:
+
aligned_image_aspect_ratios = [None] * len(images)
+
else:
+
diff = len(images) - len(image_aspect_ratios)
+
aligned_image_aspect_ratios = image_aspect_ratios + [None] * diff
+
+
uploads = [self.upload_blob(image) for image in images]
+
+
embed_images = [
+
models.AppBskyEmbedImages.Image(
+
alt=alt, image=upload.blob, aspect_ratio=aspect_ratio
+
)
+
for alt, upload, aspect_ratio in zip(
+
image_alts, uploads, aligned_image_aspect_ratios
+
)
+
]
+
+
return self.send_post(
+
text,
+
reply_to=reply_to,
+
embed=models.AppBskyEmbedImages.Main(images=embed_images),
+
langs=langs,
+
facets=facets,
+
labels=labels,
+
time_iso=time_iso,
+
)
+
+
def send_post(
+
self,
+
text: str | client_utils.TextBuilder,
+
reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
+
embed: None
+
| models.AppBskyEmbedImages.Main
+
| models.AppBskyEmbedExternal.Main
+
| models.AppBskyEmbedRecord.Main
+
| models.AppBskyEmbedRecordWithMedia.Main
+
| models.AppBskyEmbedVideo.Main = None,
+
langs: list[str] | None = None,
+
facets: list[models.AppBskyRichtextFacet.Main] | None = None,
+
labels: models.ComAtprotoLabelDefs.SelfLabels | None = None,
+
time_iso: str | None = None,
+
) -> models.AppBskyFeedPost.CreateRecordResponse:
+
"""same as send_post, but with labels"""
+
+
if isinstance(text, client_utils.TextBuilder):
+
facets = text.build_facets()
+
text = text.build_text()
+
+
repo = self.me and self.me.did
+
if not repo:
+
raise Exception("Client not logged in!")
+
+
if not langs:
+
langs = ["en"]
+
+
record = models.AppBskyFeedPost.Record(
+
created_at=time_iso or self.get_current_time_iso(),
+
text=text,
+
reply=reply_to or None,
+
embed=embed or None,
+
langs=langs,
+
facets=facets or None,
+
labels=labels or None,
+
)
+
return self.app.bsky.feed.post.create(repo, record)
+
+
def create_gates(
+
self,
+
thread_gate_opts: list[str],
+
quote_gate: bool,
+
post_uri: str,
+
time_iso: str | None = None,
+
):
+
account = self.me
+
if not account:
+
raise Exception("Client not logged in!")
+
+
rkey = AtUri.from_str(post_uri).rkey
+
time_iso = time_iso or self.get_current_time_iso()
+
+
if "everybody" not in thread_gate_opts:
+
allow = []
+
if thread_gate_opts:
+
if "following" in thread_gate_opts:
+
allow.append(models.AppBskyFeedThreadgate.FollowingRule())
+
if "followers" in thread_gate_opts:
+
allow.append(models.AppBskyFeedThreadgate.FollowerRule())
+
if "mentioned" in thread_gate_opts:
+
allow.append(models.AppBskyFeedThreadgate.MentionRule())
+
+
thread_gate = models.AppBskyFeedThreadgate.Record(
+
post=post_uri, created_at=time_iso, allow=allow
+
)
+
+
self.app.bsky.feed.threadgate.create(account.did, thread_gate, rkey)
+
+
if quote_gate:
+
post_gate = models.AppBskyFeedPostgate.Record(
+
post=post_uri,
+
created_at=time_iso,
+
embedding_rules=[models.AppBskyFeedPostgate.DisableRule()],
+
)
+
+
self.app.bsky.feed.postgate.create(account.did, post_gate, rkey)
+199
bluesky/common.py
···
+
import re
+
+
from atproto import client_utils
+
+
import cross
+
from util.media import MediaInfo
+
from util.util import canonical_label
+
+
# only for lexicon reference
+
SERVICE = "https://bsky.app"
+
+
# TODO this is terrible and stupid
+
ADULT_PATTERN = re.compile(
+
r"\b(sexual content|nsfw|erotic|adult only|18\+)\b", re.IGNORECASE
+
)
+
PORN_PATTERN = re.compile(r"\b(porn|yiff|hentai|pornographic|fetish)\b", re.IGNORECASE)
+
+
+
class BlueskyPost(cross.Post):
+
def __init__(
+
self, record: dict, tokens: list[cross.Token], attachments: list[MediaInfo]
+
) -> None:
+
super().__init__()
+
self.uri = record["$xpost.strongRef"]["uri"]
+
self.parent_uri = None
+
if record.get("reply"):
+
self.parent_uri = record["reply"]["parent"]["uri"]
+
+
self.tokens = tokens
+
self.timestamp = record["createdAt"]
+
labels = record.get("labels", {}).get("values")
+
self.spoiler = None
+
if labels:
+
self.spoiler = ", ".join(
+
[str(label["val"]).replace("-", " ") for label in labels]
+
)
+
+
self.attachments = attachments
+
self.languages = record.get("langs", [])
+
+
# at:// of the post record
+
def get_id(self) -> str:
+
return self.uri
+
+
def get_parent_id(self) -> str | None:
+
return self.parent_uri
+
+
def get_tokens(self) -> list[cross.Token]:
+
return self.tokens
+
+
def get_text_type(self) -> str:
+
return "text/plain"
+
+
def get_timestamp(self) -> str:
+
return self.timestamp
+
+
def get_attachments(self) -> list[MediaInfo]:
+
return self.attachments
+
+
def get_spoiler(self) -> str | None:
+
return self.spoiler
+
+
def get_languages(self) -> list[str]:
+
return self.languages
+
+
def is_sensitive(self) -> bool:
+
return self.spoiler is not None
+
+
def get_post_url(self) -> str | None:
+
did, _, post_id = str(self.uri[len("at://") :]).split("/")
+
+
return f"https://bsky.app/profile/{did}/post/{post_id}"
+
+
+
def tokenize_post(post: dict) -> list[cross.Token]:
+
text: str = post.get("text", "")
+
if not text:
+
return []
+
ut8_text = text.encode(encoding="utf-8")
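# richtext facets index into the UTF-8 encoded text (byteStart/byteEnd), so all slicing below happens on bytes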
+
+
def decode(ut8: bytes) -> str:
+
return ut8.decode(encoding="utf-8")
+
+
facets: list[dict] = post.get("facets", [])
+
if not facets:
+
return [cross.TextToken(decode(ut8_text))]
+
+
slices: list[tuple[int, int, str, str]] = []
+
+
for facet in facets:
+
features: list[dict] = facet.get("features", [])
+
if not features:
+
continue
+
+
# we don't support overlapping facets/features
+
feature = features[0]
+
feature_type = feature["$type"]
+
index = facet["index"]
+
match feature_type:
+
case "app.bsky.richtext.facet#tag":
+
slices.append(
+
(index["byteStart"], index["byteEnd"], "tag", feature["tag"])
+
)
+
case "app.bsky.richtext.facet#link":
+
slices.append(
+
(index["byteStart"], index["byteEnd"], "link", feature["uri"])
+
)
+
case "app.bsky.richtext.facet#mention":
+
slices.append(
+
(index["byteStart"], index["byteEnd"], "mention", feature["did"])
+
)
+
+
if not slices:
+
return [cross.TextToken(decode(ut8_text))]
+
+
slices.sort(key=lambda s: s[0])
+
unique: list[tuple[int, int, str, str]] = []
+
current_end = 0
+
for start, end, ttype, val in slices:
+
if start >= current_end:
+
unique.append((start, end, ttype, val))
+
current_end = end
+
+
if not unique:
+
return [cross.TextToken(decode(ut8_text))]
+
+
tokens: list[cross.Token] = []
+
prev = 0
+
+
for start, end, ttype, val in unique:
+
if start > prev:
+
# text between facets
+
tokens.append(cross.TextToken(decode(ut8_text[prev:start])))
+
# facet token
+
match ttype:
+
case "link":
+
label = decode(ut8_text[start:end])
+
+
# try to unflatten links
+
split = val.split("://", 1)
+
if len(split) > 1:
+
if split[1].startswith(label):
+
tokens.append(cross.LinkToken(val, ""))
+
prev = end
+
continue
+
+
if label.endswith("...") and split[1].startswith(label[:-3]):
+
tokens.append(cross.LinkToken(val, ""))
+
prev = end
+
continue
+
+
tokens.append(cross.LinkToken(val, label))
+
case "tag":
+
tag = decode(ut8_text[start:end])
+
tokens.append(cross.TagToken(tag[1:] if tag.startswith("#") else tag))
+
case "mention":
+
mention = decode(ut8_text[start:end])
+
tokens.append(
+
cross.MentionToken(
+
mention[1:] if mention.startswith("@") else mention, val
+
)
+
)
+
prev = end
+
+
if prev < len(ut8_text):
+
tokens.append(cross.TextToken(decode(ut8_text[prev:])))
+
+
return tokens
+
+
+
def tokens_to_richtext(tokens: list[cross.Token]) -> client_utils.TextBuilder | None:
+
builder = client_utils.TextBuilder()
+
+
def flatten_link(href: str):
+
split = href.split("://", 1)
+
if len(split) > 1:
+
href = split[1]
+
+
if len(href) > 32:
+
href = href[:32] + "..."
+
+
return href
+
+
for token in tokens:
+
if isinstance(token, cross.TextToken):
+
builder.text(token.text)
+
elif isinstance(token, cross.LinkToken):
+
if canonical_label(token.label, token.href):
+
builder.link(flatten_link(token.href), token.href)
+
continue
+
+
builder.link(token.label, token.href)
+
elif isinstance(token, cross.TagToken):
+
builder.tag("#" + token.tag, token.tag.lower())
+
else:
+
# fail on unsupported tokens
+
return None
+
+
return builder
+203
bluesky/input.py
···
+
import asyncio
+
import json
+
import re
+
from typing import Any, Callable
+
+
import websockets
+
from atproto_client import models
+
from atproto_client.models.utils import get_or_create as get_model_or_create
+
+
import cross
+
import util.database as database
+
from bluesky.atproto2 import resolve_identity
+
from bluesky.common import SERVICE, BlueskyPost, tokenize_post
+
from util.database import DataBaseWorker
+
from util.media import MediaInfo, download_media
+
from util.util import LOGGER, as_envvar
+
+
+
class BlueskyInputOptions:
+
def __init__(self, o: dict) -> None:
+
self.filters = [re.compile(f) for f in o.get("regex_filters", [])]
+
+
+
class BlueskyInput(cross.Input):
+
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+
self.options = BlueskyInputOptions(settings.get("options", {}))
+
did, pds = resolve_identity(
+
handle=as_envvar(settings.get("handle")),
+
did=as_envvar(settings.get("did")),
+
pds=as_envvar(settings.get("pds")),
+
)
+
self.pds = pds
+
+
# the PDS is not the service: lexicons and record keys are the same across PDSes
+
super().__init__(SERVICE, did, settings, db)
+
+
def _on_post(self, outputs: list[cross.Output], post: dict[str, Any]):
+
post_uri = post["$xpost.strongRef"]["uri"]
+
post_cid = post["$xpost.strongRef"]["cid"]
+
+
parent_uri = None
+
if post.get("reply"):
+
parent_uri = post["reply"]["parent"]["uri"]
+
+
embed = post.get("embed", {})
+
if embed.get("$type") in (
+
"app.bsky.embed.record",
+
"app.bsky.embed.recordWithMedia",
+
):
+
did, collection, rid = str(embed["record"]["uri"][len("at://") :]).split(
+
"/"
+
)
+
if collection == "app.bsky.feed.post":
+
LOGGER.info("Skipping '%s'! Quote..", post_uri)
+
return
+
+
success = database.try_insert_post(
+
self.db, post_uri, parent_uri, self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info("Skipping '%s' as parent post was not found in db!", post_uri)
+
return
+
database.store_data(
+
self.db, post_uri, self.user_id, self.service, {"cid": post_cid}
+
)
+
+
tokens = tokenize_post(post)
+
if not cross.test_filters(tokens, self.options.filters):
+
LOGGER.info("Skipping '%s'. Matched a filter!", post_uri)
+
return
+
+
LOGGER.info("Crossposting '%s'...", post_uri)
+
+
def get_blob_url(blob: str):
+
return f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.user_id}&cid={blob}"
+
+
attachments: list[MediaInfo] = []
+
if embed.get("$type") == "app.bsky.embed.images":
+
model = get_model_or_create(embed, model=models.AppBskyEmbedImages.Main)
+
assert isinstance(model, models.AppBskyEmbedImages.Main)
+
+
for image in model.images:
+
url = get_blob_url(image.image.cid.encode())
+
LOGGER.info("Downloading %s...", url)
+
io = download_media(url, image.alt)
+
if not io:
+
LOGGER.error("Skipping '%s'. Failed to download media!", post_uri)
+
return
+
attachments.append(io)
+
elif embed.get("$type") == "app.bsky.embed.video":
+
model = get_model_or_create(embed, model=models.AppBskyEmbedVideo.Main)
+
assert isinstance(model, models.AppBskyEmbedVideo.Main)
+
url = get_blob_url(model.video.cid.encode())
+
LOGGER.info("Downloading %s...", url)
+
io = download_media(url, model.alt if model.alt else "")
+
if not io:
+
LOGGER.error("Skipping '%s'. Failed to download media!", post_uri)
+
return
+
attachments.append(io)
+
+
cross_post = BlueskyPost(post, tokens, attachments)
+
for output in outputs:
+
output.accept_post(cross_post)
+
+
def _on_delete_post(self, outputs: list[cross.Output], post_id: str, repost: bool):
+
post = database.find_post(self.db, post_id, self.user_id, self.service)
+
if not post:
+
return
+
+
LOGGER.info("Deleting '%s'...", post_id)
+
if repost:
+
for output in outputs:
+
output.delete_repost(post_id)
+
else:
+
for output in outputs:
+
output.delete_post(post_id)
+
database.delete_post(self.db, post_id, self.user_id, self.service)
+
+
def _on_repost(self, outputs: list[cross.Output], post: dict[str, Any]):
+
post_uri = post["$xpost.strongRef"]["uri"]
+
post_cid = post["$xpost.strongRef"]["cid"]
+
+
reposted_uri = post["subject"]["uri"]
+
+
success = database.try_insert_repost(
+
self.db, post_uri, reposted_uri, self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info("Skipping '%s' as reposted post was not found in db!", post_uri)
+
return
+
database.store_data(
+
self.db, post_uri, self.user_id, self.service, {"cid": post_cid}
+
)
+
+
LOGGER.info("Crossposting '%s'...", post_uri)
+
for output in outputs:
+
output.accept_repost(post_uri, reposted_uri)
+
+
+
class BlueskyJetstreamInput(BlueskyInput):
+
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+
super().__init__(settings, db)
+
self.jetstream = settings.get(
+
"jetstream", "wss://jetstream2.us-east.bsky.network/subscribe"
+
)
+
+
def __on_commit(self, outputs: list[cross.Output], msg: dict):
+
if msg.get("did") != self.user_id:
+
return
+
+
commit: dict = msg.get("commit", {})
+
if not commit:
+
return
+
+
commit_type = commit["operation"]
+
match commit_type:
+
case "create":
+
record = dict(commit.get("record", {}))
+
record["$xpost.strongRef"] = {
+
"cid": commit["cid"],
+
"uri": f"at://{self.user_id}/{commit['collection']}/{commit['rkey']}",
+
}
+
+
match commit["collection"]:
+
case "app.bsky.feed.post":
+
self._on_post(outputs, record)
+
case "app.bsky.feed.repost":
+
self._on_repost(outputs, record)
+
case "delete":
+
post_id: str = (
+
f"at://{self.user_id}/{commit['collection']}/{commit['rkey']}"
+
)
+
match commit["collection"]:
+
case "app.bsky.feed.post":
+
self._on_delete_post(outputs, post_id, False)
+
case "app.bsky.feed.repost":
+
self._on_delete_post(outputs, post_id, True)
+
+
async def listen(
+
self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]
+
):
+
uri = self.jetstream + "?"
+
uri += "wantedCollections=app.bsky.feed.post"
+
uri += "&wantedCollections=app.bsky.feed.repost"
+
uri += f"&wantedDids={self.user_id}"
+
+
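# iterating over connect() makes websockets reconnect automatically (with backoff) when a connection attempt fails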
async for ws in websockets.connect(
+
uri, extra_headers={"User-Agent": "XPost/0.0.3"}
+
):
+
try:
+
LOGGER.info("Listening to %s...", self.jetstream)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
submit(lambda msg=msg: self.__on_commit(outputs, json.loads(msg)))  # bind msg now; a bare closure would see the loop's latest value
+
+
listen = asyncio.create_task(listen_for_messages())
+
+
await asyncio.gather(listen)
+
except websockets.ConnectionClosedError as e:
+
LOGGER.error(e, stack_info=True, exc_info=True)
+
LOGGER.info("Reconnecting to %s...", self.jetstream)
+
continue
+481
bluesky/output.py
···
+
from atproto import Request, client_utils
+
from atproto_client import models
+
from httpx import Timeout
+
+
import cross
+
import misskey.mfm_util as mfm_util
+
import util.database as database
+
from bluesky.atproto2 import Client2, resolve_identity
+
from bluesky.common import ADULT_PATTERN, PORN_PATTERN, SERVICE, tokens_to_richtext
+
from util.database import DataBaseWorker
+
from util.media import (
+
MediaInfo,
+
compress_image,
+
convert_to_mp4,
+
get_filename_from_url,
+
get_media_meta,
+
)
+
from util.util import LOGGER, as_envvar
+
+
ALLOWED_GATES = ["mentioned", "following", "followers", "everybody"]
+
+
+
class BlueskyOutputOptions:
+
def __init__(self, o: dict) -> None:
+
self.quote_gate: bool = False
+
self.thread_gate: list[str] = ["everybody"]
+
self.encode_videos: bool = True
+
+
quote_gate = o.get("quote_gate")
+
if quote_gate is not None:
+
self.quote_gate = bool(quote_gate)
+
+
thread_gate = o.get("thread_gate")
+
if thread_gate is not None:
+
if any([v not in ALLOWED_GATES for v in thread_gate]):
+
raise ValueError(
+
f"'thread_gate' only accepts {', '.join(ALLOWED_GATES)} or [], got: {thread_gate}"
+
)
+
self.thread_gate = thread_gate
+
+
encode_videos = o.get("encode_videos")
+
if encode_videos is not None:
+
self.encode_videos = bool(encode_videos)
+
+
+
class BlueskyOutput(cross.Output):
+
def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
+
super().__init__(input, settings, db)
+
self.options = BlueskyOutputOptions(settings.get("options") or {})
+
+
if not as_envvar(settings.get("app-password")):
+
raise Exception("Account app password not provided!")
+
+
did, pds = resolve_identity(
+
handle=as_envvar(settings.get("handle")),
+
did=as_envvar(settings.get("did")),
+
pds=as_envvar(settings.get("pds")),
+
)
+
+
reqs = Request(timeout=Timeout(None, connect=30.0))
+
+
self.bsky = Client2(pds, request=reqs)
+
self.bsky.configure_proxy_header(
+
service_type="bsky_appview",
+
did=as_envvar(settings.get("bsky_appview")) or "did:web:api.bsky.app",
+
)
+
self.bsky.login(did, as_envvar(settings.get("app-password")))
+
+
def __check_login(self):
+
login = self.bsky.me
+
if not login:
+
raise Exception("Client not logged in!")
+
return login
+
+
def _find_parent(self, parent_id: str):
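# maps a parent post id from the input service to the bluesky root/reply strong refs recorded in the db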
+
login = self.__check_login()
+
+
thread_tuple = database.find_mapped_thread(
+
self.db,
+
parent_id,
+
self.input.user_id,
+
self.input.service,
+
login.did,
+
SERVICE,
+
)
+
+
if not thread_tuple:
+
LOGGER.error("Failed to find thread tuple in the database!")
+
return None
+
+
root_uri: str = thread_tuple[0]
+
reply_uri: str = thread_tuple[1]
+
+
root_cid = database.fetch_data(self.db, root_uri, login.did, SERVICE)["cid"]
+
reply_cid = database.fetch_data(self.db, reply_uri, login.did, SERVICE)["cid"]
+
+
root_record = models.AppBskyFeedPost.CreateRecordResponse(
+
uri=root_uri, cid=root_cid
+
)
+
reply_record = models.AppBskyFeedPost.CreateRecordResponse(
+
uri=reply_uri, cid=reply_cid
+
)
+
+
return (
+
models.create_strong_ref(root_record),
+
models.create_strong_ref(reply_record),
+
thread_tuple[2],
+
thread_tuple[3],
+
)
+
+
def _split_attachments(self, attachments: list[MediaInfo]):
+
sup_media: list[MediaInfo] = []
+
unsup_media: list[MediaInfo] = []
+
+
for a in attachments:
+
if a.mime.startswith("image/") or a.mime.startswith(
+
"video/"
+
): # TODO convert gifs to videos
+
sup_media.append(a)
+
else:
+
unsup_media.append(a)
+
+
return (sup_media, unsup_media)
+
+
def _split_media_per_post(
+
self, tokens: list[client_utils.TextBuilder], media: list[MediaInfo]
+
):
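# packing strategy: every video gets a post of its own; images fill the
# current image post up to 4 per post, and overflow spawns new blank posts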
+
posts: list[dict] = [{"tokens": tokens, "attachments": []} for tokens in tokens]
+
available_indices: list[int] = list(range(len(posts)))
+
+
current_image_post_idx: int | None = None
+
+
def make_blank_post() -> dict:
+
return {"tokens": [client_utils.TextBuilder().text("")], "attachments": []}
+
+
def pop_next_empty_index() -> int:
+
if available_indices:
+
return available_indices.pop(0)
+
else:
+
new_idx = len(posts)
+
posts.append(make_blank_post())
+
return new_idx
+
+
for att in media:
+
if att.mime.startswith("video/"):
+
current_image_post_idx = None
+
idx = pop_next_empty_index()
+
posts[idx]["attachments"].append(att)
+
elif att.mime.startswith("image/"):
+
if (
+
current_image_post_idx is not None
+
and len(posts[current_image_post_idx]["attachments"]) < 4
+
):
+
posts[current_image_post_idx]["attachments"].append(att)
+
else:
+
idx = pop_next_empty_index()
+
posts[idx]["attachments"].append(att)
+
current_image_post_idx = idx
+
+
result: list[tuple[client_utils.TextBuilder, list[MediaInfo]]] = []
+
for p in posts:
+
result.append((p["tokens"], p["attachments"]))
+
return result
+
+
def accept_post(self, post: cross.Post):
+
login = self.__check_login()
+
+
parent_id = post.get_parent_id()
+
+
# used for db insertion
+
new_root_id = None
+
new_parent_id = None
+
+
root_ref = None
+
reply_ref = None
+
if parent_id:
+
parents = self._find_parent(parent_id)
+
if not parents:
+
return
+
root_ref, reply_ref, new_root_id, new_parent_id = parents
+
+
tokens = post.get_tokens().copy()
+
+
unique_labels: set[str] = set()
+
cw = post.get_spoiler()
+
if cw:
+
tokens.insert(0, cross.TextToken("CW: " + cw + "\n\n"))
+
unique_labels.add("graphic-media")
+
+
# from bsky.app, a post can only have one of those labels
+
if PORN_PATTERN.search(cw):
+
unique_labels.add("porn")
+
elif ADULT_PATTERN.search(cw):
+
unique_labels.add("sexual")
+
+
if post.is_sensitive():
+
unique_labels.add("graphic-media")
+
+
labels = (
+
models.ComAtprotoLabelDefs.SelfLabels(
+
values=[
+
models.ComAtprotoLabelDefs.SelfLabel(val=label)
+
for label in unique_labels
+
]
+
)
+
if unique_labels
+
else None
+
)
+
+
sup_media, unsup_media = self._split_attachments(post.get_attachments())
+
+
if unsup_media:
+
if tokens:
+
tokens.append(cross.TextToken("\n"))
+
for i, attachment in enumerate(unsup_media):
+
tokens.append(
+
cross.LinkToken(
+
attachment.url, f"[{get_filename_from_url(attachment.url)}]"
+
)
+
)
+
tokens.append(cross.TextToken(" "))
+
+
if post.get_text_type() == "text/x.misskeymarkdown":
+
tokens, status = mfm_util.strip_mfm(tokens)
+
post_url = post.get_post_url()
+
if status and post_url:
+
tokens.append(cross.TextToken("\n"))
+
tokens.append(
+
cross.LinkToken(post_url, "[Post contains MFM, see original]")
+
)
+
+
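# split into chunks that fit bluesky's 300-character post limit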
split_tokens: list[list[cross.Token]] = cross.split_tokens(tokens, 300)
+
post_text: list[client_utils.TextBuilder] = []
+
+
# convert tokens into rich text. skip post if contains unsupported tokens
+
for block in split_tokens:
+
rich_text = tokens_to_richtext(block)
+
+
if not rich_text:
+
LOGGER.error(
+
"Skipping '%s' as it contains invalid rich text types!",
+
post.get_id(),
+
)
+
return
+
post_text.append(rich_text)
+
+
if not post_text:
+
post_text = [client_utils.TextBuilder().text("")]
+
+
for m in sup_media:
+
if m.mime.startswith("image/"):
+
if len(m.io) > 2_000_000:
+
LOGGER.error(
+
"Skipping post_id '%s', failed to download attachment! File too large.",
+
post.get_id(),
+
)
+
return
+
+
if m.mime.startswith("video/"):
+
if m.mime != "video/mp4" and not self.options.encode_videos:
+
LOGGER.info(
+
"Video is not mp4, but encoding is disabled. Skipping '%s'...",
+
post.get_id(),
+
)
+
return
+
+
if len(m.io) > 100_000_000:
+
LOGGER.error(
+
"Skipping post_id '%s', failed to download attachment! File too large?",
+
post.get_id(),
+
)
+
return
+
+
created_records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
+
baked_media = self._split_media_per_post(post_text, sup_media)
+
+
for text, attachments in baked_media:
+
if not attachments:
+
if reply_ref and root_ref:
+
new_post = self.bsky.send_post(
+
text,
+
reply_to=models.AppBskyFeedPost.ReplyRef(
+
parent=reply_ref, root=root_ref
+
),
+
labels=labels,
+
time_iso=post.get_timestamp(),
+
)
+
else:
+
new_post = self.bsky.send_post(
+
text, labels=labels, time_iso=post.get_timestamp()
+
)
+
root_ref = models.create_strong_ref(new_post)
+
+
self.bsky.create_gates(
+
self.options.thread_gate,
+
self.options.quote_gate,
+
new_post.uri,
+
time_iso=post.get_timestamp(),
+
)
+
reply_ref = models.create_strong_ref(new_post)
+
created_records.append(new_post)
+
else:
+
# if the first attachment is an image, all attachments in this post are images (videos always get a post of their own)
+
if attachments[0].mime.startswith("image/"):
+
images: list[bytes] = []
+
image_alts: list[str] = []
+
image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []
+
+
for attachment in attachments:
+
image_io = compress_image(attachment.io, quality=100)
+
metadata = get_media_meta(image_io)
+
+
if len(image_io) > 1_000_000:
+
LOGGER.info("Compressing %s...", attachment.name)
+
image_io = compress_image(image_io)
+
+
images.append(image_io)
+
image_alts.append(attachment.alt)
+
image_aspect_ratios.append(
+
models.AppBskyEmbedDefs.AspectRatio(
+
width=metadata["width"], height=metadata["height"]
+
)
+
)
+
+
new_post = self.bsky.send_images(
+
text=text,
+
images=images,
+
image_alts=image_alts,
+
image_aspect_ratios=image_aspect_ratios,
+
reply_to=models.AppBskyFeedPost.ReplyRef(
+
parent=reply_ref, root=root_ref
+
)
+
if root_ref and reply_ref
+
else None,
+
labels=labels,
+
time_iso=post.get_timestamp(),
+
)
+
if not root_ref:
+
root_ref = models.create_strong_ref(new_post)
+
+
self.bsky.create_gates(
+
self.options.thread_gate,
+
self.options.quote_gate,
+
new_post.uri,
+
time_iso=post.get_timestamp(),
+
)
+
reply_ref = models.create_strong_ref(new_post)
+
created_records.append(new_post)
+
else:  # a video post is guaranteed to have exactly one attachment
+
metadata = get_media_meta(attachments[0].io)
+
if metadata["duration"] > 180:
+
LOGGER.info(
+
"Skipping post_id '%s', video attachment too long!",
+
post.get_id(),
+
)
+
return
+
+
video_io = attachments[0].io
+
if attachments[0].mime != "video/mp4":
+
LOGGER.info("Converting %s to mp4...", attachments[0].name)
+
video_io = convert_to_mp4(video_io)
+
+
aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(
+
width=metadata["width"], height=metadata["height"]
+
)
+
+
new_post = self.bsky.send_video(
+
text=text,
+
video=video_io,
+
video_aspect_ratio=aspect_ratio,
+
video_alt=attachments[0].alt,
+
reply_to=models.AppBskyFeedPost.ReplyRef(
+
parent=reply_ref, root=root_ref
+
)
+
if root_ref and reply_ref
+
else None,
+
labels=labels,
+
time_iso=post.get_timestamp(),
+
)
+
if not root_ref:
+
root_ref = models.create_strong_ref(new_post)
+
+
self.bsky.create_gates(
+
self.options.thread_gate,
+
self.options.quote_gate,
+
new_post.uri,
+
time_iso=post.get_timestamp(),
+
)
+
reply_ref = models.create_strong_ref(new_post)
+
created_records.append(new_post)
+
+
db_post = database.find_post(
+
self.db, post.get_id(), self.input.user_id, self.input.service
+
)
+
assert db_post, "ghghghhhhh"
+
+
if new_root_id is None or new_parent_id is None:
+
new_root_id = database.insert_post(
+
self.db, created_records[0].uri, login.did, SERVICE
+
)
+
database.store_data(
+
self.db,
+
created_records[0].uri,
+
login.did,
+
SERVICE,
+
{"cid": created_records[0].cid},
+
)
+
+
new_parent_id = new_root_id
+
database.insert_mapping(self.db, db_post["id"], new_parent_id)
+
created_records = created_records[1:]
+
+
for record in created_records:
+
new_parent_id = database.insert_reply(
+
self.db, record.uri, login.did, SERVICE, new_parent_id, new_root_id
+
)
+
database.store_data(
+
self.db, record.uri, login.did, SERVICE, {"cid": record.cid}
+
)
+
database.insert_mapping(self.db, db_post["id"], new_parent_id)
+
+
def delete_post(self, identifier: str):
+
login = self.__check_login()
+
+
post = database.find_post(
+
self.db, identifier, self.input.user_id, self.input.service
+
)
+
if not post:
+
return
+
+
mappings = database.find_mappings(self.db, post["id"], SERVICE, login.did)
+
for mapping in mappings[::-1]:
+
LOGGER.info("Deleting '%s'...", mapping[0])
+
self.bsky.delete_post(mapping[0])
+
database.delete_post(self.db, mapping[0], login.did, SERVICE)
+
+
def accept_repost(self, repost_id: str, reposted_id: str):
+
login, repost = self.__delete_repost(repost_id)
+
if not (login and repost):
+
return
+
+
reposted = database.find_post(
+
self.db, reposted_id, self.input.user_id, self.input.service
+
)
+
if not reposted:
+
return
+
+
# mappings of the reposted post
+
mappings = database.find_mappings(self.db, reposted["id"], SERVICE, login.did)
+
if mappings:
+
cid = database.fetch_data(self.db, mappings[0][0], login.did, SERVICE)[
+
"cid"
+
]
+
rsp = self.bsky.repost(mappings[0][0], cid)
+
+
internal_id = database.insert_repost(
+
self.db, rsp.uri, reposted["id"], login.did, SERVICE
+
)
+
database.store_data(self.db, rsp.uri, login.did, SERVICE, {"cid": rsp.cid})
+
database.insert_mapping(self.db, repost["id"], internal_id)
+
+
def __delete_repost(
+
self, repost_id: str
+
) -> tuple[models.AppBskyActorDefs.ProfileViewDetailed | None, dict | None]:
+
login = self.__check_login()
+
+
repost = database.find_post(
+
self.db, repost_id, self.input.user_id, self.input.service
+
)
+
if not repost:
+
return None, None
+
+
mappings = database.find_mappings(self.db, repost["id"], SERVICE, login.did)
+
if mappings:
+
LOGGER.info("Deleting '%s'...", mappings[0][0])
+
self.bsky.unrepost(mappings[0][0])
+
database.delete_post(self.db, mappings[0][0], login.did, SERVICE)
+
return login, repost
+
+
def delete_repost(self, repost_id: str):
+
self.__delete_repost(repost_id)
-167
bluesky.py
···
-
from atproto import client_utils, Client, AtUri
-
from atproto_client import models
-
-
class Bluesky():
-
def __init__(self, client: Client) -> None:
-
self.client = client
-
-
def send_video(
-
self,
-
text: str | client_utils.TextBuilder,
-
video: bytes,
-
video_alt: str | None = None,
-
video_aspect_ratio: models.AppBskyEmbedDefs.AspectRatio | None = None,
-
reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
-
langs: list[str] | None = None,
-
facets: list[models.AppBskyRichtextFacet.Main] | None = None,
-
labels: models.ComAtprotoLabelDefs.SelfLabels | None = None
-
) -> models.AppBskyFeedPost.CreateRecordResponse:
-
-
if video_alt is None:
-
video_alt = ''
-
-
upload = self.client.upload_blob(video)
-
-
return self.send_post(
-
text,
-
reply_to=reply_to,
-
embed=models.AppBskyEmbedVideo.Main(video=upload.blob, alt=video_alt, aspect_ratio=video_aspect_ratio),
-
langs=langs,
-
facets=facets,
-
labels=labels
-
)
-
-
def send_images(
-
self,
-
text: str | client_utils.TextBuilder,
-
images: list[bytes],
-
image_alts: list[str] | None = None,
-
image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] | None = None,
-
reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
-
langs: list[str] | None = None,
-
facets: list[models.AppBskyRichtextFacet.Main] | None = None,
-
labels: models.ComAtprotoLabelDefs.SelfLabels | None = None
-
) -> models.AppBskyFeedPost.CreateRecordResponse:
-
-
if image_alts is None:
-
image_alts = [''] * len(images)
-
else:
-
diff = len(images) - len(image_alts)
-
image_alts = image_alts + [''] * diff
-
-
if image_aspect_ratios is None:
-
aligned_image_aspect_ratios = [None] * len(images)
-
else:
-
diff = len(images) - len(image_aspect_ratios)
-
aligned_image_aspect_ratios = image_aspect_ratios + [None] * diff
-
-
uploads = [self.client.upload_blob(image) for image in images]
-
-
embed_images = [
-
models.AppBskyEmbedImages.Image(alt=alt, image=upload.blob, aspect_ratio=aspect_ratio)
-
for alt, upload, aspect_ratio in zip(image_alts, uploads, aligned_image_aspect_ratios)
-
]
-
-
return self.send_post(
-
text,
-
reply_to=reply_to,
-
embed=models.AppBskyEmbedImages.Main(images=embed_images),
-
langs=langs,
-
facets=facets,
-
labels=labels
-
)
-
-
def send_post(
-
self,
-
text: str | client_utils.TextBuilder,
-
reply_to: models.AppBskyFeedPost.ReplyRef | None = None,
-
embed:
-
None |
-
models.AppBskyEmbedImages.Main |
-
models.AppBskyEmbedExternal.Main |
-
models.AppBskyEmbedRecord.Main |
-
models.AppBskyEmbedRecordWithMedia.Main |
-
models.AppBskyEmbedVideo.Main = None,
-
langs: list[str] | None = None,
-
facets: list[models.AppBskyRichtextFacet.Main] | None = None,
-
labels: models.ComAtprotoLabelDefs.SelfLabels | None = None
-
) -> models.AppBskyFeedPost.CreateRecordResponse:
-
-
if isinstance(text, client_utils.TextBuilder):
-
facets = text.build_facets()
-
text = text.build_text()
-
-
repo = self.client.me and self.client.me.did
-
if not repo:
-
raise Exception("Client not logged in!")
-
-
if not langs:
-
langs = ['en']
-
-
record = models.AppBskyFeedPost.Record(
-
created_at=self.client.get_current_time_iso(),
-
text=text,
-
reply=reply_to,
-
embed=embed,
-
langs=langs,
-
facets=facets,
-
labels=labels
-
)
-
return self.client.app.bsky.feed.post.create(repo, record)
-
-
def create_gates(self, options: dict, post_uri: str):
-
account = self.client.me
-
if not account:
-
raise Exception("Client not logged in!")
-
-
rkey = AtUri.from_str(post_uri).rkey
-
time = self.client.get_current_time_iso()
-
-
thread_gate_opts = options.get('thread_gate', [])
-
if 'everybody' not in thread_gate_opts:
-
allow = []
-
if thread_gate_opts:
-
if 'following' in thread_gate_opts:
-
allow.append(models.AppBskyFeedThreadgate.FollowingRule())
-
if 'followers' in thread_gate_opts:
-
allow.append(models.AppBskyFeedThreadgate.FollowerRule())
-
if 'mentioned' in thread_gate_opts:
-
allow.append(models.AppBskyFeedThreadgate.MentionRule())
-
-
thread_gate = models.AppBskyFeedThreadgate.Record(
-
post=post_uri,
-
created_at=time,
-
allow=allow
-
)
-
-
self.client.app.bsky.feed.threadgate.create(account.did, thread_gate, rkey)
-
-
if options['quote_gate']:
-
post_gate = models.AppBskyFeedPostgate.Record(
-
post=post_uri,
-
created_at=time,
-
embedding_rules=[
-
models.AppBskyFeedPostgate.DisableRule()
-
]
-
)
-
-
self.client.app.bsky.feed.postgate.create(account.did, post_gate, rkey)
-
-
-
def tokens_to_richtext(tokens: list[dict]) -> client_utils.TextBuilder | None:
-
builder: client_utils.TextBuilder = client_utils.TextBuilder()
-
-
for token in tokens:
-
token_type = token['type']
-
-
if token_type == 'text':
-
builder.text(token['value'])
-
elif token_type == 'hashtag':
-
builder.tag(token['value'], token['value'][1:])
-
elif token_type == 'link':
-
builder.link(token['label'], token['value'])
-
else:
-
# Fail on mention!
-
return None
-
-
return builder
+237
cross.py
···
+
import re
+
from abc import ABC, abstractmethod
+
from datetime import datetime, timezone
+
from typing import Any, Callable
+
+
from util.database import DataBaseWorker
+
from util.media import MediaInfo
+
from util.util import LOGGER, canonical_label
+
+
ALTERNATE = re.compile(r"\S+|\s+")
+
+
+
# generic token
+
class Token:
+
def __init__(self, type: str) -> None:
+
self.type = type
+
+
+
class TextToken(Token):
+
def __init__(self, text: str) -> None:
+
super().__init__("text")
+
self.text = text
+
+
+
# token that represents a link to a website. e.g. [link](https://google.com/)
+
class LinkToken(Token):
+
def __init__(self, href: str, label: str) -> None:
+
super().__init__("link")
+
self.href = href
+
self.label = label
+
+
+
# token that represents a hashtag. e.g. #SocialMedia
+
class TagToken(Token):
+
def __init__(self, tag: str) -> None:
+
super().__init__("tag")
+
self.tag = tag
+
+
+
# token that represents a mention of a user.
+
class MentionToken(Token):
+
def __init__(self, username: str, uri: str) -> None:
+
super().__init__("mention")
+
self.username = username
+
self.uri = uri
+
+
+
class MediaMeta:
+
def __init__(self, width: int, height: int, duration: float) -> None:
+
self.width = width
+
self.height = height
+
self.duration = duration
+
+
def get_width(self) -> int:
+
return self.width
+
+
def get_height(self) -> int:
+
return self.height
+
+
def get_duration(self) -> float:
+
return self.duration
+
+
+
class Post(ABC):
+
@abstractmethod
+
def get_id(self) -> str:
+
return ""
+
+
@abstractmethod
+
def get_parent_id(self) -> str | None:
+
pass
+
+
@abstractmethod
+
def get_tokens(self) -> list[Token]:
+
pass
+
+
# returns input text type.
+
# text/plain, text/markdown, text/x.misskeymarkdown
+
@abstractmethod
+
def get_text_type(self) -> str:
+
pass
+
+
# post iso timestamp
+
@abstractmethod
+
def get_timestamp(self) -> str:
+
pass
+
+
def get_attachments(self) -> list[MediaInfo]:
+
return []
+
+
def get_spoiler(self) -> str | None:
+
return None
+
+
def get_languages(self) -> list[str]:
+
return []
+
+
def is_sensitive(self) -> bool:
+
return False
+
+
def get_post_url(self) -> str | None:
+
return None
+
+
+
# generic input service.
+
# user and service for db queries
+
class Input:
+
def __init__(
+
self, service: str, user_id: str, settings: dict, db: DataBaseWorker
+
) -> None:
+
self.service = service
+
self.user_id = user_id
+
self.settings = settings
+
self.db = db
+
+
async def listen(self, outputs: list, handler: Callable[[Post], Any]):
+
pass
+
+
+
class Output:
+
def __init__(self, input: Input, settings: dict, db: DataBaseWorker) -> None:
+
self.input = input
+
self.settings = settings
+
self.db = db
+
+
def accept_post(self, post: Post):
+
LOGGER.warning('Not Implemented.. "posted" %s', post.get_id())
+
+
def delete_post(self, identifier: str):
+
LOGGER.warning('Not Implemented.. "deleted" %s', identifier)
+
+
def accept_repost(self, repost_id: str, reposted_id: str):
+
LOGGER.warning('Not Implemented.. "reblogged" %s, %s', repost_id, reposted_id)
+
+
def delete_repost(self, repost_id: str):
+
LOGGER.warning('Not Implemented.. "removed reblog" %s', repost_id)
+
+
+
def test_filters(tokens: list[Token], filters: list[re.Pattern[str]]):
+
if not tokens or not filters:
+
return True
+
+
markdown = ""
+
+
for token in tokens:
+
if isinstance(token, TextToken):
+
markdown += token.text
+
elif isinstance(token, LinkToken):
+
markdown += f"[{token.label}]({token.href})"
+
elif isinstance(token, TagToken):
+
markdown += "#" + token.tag
+
elif isinstance(token, MentionToken):
+
markdown += token.username
+
+
for filter in filters:
+
if filter.search(markdown):
+
return False
+
+
return True
+
+
+
def split_tokens(
+
tokens: list[Token], max_chars: int, max_link_len: int = 35
+
) -> list[list[Token]]:
+
def new_block():
+
nonlocal blocks, block, length
+
if block:
+
blocks.append(block)
+
block = []
+
length = 0
+
+
def append_text(text_segment):
+
nonlocal block
+
# if the last element in the current block is also text, just append to it
+
if block and isinstance(block[-1], TextToken):
+
block[-1].text += text_segment
+
else:
+
block.append(TextToken(text_segment))
+
+
blocks: list[list[Token]] = []
+
block: list[Token] = []
+
length = 0
+
+
for tk in tokens:
+
if isinstance(tk, TagToken):
+
tag_len = 1 + len(tk.tag) # (#) + tag
+
if length + tag_len > max_chars:
+
new_block() # create new block if the current one is too large
+
+
block.append(tk)
+
length += tag_len
+
elif isinstance(tk, LinkToken):  # TODO labels should probably be split too
+
link_len = len(tk.label)
+
if canonical_label(
+
tk.label, tk.href
+
): # cut down the link if the label is canonical
+
link_len = min(link_len, max_link_len)
+
+
if length + link_len > max_chars:
+
new_block()
+
block.append(tk)
+
length += link_len
+
elif isinstance(tk, TextToken):
+
segments: list[str] = ALTERNATE.findall(tk.text)
+
+
for seg in segments:
+
seg_len: int = len(seg)
+
if length + seg_len <= max_chars - (0 if seg.isspace() else 1):
+
append_text(seg)
+
length += seg_len
+
continue
+
+
if length > 0:
+
new_block()
+
+
if not seg.isspace():
+
while len(seg) > max_chars - 1:
+
chunk = seg[: max_chars - 1] + "-"
+
append_text(chunk)
+
new_block()
+
seg = seg[max_chars - 1 :]
+
else:
+
while len(seg) > max_chars:
+
chunk = seg[:max_chars]
+
append_text(chunk)
+
new_block()
+
seg = seg[max_chars:]
+
+
if seg:
+
append_text(seg)
+
length = len(seg)
+
else: # TODO fix mentions
+
block.append(tk)
+
+
if block:
+
blocks.append(block)
+
+
return blocks
-118
database.py
···
-
import sqlite3
-
import json
-
-
import sqlite3
-
import json
-
-
class DataBase():
-
-
def __init__(self, path: str) -> None:
-
self.path = path
-
connection = sqlite3.connect(self.path, autocommit=True)
-
cursor = connection.cursor()
-
cursor.execute('''
-
CREATE TABLE IF NOT EXISTS posts (
-
id TEXT,
-
user_id TEXT,
-
data TEXT,
-
PRIMARY KEY (id, user_id)
-
)
-
''')
-
cursor.close()
-
-
def connect(self) -> sqlite3.Connection:
-
return sqlite3.connect(self.path, autocommit=True)
-
-
def put_post(self, db: sqlite3.Connection, user_id: str, id: str, data: dict):
-
cursor = db.cursor()
-
cursor.execute('''
-
INSERT OR REPLACE INTO posts (id, user_id, data) VALUES (?, ?, ?)
-
''', (id, user_id, json.dumps(data)))
-
cursor.close()
-
-
def del_post(self, db: sqlite3.Connection, user_id: str, id: str):
-
cursor = db.cursor()
-
cursor.execute('''
-
DELETE FROM posts WHERE id = ? AND user_id = ?
-
''', (id, user_id))
-
cursor.close()
-
-
def read_data(self, db: sqlite3.Connection, user_id: str, id: str) -> dict | None:
-
cursor = db.cursor()
-
cursor.execute('''
-
SELECT data FROM posts WHERE id = ? AND user_id = ?
-
''', (id, user_id))
-
row = cursor.fetchone()
-
cursor.close()
-
if row:
-
data_json = row[0]
-
return json.loads(data_json)
-
return None
-
-
def get_all_children(self, db: sqlite3.Connection, user_id: str, id: str) -> dict[str, dict]:
-
cursor = db.cursor()
-
cursor.execute('''
-
WITH RECURSIVE thread_cte (id, user_id, data, current_post_uri) AS (
-
SELECT
-
T1.id,
-
T1.user_id,
-
T1.data,
-
json_extract(
-
T1.data,
-
'$.mapped_post_refs[' || (json_array_length(T1.data, '$.mapped_post_refs') - 1) || '].uri'
-
) AS current_post_uri
-
FROM
-
posts AS T1
-
WHERE
-
T1.id = ? AND T1.user_id = ?
-
-
UNION ALL
-
-
SELECT
-
C.id,
-
C.user_id,
-
C.data,
-
json_extract(
-
C.data,
-
'$.mapped_post_refs[' || (json_array_length(C.data, '$.mapped_post_refs') - 1) || '].uri'
-
) AS current_post_uri
-
FROM
-
posts AS C
-
JOIN
-
thread_cte AS P ON json_extract(C.data, '$.parent_ref.uri') = P.current_post_uri
-
WHERE
-
C.user_id = ?
-
)
-
SELECT id, data FROM thread_cte;
-
''', (id, user_id, user_id))
-
raw_data = cursor.fetchall()
-
cursor.close()
-
-
if not raw_data:
-
return {}
-
-
data: dict[str, dict] = {}
-
for post_id, post_data in raw_data:
-
data[post_id] = json.loads(post_data)
-
-
return data
-
-
class UserScopedDB:
-
def __init__(self, db: DataBase, user_id: str):
-
self.db = db
-
self.user_id = user_id
-
-
def connect(self) -> sqlite3.Connection:
-
return self.db.connect()
-
-
def put_post(self, db: sqlite3.Connection, id: str, data: dict):
-
return self.db.put_post(db, self.user_id, id, data)
-
-
def del_post(self, db: sqlite3.Connection, id: str):
-
return self.db.del_post(db, self.user_id, id)
-
-
def read_data(self, db: sqlite3.Connection, id: str) -> dict | None:
-
return self.db.read_data(db, self.user_id, id)
-
-
def get_all_children(self, db: sqlite3.Connection, id: str) -> dict[str, dict]:
-
return self.db.get_all_children(db, self.user_id, id)
+133 -383
main.py
···
-
import click
+
import asyncio
import json
-
import asyncio, threading, queue
-
from atproto import IdResolver, Client, client_utils
-
import atproto_client.models as models
-
import util, mastodon, bluesky, database
import os
-
import media_util
+
import queue
+
import threading
import traceback
-
ADULT_LABEL = ["sexual content", "nsfw"]
-
PORN_LABEL = ["porn", "yiff"]
+
import cross
+
import util.database as database
+
from bluesky.input import BlueskyJetstreamInput
+
from bluesky.output import BlueskyOutput, BlueskyOutputOptions
+
from mastodon.input import MastodonInput, MastodonInputOptions
+
from mastodon.output import MastodonOutput
+
from misskey.input import MisskeyInput
+
from util.util import LOGGER, as_json
-
class SocketListener():
-
def __init__(self, user_id: str, atproto: Client, settings: dict, db_path: str) -> None:
-
self.user_id = user_id
-
self.atp = bluesky.Bluesky(atproto)
-
self.settings = settings
-
self.db = database.UserScopedDB(database.DataBase(db_path), user_id)
-
-
def create_post_records(self, status: dict) -> list[models.AppBskyFeedPost.CreateRecordResponse] | None:
-
tokens: list[dict] = util.tokenize_html(status['content'])
-
-
label_text: set[str] = set()
-
status_spoiler = status['spoiler_text']
-
if status_spoiler:
-
tokens.insert(0, {"type": "text", "value": "CW: " + status_spoiler + '\n\n'})
-
label_text.add('graphic-media')
-
-
if any(tag in status_spoiler for tag in ADULT_LABEL):
-
label_text.add('sexual')
-
-
if any(tag in status_spoiler for tag in PORN_LABEL):
-
label_text.add('porn')
-
-
if status['sensitive']:
-
label_text.add('graphic-media')
-
-
labels = models.ComAtprotoLabelDefs.SelfLabels(values=[models.ComAtprotoLabelDefs.SelfLabel(val=label) for label in label_text])
-
-
split_tokens: list[list[dict]] = util.split_tokens(tokens, 300)
-
-
post_text: list[client_utils.TextBuilder] = []
-
for funnel in split_tokens:
-
rich_text = bluesky.tokens_to_richtext(funnel)
-
-
if rich_text is None:
-
click.echo(f"Skipping '{status["id"]}' as it contains invalid rich text types!")
-
return None
-
post_text.append(rich_text)
-
-
if not post_text:
-
post_text = [client_utils.TextBuilder().text('')]
-
-
records: list[models.AppBskyFeedPost.CreateRecordResponse] = []
-
-
in_reply_to_id: str = status['in_reply_to_id']
-
-
root_ref = None
-
reply_ref = None
-
if in_reply_to_id:
-
db = self.db.connect()
-
data: dict | None = self.db.read_data(db, in_reply_to_id)
-
db.close()
-
-
if data is not None:
-
root_data = data['root_ref']
-
if not root_data:
-
root_data = data['mapped_post_refs'][0]
-
-
reply_data = data['mapped_post_refs'][-1]
-
-
root_post = models.AppBskyFeedPost.CreateRecordResponse(uri=str(root_data['uri']), cid=str(root_data['cid']))
-
root_ref = models.create_strong_ref(root_post)
-
-
reply_post = models.AppBskyFeedPost.CreateRecordResponse(uri=str(reply_data['uri']), cid=str(reply_data['cid']))
-
reply_ref = models.create_strong_ref(reply_post)
-
-
attachments: list[dict] = status['media_attachments']
-
if not attachments:
-
for post in post_text:
-
if reply_ref and root_ref:
-
new_post = self.atp.send_post(post, reply_to=models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
), labels=labels)
-
else:
-
new_post = self.atp.send_post(post, labels=labels)
-
root_ref = models.create_strong_ref(new_post)
-
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
reply_ref = models.create_strong_ref(new_post)
-
records.append(new_post)
-
-
return records
-
elif len(attachments) <= 4:
-
if len(attachments) == 1 and attachments[0]['type'] == 'video':
-
video: dict = attachments[0]
-
-
video_io = media_util.download_blob(video['url'], max_bytes=100_000_000)
-
if not video_io:
-
click.echo(f"Skipping post_id '{status['id']}', failed to download attachment!")
-
return None
-
-
if len(video_io) > 100_000_000:
-
click.echo(f"Skipping post_id '{status['id']}'. Video file too large")
-
return None
-
-
# some mastodon api implementations don't seem to provide video meta
-
# try to probe it with ffmpeg
-
meta = media_util.get_video_meta(video_io)
-
if meta.get('duration', -1) > 180:
-
click.echo(f"Skipping post_id '{status["id"]}'. Video attachment too long!")
-
return None
-
-
aspect_ratio = models.AppBskyEmbedDefs.AspectRatio(width=meta['width'], height=meta['height'])
-
-
new_post = self.atp.send_video(
-
text=post_text[0],
-
video=video_io,
-
video_aspect_ratio=aspect_ratio,
-
video_alt=video['description'] if video['description'] else '',
-
reply_to= models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
) if root_ref and reply_ref else None,
-
labels=labels
-
)
-
if not root_ref:
-
root_ref = models.create_strong_ref(new_post)
-
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
reply_ref = models.create_strong_ref(new_post)
-
else:
-
# check if all attachments are images.
-
# bluesky doesn't support gifv and unknown (TODO link the file)
-
for attachment in attachments:
-
if attachment['type'] != 'image':
-
click.echo(f"Skipping post_id '{status['id']}'. Attachment type mismatch. got: '{attachment['type']}' expected: 'image'")
-
return None
-
-
images: list[bytes] = []
-
image_alts: list[str] = []
-
image_aspect_ratios: list[models.AppBskyEmbedDefs.AspectRatio] = []
-
for attachment in attachments:
-
-
image_io = media_util.download_blob(attachment['url'], max_bytes=2_000_000)
-
if not image_io:
-
click.echo(f"Skipping post_id '{status['id']}', failed to download attachment!")
-
return None
-
-
# Try to compress image if it's too large
-
if len(image_io) > 1_000_000:
-
click.echo(f"Trying to compress {attachment['url']}..")
-
image_io = media_util.compress_image(image_io)
-
if len(image_io) > 1_000_000:
-
click.echo(f"Skipping post_id '{status['id']}', media attachment still too large after compression!")
-
return None
-
-
meta = util.safe_get(attachment, 'meta', {}).get('original')
-
-
# some mastodon api implementations don't seem to provide image meta
-
# try to probe it with ffmpeg
-
if not meta:
-
meta = media_util.get_image_meta(image_io)
-
-
images.append(image_io)
-
image_alts.append(attachment['description'] if attachment['description'] else '')
-
image_aspect_ratios.append(models.AppBskyEmbedDefs.AspectRatio(width=meta['width'], height=meta['height']))
-
-
new_post = self.atp.send_images(
-
text=post_text[0],
-
images=images,
-
image_alts=image_alts,
-
image_aspect_ratios=image_aspect_ratios,
-
reply_to= models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
) if root_ref and reply_ref else None,
-
labels=labels
-
)
-
if not root_ref:
-
root_ref = models.create_strong_ref(new_post)
-
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
reply_ref = models.create_strong_ref(new_post)
-
-
records.append(new_post)
-
for post in post_text[1:]:
-
new_post = self.atp.send_post(post, reply_to=models.AppBskyFeedPost.ReplyRef(
-
parent=reply_ref,
-
root=root_ref
-
), labels=labels)
-
self.atp.create_gates(self.settings.get('bluesky', {}), new_post.uri)
-
-
reply_ref = models.create_strong_ref(new_post)
-
records.append(new_post)
-
-
return records
-
else:
-
click.echo(f"Skipping post_id '{status['id']}'. Too many attachments!")
-
return records if records else None
-
-
def on_update(self, status: dict):
-
if util.safe_get(status, 'account', {})['id'] != self.user_id:
-
return
-
-
if status['reblog'] or status['poll']:
-
# TODO polls not supported on bsky. maybe 3rd party? skip for now
-
# we don't handle reblogs. possible with bridgy(?) and self
-
return
+
DEFAULT_SETTINGS: dict = {
+
"input": {
+
"type": "mastodon-wss",
+
"instance": "env:MASTODON_INSTANCE",
+
"token": "env:MASTODON_TOKEN",
+
"options": MastodonInputOptions({}),
+
},
+
"outputs": [
+
{
+
"type": "bluesky",
+
"handle": "env:BLUESKY_HANDLE",
+
"app-password": "env:BLUESKY_APP_PASSWORD",
+
"options": BlueskyOutputOptions({}),
+
}
+
],
+
}
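The "env:" prefix in these defaults is resolved by as_envvar from util.util, whose body is not part of this diff; a minimal sketch of the assumed convention (hypothetical, the real helper may differ):

import os

def as_envvar(value: str | None) -> str | None:
    # Assumed convention: "env:NAME" is looked up in the process
    # environment; any other value is returned verbatim.
    if value and value.startswith("env:"):
        return os.environ.get(value[4:])
    return value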
-
in_reply: str | None = status['in_reply_to_id']
-
in_reply_to: str | None = status['in_reply_to_account_id']
-
if in_reply_to and in_reply_to != self.user_id:
-
# We don't support replies. possible with bridgy(?)
-
return
-
-
if status['visibility'] not in ['public', 'unlisted']:
-
# Skip f/o and direct posts
-
return
-
-
click.echo(f"Got 'update' event for post '{status['id']}'")
-
-
db = self.db.connect()
-
if in_reply:
-
data: dict | None = self.db.read_data(db, in_reply)
-
if not data:
-
click.echo(f"Post '{status['id']}' is missing parent in the database!")
-
return
-
db.close()
-
-
records = self.create_post_records(status)
-
if records is None:
-
click.echo(f"Skipped crossposting '{status['id']}' due to above erros..")
-
return
-
-
refs: list[dict] = []
-
-
for record in records:
-
refs.append({'cid': record.cid, 'uri': record.uri})
-
-
db = self.db.connect()
-
if not in_reply:
-
self.db.put_post(db, status['id'], {
-
'parent_ref': None,
-
'root_ref': None,
-
'mapped_post_refs': refs
-
})
-
else:
-
self.db.put_post(db, status['id'], {
-
'parent_ref': data['mapped_post_refs'][-1],
-
'root_ref': data['mapped_post_refs'][-1],
-
'mapped_post_refs': refs
-
})
-
db.close()
-
-
def on_delete(self, id: str):
-
db = self.db.connect()
-
post_data = self.db.read_data(db, id)
-
-
if not post_data:
-
return
-
-
click.echo(f"Got 'delete' event for post '{id}'...")
-
-
for ref in post_data['mapped_post_refs']:
-
self.atp.client.delete_post(ref['uri'])
-
-
children: dict[str, dict] = self.db.get_all_children(db, id)
-
for id, data in children.items():
-
for ref in data['mapped_post_refs']:
-
self.atp.client.delete_post(ref['uri'])
-
self.db.del_post(db, id)
-
self.db.del_post(db, id)
-
-
db.close()
-
click.echo(f"Removed post '{id}' and {len(children.items())} replies")
-
-
# TODO Handle edits
-
# The issue is that since there are no edits on bluesky,
-
# we have to recreate the records while keeping the media in tact.
-
# also, since the db only stores post relations, we have to pull all the replies from masto and the pds.
-
def on_status_update(self, status: dict):
-
if status.get('account', {})['id'] != self.user_id:
-
return
-
if status.get('in_reply_to_account_id') != self.user_id:
-
return
-
-
click.echo(f"Got 'status.update' event for post '{status['id']}'")
+
INPUTS = {
+
"mastodon-wss": lambda settings, db: MastodonInput(settings, db),
+
"misskey-wss": lambda settigs, db: MisskeyInput(settigs, db),
+
"bluesky-jetstream-wss": lambda settings, db: BlueskyJetstreamInput(settings, db),
+
}
+
+
OUTPUTS = {
+
"bluesky": lambda input, settings, db: BlueskyOutput(input, settings, db),
+
"mastodon": lambda input, settings, db: MastodonOutput(input, settings, db),
+
}
+
+
+
def execute(data_dir):
+
if not os.path.exists(data_dir):
+
os.makedirs(data_dir)
-
@click.group()
-
def main():
-
pass
+
settings_path = os.path.join(data_dir, "settings.json")
+
database_path = os.path.join(data_dir, "data.db")
-
@main.command('run')
-
@click.option(
-
"-I", "--instance",
-
envvar="MASTODON_INSTANCE",
-
required=True,
-
help="Mastodon compatible instance domain (e.g. https://mastodon.social)"
-
)
-
@click.option(
-
"-T", "--token",
-
envvar="MASTODON_TOKEN",
-
required=True,
-
help="Mastodon access token"
-
)
-
@click.option(
-
"-H", "--handle",
-
envvar="ATPROTO_HANDLE",
-
required=True,
-
help="ATProto handle (e.g. melontini.me)"
-
)
-
@click.option(
-
"-P", "--password",
-
envvar="ATPROTO_PASSWORD",
-
required=True,
-
help="ATProto/Bluesky app password (https://bsky.app/settings/app-passwords)"
-
)
-
@click.option('--data_dir', default='./data', type=click.Path(exists=True, file_okay=False, dir_okay=True, readable=True, writable=True))
-
def run(instance, token, handle, password, data_dir):
-
settings_path = os.path.join(data_dir, 'settings.json')
if not os.path.exists(settings_path):
-
click.echo(f"First launch detected! creating {settings_path} and exiting..")
-
-
with open(settings_path, 'w') as f:
-
json.dump(util.DEFAULT_SETTINGS, f, indent=2)
+
LOGGER.info("First launch detected! Creating %s and exiting!", settings_path)
+
+
with open(settings_path, "w") as f:
+
f.write(as_json(DEFAULT_SETTINGS, indent=2))
return 0
-
with open(settings_path, 'rb') as f:
+
LOGGER.info("Loading settings...")
+
with open(settings_path, "rb") as f:
settings = json.load(f)
-
-
click.echo(f"Connecting to {instance}...")
-
fedi = mastodon.Mastodon(instance, token)
-
-
if not fedi.streaming:
-
click.echo(f"{fedi.instance} does not support streaming timelines!", err=True)
-
return -1
-
-
id = fedi.get_user_id()
-
if not id:
-
click.echo(f"Failed to get user id from token for {fedi.instance}", err=True)
-
return -1
-
click.echo(f"Got user ID '{id}'")
-
-
click.echo(f"Resolving ATP identity for {handle}...")
-
resolver = IdResolver()
-
did: str | None = resolver.handle.resolve(handle)
-
if not did:
-
click.echo(f"Failed to resolve atproto did for handle {handle}!", err=True)
-
return -1
-
-
did_doc = resolver.did.resolve(did)
-
if not did_doc:
-
click.echo(f"Failed to resolve did document from {did}")
-
return -1
-
-
pds = did_doc.get_pds_endpoint()
-
if not pds:
-
click.echo(f"Failed to resolve PDS endpoint for did {did}")
-
return -1
+
+
LOGGER.info("Starting database worker...")
+
db_worker = database.DataBaseWorker(os.path.abspath(database_path))
+
+
db_worker.execute("PRAGMA foreign_keys = ON;")
+
+
# create the posts table
+
# id - internal id of the post
+
# user_id - user id on the service (e.g. a724sknj5y9ydk0w)
+
# service - the service (e.g. https://shrimp.melontini.me)
+
# identifier - post id on the service (e.g. a8mpiyeej0fpjp0p)
+
# parent_id - the internal id of the parent
+
db_worker.execute(
+
"""
+
CREATE TABLE IF NOT EXISTS posts (
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
+
user_id TEXT NOT NULL,
+
service TEXT NOT NULL,
+
identifier TEXT NOT NULL,
+
parent_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL,
+
root_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
+
);
+
"""
+
)
+
+
columns = db_worker.execute("PRAGMA table_info(posts)")
+
column_names = [col[1] for col in columns]
+
if "reposted_id" not in column_names:
+
db_worker.execute("""
+
ALTER TABLE posts
+
ADD COLUMN reposted_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
+
""")
+
if "extra_data" not in column_names:
+
db_worker.execute("""
+
ALTER TABLE posts
+
ADD COLUMN extra_data TEXT NULL
+
""")
+
+
# create the mappings table
+
# original_post_id - the post this was mapped from
+
# mapped_post_id - the post this was mapped to
+
db_worker.execute(
+
"""
+
CREATE TABLE IF NOT EXISTS mappings (
+
original_post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
+
mapped_post_id INTEGER NOT NULL
+
);
+
"""
+
)
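As a worked illustration of the two tables (all identifiers hypothetical): a status "111" from the input service, crossposted as a two-record thread on the output service, would be stored roughly as:

# posts(id, user_id, service, identifier, parent_id, root_id):
#   (1, "u1",          "https://mastodon.example", "111",                                     NULL, NULL)
#   (2, "did:plc:xyz", "https://bsky.example",     "at://did:plc:xyz/app.bsky.feed.post/aaa", NULL, NULL)
#   (3, "did:plc:xyz", "https://bsky.example",     "at://did:plc:xyz/app.bsky.feed.post/bbb", 2,    2)
# mappings(original_post_id, mapped_post_id):
#   (1, 2), (1, 3)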
+
+
input_settings = settings.get("input")
+
if not input_settings:
+
raise Exception("No input specified!")
+
outputs_settings = settings.get("outputs", [])
+
+
input = INPUTS[input_settings["type"]](input_settings, db_worker)
+
+
if not outputs_settings:
+
LOGGER.warning("No outputs specified! Check the config!")
-
click.echo(f"Logging in to {handle} through {pds}...")
-
atp = Client(pds)
-
atp.login(handle, password)
+
outputs: list[cross.Output] = []
+
for output_settings in outputs_settings:
+
outputs.append(
+
OUTPUTS[output_settings["type"]](input, output_settings, db_worker)
+
)
-
click.echo("Starting worker thread...")
-
task_queue = queue.Queue()
+
LOGGER.info("Starting task worker...")
-
def worker():
+
def worker(q: queue.Queue):
while True:
-
task = task_queue.get()
+
task = q.get()
if task is None:
break
+
try:
task()
except Exception as e:
-
click.echo(f"Exception in worker thread!\n{e}", err=True)
+
LOGGER.error(f"Exception in worker thread!\n{e}")
traceback.print_exc()
-
-
thread = threading.Thread(target=worker, daemon=True)
+
finally:
+
q.task_done()
+
+
task_queue = queue.Queue()
+
thread = threading.Thread(target=worker, args=(task_queue,), daemon=True)
thread.start()
-
-
click.echo(f"Listening to {fedi.streaming}...")
-
listener = SocketListener(id, atp, settings, os.path.join(data_dir, 'data.db'))
-
-
def handler(event_type, payload):
-
def handle_event():
-
try:
-
if event_type == 'update':
-
listener.on_update(json.loads(payload))
-
elif event_type == 'delete':
-
listener.on_delete(payload)
-
elif event_type == 'status.update':
-
listener.on_status_update(json.loads(payload))
-
except Exception as e:
-
click.echo(f"Error in event handler: {e}", err=True)
-
traceback.print_exc()
-
task_queue.put(handle_event)
-
-
asyncio.run(fedi.connect_websocket(handler))
-
+
+
LOGGER.info("Connecting to %s...", input.service)
+
try:
+
asyncio.run(input.listen(outputs, lambda x: task_queue.put(x)))
+
except KeyboardInterrupt:
+
LOGGER.info("Stopping...")
+
task_queue.join()
-
task_queue.put(None)
thread.join()
-
return 0
+
if __name__ == "__main__":
-
main()
+
execute("./data")
+52
mastodon/common.py
···
+
import cross
+
from util.media import MediaInfo
+
+
+
class MastodonPost(cross.Post):
+
def __init__(
+
self,
+
status: dict,
+
tokens: list[cross.Token],
+
media_attachments: list[MediaInfo],
+
) -> None:
+
super().__init__()
+
self.id = status["id"]
+
self.parent_id = status.get("in_reply_to_id")
+
self.tokens = tokens
+
self.content_type = status.get("content_type", "text/plain")
+
self.timestamp = status["created_at"]
+
self.media_attachments = media_attachments
+
self.spoiler = status.get("spoiler_text")
+
self.language = [status["language"]] if status.get("language") else []
+
self.sensitive = status.get("sensitive", False)
+
self.url = status.get("url")
+
+
def get_id(self) -> str:
+
return self.id
+
+
def get_parent_id(self) -> str | None:
+
return self.parent_id
+
+
def get_tokens(self) -> list[cross.Token]:
+
return self.tokens
+
+
def get_text_type(self) -> str:
+
return self.content_type
+
+
def get_timestamp(self) -> str:
+
return self.timestamp
+
+
def get_attachments(self) -> list[MediaInfo]:
+
return self.media_attachments
+
+
def get_spoiler(self) -> str | None:
+
return self.spoiler
+
+
def get_languages(self) -> list[str]:
+
return self.language
+
+
def is_sensitive(self) -> bool:
+
return self.sensitive or (self.spoiler is not None and self.spoiler != "")
+
+
def get_post_url(self) -> str | None:
+
return self.url
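MastodonPost fills in the cross.Post interface; the cross module is not part of this diff, so the base class below is only inferred from the methods overridden here and in MisskeyPost:

from abc import ABC, abstractmethod

class Post(ABC):
    # Inferred sketch of cross.Post; the real definition may differ.
    @abstractmethod
    def get_id(self) -> str: ...
    @abstractmethod
    def get_parent_id(self) -> str | None: ...
    @abstractmethod
    def get_tokens(self) -> list: ...
    @abstractmethod
    def get_text_type(self) -> str: ...
    @abstractmethod
    def get_timestamp(self) -> str: ...
    @abstractmethod
    def get_attachments(self) -> list: ...
    @abstractmethod
    def get_spoiler(self) -> str | None: ...
    @abstractmethod
    def get_languages(self) -> list[str]: ...
    @abstractmethod
    def is_sensitive(self) -> bool: ...
    @abstractmethod
    def get_post_url(self) -> str | None: ...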
+225
mastodon/input.py
···
+
import asyncio
+
import json
+
import re
+
from typing import Any, Callable
+
+
import requests
+
import websockets
+
+
import cross
+
import util.database as database
+
import util.html_util as html_util
+
import util.md_util as md_util
+
from mastodon.common import MastodonPost
+
from util.database import DataBaseWorker
+
from util.media import MediaInfo, download_media
+
from util.util import LOGGER, as_envvar
+
+
ALLOWED_VISIBILITY = ["public", "unlisted"]
+
MARKDOWNY = ["text/x.misskeymarkdown", "text/markdown", "text/plain"]
+
+
+
class MastodonInputOptions:
+
def __init__(self, o: dict) -> None:
+
self.allowed_visibility = ALLOWED_VISIBILITY
+
self.filters = [re.compile(f) for f in o.get("regex_filters", [])]
+
+
allowed_visibility = o.get("allowed_visibility")
+
if allowed_visibility is not None:
+
if any([v not in ALLOWED_VISIBILITY for v in allowed_visibility]):
+
raise ValueError(
+
f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}"
+
)
+
self.allowed_visibility = allowed_visibility
+
+
+
class MastodonInput(cross.Input):
+
def __init__(self, settings: dict, db: DataBaseWorker) -> None:
+
self.options = MastodonInputOptions(settings.get("options", {}))
+
self.token = as_envvar(settings.get("token")) or (_ for _ in ()).throw(
+
ValueError("'token' is required")
+
)
+
instance: str = as_envvar(settings.get("instance")) or (_ for _ in ()).throw(
+
ValueError("'instance' is required")
+
)
+
+
service = instance[:-1] if instance.endswith("/") else instance
+
+
LOGGER.info("Verifying %s credentails...", service)
+
responce = requests.get(
+
f"{service}/api/v1/accounts/verify_credentials",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
if responce.status_code != 200:
+
LOGGER.error("Failed to validate user credentials!")
+
responce.raise_for_status()
+
return
+
+
super().__init__(service, responce.json()["id"], settings, db)
+
self.streaming = self._get_streaming_url()
+
+
if not self.streaming:
+
raise Exception("Instance %s does not support streaming!", service)
+
+
def _get_streaming_url(self):
+
response = requests.get(f"{self.service}/api/v1/instance")
+
response.raise_for_status()
+
data: dict = response.json()
+
return (data.get("urls") or {}).get("streaming_api")
+
+
def __to_tokens(self, status: dict):
+
content_type = status.get("content_type", "text/plain")
+
raw_text = status.get("text")
+
+
tags: list[str] = []
+
for tag in status.get("tags", []):
+
tags.append(tag["name"])
+
+
mentions: list[tuple[str, str]] = []
+
for mention in status.get("mentions", []):
+
mentions.append(("@" + mention["username"], "@" + mention["acct"]))
+
+
if raw_text and content_type in MARKDOWNY:
+
return md_util.tokenize_markdown(raw_text, tags, mentions)
+
+
akkoma_ext: dict | None = (status.get("akkoma") or {}).get("source")
+
if akkoma_ext:
+
if akkoma_ext.get("mediaType") in MARKDOWNY:
+
return md_util.tokenize_markdown(akkoma_ext["content"], tags, mentions)
+
+
tokenizer = html_util.HTMLPostTokenizer()
+
tokenizer.mentions = mentions
+
tokenizer.tags = tags
+
tokenizer.feed(status.get("content", ""))
+
return tokenizer.get_tokens()
+
+
def _on_create_post(self, outputs: list[cross.Output], status: dict):
+
# skip events from other users
+
if (status.get("account") or {})["id"] != self.user_id:
+
return
+
+
if status.get("visibility") not in self.options.allowed_visibility:
+
# Skip f/o and direct posts
+
LOGGER.info(
+
"Skipping '%s'! '%s' visibility..",
+
status["id"],
+
status.get("visibility"),
+
)
+
return
+
+
# TODO polls not supported on bsky. maybe 3rd party? skip for now
+
# we don't handle reblogs. possible with bridgy(?) and self
+
# we don't handle quotes.
+
if status.get("poll"):
+
LOGGER.info("Skipping '%s'! Contains a poll..", status["id"])
+
return
+
+
if status.get("quote_id") or status.get("quote"):
+
LOGGER.info("Skipping '%s'! Quote..", status["id"])
+
return
+
+
reblog: dict | None = status.get("reblog")
+
if reblog:
+
if (reblog.get("account") or {})["id"] != self.user_id:
+
LOGGER.info("Skipping '%s'! Reblog of other user..", status["id"])
+
return
+
+
success = database.try_insert_repost(
+
self.db, status["id"], reblog["id"], self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info(
+
"Skipping '%s' as reblogged post was not found in db!", status["id"]
+
)
+
return
+
+
for output in outputs:
+
output.accept_repost(status["id"], reblog["id"])
+
return
+
+
in_reply: str | None = status.get("in_reply_to_id")
+
in_reply_to: str | None = status.get("in_reply_to_account_id")
+
if in_reply_to and in_reply_to != self.user_id:
+
# We don't support replies.
+
LOGGER.info("Skipping '%s'! Reply to other user..", status["id"])
+
return
+
+
success = database.try_insert_post(
+
self.db, status["id"], in_reply, self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info(
+
"Skipping '%s' as parent post was not found in db!", status["id"]
+
)
+
return
+
+
tokens = self.__to_tokens(status)
+
if not cross.test_filters(tokens, self.options.filters):
+
LOGGER.info("Skipping '%s'. Matched a filter!", status["id"])
+
return
+
+
LOGGER.info("Crossposting '%s'...", status["id"])
+
+
media_attachments: list[MediaInfo] = []
+
for attachment in status.get("media_attachments", []):
+
LOGGER.info("Downloading %s...", attachment["url"])
+
info = download_media(
+
attachment["url"], attachment.get("description") or ""
+
)
+
if not info:
+
LOGGER.error("Skipping '%s'. Failed to download media!", status["id"])
+
return
+
media_attachments.append(info)
+
+
cross_post = MastodonPost(status, tokens, media_attachments)
+
for output in outputs:
+
output.accept_post(cross_post)
+
+
def _on_delete_post(self, outputs: list[cross.Output], identifier: str):
+
post = database.find_post(self.db, identifier, self.user_id, self.service)
+
if not post:
+
return
+
+
LOGGER.info("Deleting '%s'...", identifier)
+
if post["reposted_id"]:
+
for output in outputs:
+
output.delete_repost(identifier)
+
else:
+
for output in outputs:
+
output.delete_post(identifier)
+
+
database.delete_post(self.db, identifier, self.user_id, self.service)
+
+
def _on_post(self, outputs: list[cross.Output], event: str, payload: str):
+
match event:
+
case "update":
+
self._on_create_post(outputs, json.loads(payload))
+
case "delete":
+
self._on_delete_post(outputs, payload)
+
+
async def listen(
+
self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]
+
):
+
uri = f"{self.streaming}/api/v1/streaming?stream=user&access_token={self.token}"
+
+
async for ws in websockets.connect(
+
uri, extra_headers={"User-Agent": "XPost/0.0.3"}
+
):
+
try:
+
LOGGER.info("Listening to %s...", self.streaming)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
data = json.loads(msg)
+
event: str = data.get("event")
+
payload: str = data.get("payload")
+
+
submit(lambda event=event, payload=payload: self._on_post(outputs, str(event), str(payload)))  # bind per message; the task runs later on the worker thread
+
+
listen = asyncio.create_task(listen_for_messages())
+
+
await asyncio.gather(listen)
+
except websockets.ConnectionClosedError as e:
+
LOGGER.error(e, stack_info=True, exc_info=True)
+
LOGGER.info("Reconnecting to %s...", self.streaming)
+
continue
+448
mastodon/output.py
···
+
import time
+
+
import requests
+
+
import cross
+
import misskey.mfm_util as mfm_util
+
import util.database as database
+
from util.database import DataBaseWorker
+
from util.media import MediaInfo
+
from util.util import LOGGER, as_envvar, canonical_label
+
+
POSSIBLE_MIMES = [
+
"audio/ogg",
+
"audio/mp3",
+
"image/webp",
+
"image/jpeg",
+
"image/png",
+
"video/mp4",
+
"video/quicktime",
+
"video/webm",
+
]
+
+
TEXT_MIMES = ["text/x.misskeymarkdown", "text/markdown", "text/plain"]
+
+
ALLOWED_POSTING_VISIBILITY = ["public", "unlisted", "private"]
+
+
+
class MastodonOutputOptions:
+
def __init__(self, o: dict) -> None:
+
self.visibility = "public"
+
+
visibility = o.get("visibility")
+
if visibility is not None:
+
if visibility not in ALLOWED_POSTING_VISIBILITY:
+
raise ValueError(
+
f"'visibility' only accepts {', '.join(ALLOWED_POSTING_VISIBILITY)}, got: {visibility}"
+
)
+
self.visibility = visibility
+
+
+
class MastodonOutput(cross.Output):
+
def __init__(self, input: cross.Input, settings: dict, db: DataBaseWorker) -> None:
+
super().__init__(input, settings, db)
+
self.options = settings.get("options") or {}
+
self.token = as_envvar(settings.get("token")) or (_ for _ in ()).throw(
+
ValueError("'token' is required")
+
)
+
instance: str = as_envvar(settings.get("instance")) or (_ for _ in ()).throw(
+
ValueError("'instance' is required")
+
)
+
+
self.service = instance[:-1] if instance.endswith("/") else instance
+
+
LOGGER.info("Verifying %s credentails...", self.service)
+
responce = requests.get(
+
f"{self.service}/api/v1/accounts/verify_credentials",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
if responce.status_code != 200:
+
LOGGER.error("Failed to validate user credentials!")
+
responce.raise_for_status()
+
return
+
self.user_id: str = responce.json()["id"]
+
+
LOGGER.info("Getting %s configuration...", self.service)
+
responce = requests.get(
+
f"{self.service}/api/v1/instance",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
if responce.status_code != 200:
+
LOGGER.error("Failed to get instance info!")
+
responce.raise_for_status()
+
return
+
+
instance_info: dict = responce.json()
+
configuration: dict = instance_info["configuration"]
+
+
statuses_config: dict = configuration.get("statuses", {})
+
self.max_characters: int = statuses_config.get("max_characters", 500)
+
self.max_media_attachments: int = statuses_config.get(
+
"max_media_attachments", 4
+
)
+
self.characters_reserved_per_url: int = statuses_config.get(
+
"characters_reserved_per_url", 23
+
)
+
+
media_config: dict = configuration.get("media_attachments", {})
+
self.image_size_limit: int = media_config.get("image_size_limit", 16777216)
+
self.video_size_limit: int = media_config.get("video_size_limit", 103809024)
+
self.supported_mime_types: list[str] = media_config.get(
+
"supported_mime_types", POSSIBLE_MIMES
+
)
+
+
# *oma: max post chars
+
max_toot_chars = instance_info.get("max_toot_chars")
+
if max_toot_chars:
+
self.max_characters: int = max_toot_chars
+
+
# *oma: max upload limit
+
upload_limit = instance_info.get("upload_limit")
+
if upload_limit:
+
self.image_size_limit: int = upload_limit
+
self.video_size_limit: int = upload_limit
+
+
# chuckya: supported text types
+
chuckya_text_mimes: list[str] = statuses_config.get("supported_mime_types", [])
+
self.text_format = next(
+
(mime for mime in TEXT_MIMES if mime in (chuckya_text_mimes)), "text/plain"
+
)
+
+
# *oma ext: supported text types
+
pleroma = instance_info.get("pleroma")
+
if pleroma:
+
post_formats: list[str] = pleroma.get("metadata", {}).get(
+
"post_formats", []
+
)
+
self.text_format = next(
+
(mime for mime in TEXT_MIMES if mime in post_formats), self.text_format
+
)
+
+
def upload_media(self, attachments: list[MediaInfo]) -> list[str] | None:
+
for a in attachments:
+
if a.mime.startswith("image/") and len(a.io) > self.image_size_limit:
+
return None
+
+
if a.mime.startswith("video/") and len(a.io) > self.video_size_limit:
+
return None
+
+
if not a.mime.startswith("image/") and not a.mime.startswith("video/"):
+
if len(a.io) > 7_000_000:
+
return None
+
+
uploads: list[dict] = []
+
for a in attachments:
+
data = {}
+
if a.alt:
+
data["description"] = a.alt
+
+
req = requests.post(
+
f"{self.service}/api/v2/media",
+
headers={"Authorization": f"Bearer {self.token}"},
+
files={"file": (a.name, a.io, a.mime)},
+
data=data,
+
)
+
+
if req.status_code == 200:
+
LOGGER.info("Uploaded %s! (%s)", a.name, req.json()["id"])
+
uploads.append({"done": True, "id": req.json()["id"]})
+
elif req.status_code == 202:
+
LOGGER.info("Waiting for %s to process!", a.name)
+
uploads.append({"done": False, "id": req.json()["id"]})
+
else:
+
LOGGER.error("Failed to upload %s! %s", a.name, req.text)
+
req.raise_for_status()
+
+
while any([not val["done"] for val in uploads]):
+
LOGGER.info("Waiting for media to process...")
+
time.sleep(3)
+
for media in uploads:
+
if media["done"]:
+
continue
+
+
reqs = requests.get(
+
f"{self.service}/api/v1/media/{media['id']}",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
+
if reqs.status_code == 206:
+
continue
+
+
if reqs.status_code == 200:
+
media["done"] = True
+
continue
+
reqs.raise_for_status()
+
+
return [val["id"] for val in uploads]
+
+
def token_to_string(self, tokens: list[cross.Token]) -> str | None:
+
p_text: str = ""
+
+
for token in tokens:
+
if isinstance(token, cross.TextToken):
+
p_text += token.text
+
elif isinstance(token, cross.TagToken):
+
p_text += "#" + token.tag
+
elif isinstance(token, cross.LinkToken):
+
if canonical_label(token.label, token.href):
+
p_text += token.href
+
else:
+
if self.text_format == "text/plain":
+
p_text += f"{token.label} ({token.href})"
+
elif self.text_format in {
+
"text/x.misskeymarkdown",
+
"text/markdown",
+
}:
+
p_text += f"[{token.label}]({token.href})"
+
else:
+
return None
+
+
return p_text
+
+
def split_tokens_media(self, tokens: list[cross.Token], media: list[MediaInfo]):
+
split_tokens = cross.split_tokens(
+
tokens, self.max_characters, self.characters_reserved_per_url
+
)
+
post_text: list[str] = []
+
+
for block in split_tokens:
+
baked_text = self.token_to_string(block)
+
+
if baked_text is None:
+
return None
+
post_text.append(baked_text)
+
+
if not post_text:
+
post_text = [""]
+
+
posts: list[dict] = [
+
{"text": post_text, "attachments": []} for post_text in post_text
+
]
+
available_indices: list[int] = list(range(len(posts)))
+
+
current_image_post_idx: int | None = None
+
+
def make_blank_post() -> dict:
+
return {"text": "", "attachments": []}
+
+
def pop_next_empty_index() -> int:
+
if available_indices:
+
return available_indices.pop(0)
+
else:
+
new_idx = len(posts)
+
posts.append(make_blank_post())
+
return new_idx
+
+
for att in media:
+
if (
+
current_image_post_idx is not None
+
and len(posts[current_image_post_idx]["attachments"])
+
< self.max_media_attachments
+
):
+
posts[current_image_post_idx]["attachments"].append(att)
+
else:
+
idx = pop_next_empty_index()
+
posts[idx]["attachments"].append(att)
+
current_image_post_idx = idx
+
+
result: list[tuple[str, list[MediaInfo]]] = []
+
+
for p in posts:
+
result.append((p["text"], p["attachments"]))
+
+
return result
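To illustrate the distribution above (hypothetical values, assuming max_media_attachments = 4): one text chunk and six images yield two statuses, with the overflow landing in a blank post that accept_post later fills with "🖼️":

# split_tokens_media(tokens, [img1 .. img6]) would return roughly:
#   [("chunk one", [img1, img2, img3, img4]),
#    ("",          [img5, img6])]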
+
+
def accept_post(self, post: cross.Post):
+
parent_id = post.get_parent_id()
+
+
new_root_id: int | None = None
+
new_parent_id: int | None = None
+
+
reply_ref: str | None = None
+
if parent_id:
+
thread_tuple = database.find_mapped_thread(
+
self.db,
+
parent_id,
+
self.input.user_id,
+
self.input.service,
+
self.user_id,
+
self.service,
+
)
+
+
if not thread_tuple:
+
LOGGER.error("Failed to find thread tuple in the database!")
+
return None
+
+
_, reply_ref, new_root_id, new_parent_id = thread_tuple
+
+
lang: str
+
if post.get_languages():
+
lang = post.get_languages()[0]
+
else:
+
lang = "en"
+
+
post_tokens = post.get_tokens()
+
if post.get_text_type() == "text/x.misskeymarkdown":
+
post_tokens, status = mfm_util.strip_mfm(post_tokens)
+
post_url = post.get_post_url()
+
if status and post_url:
+
post_tokens.append(cross.TextToken("\n"))
+
post_tokens.append(
+
cross.LinkToken(post_url, "[Post contains MFM, see original]")
+
)
+
+
raw_statuses = self.split_tokens_media(post_tokens, post.get_attachments())
+
if not raw_statuses:
+
LOGGER.error("Failed to split post into statuses?")
+
return None
+
baked_statuses = []
+
+
for status, raw_media in raw_statuses:
+
media: list[str] | None = None
+
if raw_media:
+
media = self.upload_media(raw_media)
+
if not media:
+
LOGGER.error("Failed to upload attachments!")
+
return None
+
baked_statuses.append((status, media))
+
continue
+
baked_statuses.append((status, []))
+
+
created_statuses: list[str] = []
+
+
for status, media in baked_statuses:
+
payload = {
+
"status": status,
+
"media_ids": media or [],
+
"spoiler_text": post.get_spoiler() or "",
+
"visibility": self.options.get("visibility", "public"),
+
"content_type": self.text_format,
+
"language": lang,
+
}
+
+
if media:
+
payload["sensitive"] = post.is_sensitive()
+
+
if post.get_spoiler():
+
payload["sensitive"] = True
+
+
if not status:
+
payload["status"] = "๐Ÿ–ผ๏ธ"
+
+
if reply_ref:
+
payload["in_reply_to_id"] = reply_ref
+
+
reqs = requests.post(
+
f"{self.service}/api/v1/statuses",
+
headers={
+
"Authorization": f"Bearer {self.token}",
+
"Content-Type": "application/json",
+
},
+
json=payload,
+
)
+
+
if reqs.status_code != 200:
+
LOGGER.error(
+
"Failed to post status! %s - %s", reqs.status_code, reqs.text
+
)
+
reqs.raise_for_status()
+
+
reply_ref = reqs.json()["id"]
+
LOGGER.info("Created new status %s!", reply_ref)
+
+
created_statuses.append(reqs.json()["id"])
+
+
db_post = database.find_post(
+
self.db, post.get_id(), self.input.user_id, self.input.service
+
)
+
assert db_post, "ghghghhhhh"
+
+
if new_root_id is None or new_parent_id is None:
+
new_root_id = database.insert_post(
+
self.db, created_statuses[0], self.user_id, self.service
+
)
+
new_parent_id = new_root_id
+
database.insert_mapping(self.db, db_post["id"], new_parent_id)
+
created_statuses = created_statuses[1:]
+
+
for db_id in created_statuses:
+
new_parent_id = database.insert_reply(
+
self.db, db_id, self.user_id, self.service, new_parent_id, new_root_id
+
)
+
database.insert_mapping(self.db, db_post["id"], new_parent_id)
+
+
def delete_post(self, identifier: str):
+
post = database.find_post(
+
self.db, identifier, self.input.user_id, self.input.service
+
)
+
if not post:
+
return
+
+
mappings = database.find_mappings(
+
self.db, post["id"], self.service, self.user_id
+
)
+
for mapping in mappings[::-1]:
+
LOGGER.info("Deleting '%s'...", mapping[0])
+
requests.delete(
+
f"{self.service}/api/v1/statuses/{mapping[0]}",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
database.delete_post(self.db, mapping[0], self.user_id, self.service)
+
+
def accept_repost(self, repost_id: str, reposted_id: str):
+
repost = self.__delete_repost(repost_id)
+
if not repost:
+
return None
+
+
reposted = database.find_post(
+
self.db, reposted_id, self.input.user_id, self.input.service
+
)
+
if not reposted:
+
return
+
+
mappings = database.find_mappings(
+
self.db, reposted["id"], self.service, self.user_id
+
)
+
if mappings:
+
rsp = requests.post(
+
f"{self.service}/api/v1/statuses/{mappings[0][0]}/reblog",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
+
if rsp.status_code != 200:
+
LOGGER.error(
+
"Failed to boost status! status_code: %s, msg: %s",
+
rsp.status_code,
+
rsp.content,
+
)
+
return
+
+
internal_id = database.insert_repost(
+
self.db, rsp.json()["id"], reposted["id"], self.user_id, self.service
+
)
+
database.insert_mapping(self.db, repost["id"], internal_id)
+
+
def __delete_repost(self, repost_id: str) -> dict | None:
+
repost = database.find_post(
+
self.db, repost_id, self.input.user_id, self.input.service
+
)
+
if not repost:
+
return None
+
+
mappings = database.find_mappings(
+
self.db, repost["id"], self.service, self.user_id
+
)
+
reposted_mappings = database.find_mappings(
+
self.db, repost["reposted_id"], self.service, self.user_id
+
)
+
if mappings and reposted_mappings:
+
LOGGER.info("Deleting '%s'...", mappings[0][0])
+
requests.post(
+
f"{self.service}/api/v1/statuses/{reposted_mappings[0][0]}/unreblog",
+
headers={"Authorization": f"Bearer {self.token}"},
+
)
+
database.delete_post(self.db, mappings[0][0], self.user_id, self.service)
+
return repost
+
+
def delete_repost(self, repost_id: str):
+
self.__delete_repost(repost_id)
-37
mastodon.py
···
-
import requests, websockets
-
import util, json
-
-
class Mastodon():
-
def __init__(self, instance: str, token: str) -> None:
-
self.token = token
-
self.instance = instance
-
self.streaming = self.get_streaming_url()
-
-
def get_streaming_url(self):
-
response = requests.get(f"{self.instance}/api/v1/instance")
-
response.raise_for_status()
-
data: dict = response.json()
-
return util.safe_get(data, "urls", {}).get("streaming_api")
-
-
def get_user_id(self):
-
responce = requests.get(f"{self.instance}/api/v1/accounts/verify_credentials", headers={
-
'Authorization': f'Bearer {self.token}'
-
})
-
-
if responce.status_code == 401:
-
raise Exception("Invalid Mastodon API token provided!")
-
-
return responce.json()["id"]
-
-
async def connect_websocket(self, handler):
-
uri = f"{self.streaming}/api/v1/streaming?stream=user&access_token={self.token}"
-
async with websockets.connect(uri, extra_headers={
-
"User-Agent": "XPost/0.0.1"
-
}) as websocket:
-
while True:
-
message = await websocket.recv()
-
event: dict = json.loads(message)
-
-
event_type = event.get('event')
-
payload = event.get('payload')
-
handler(event_type, payload)
-88
media_util.py
···
-
import requests
-
import click
-
import subprocess
-
import json
-
-
def probe_bytes(bytes: bytes) -> dict:
-
cmd = [
-
'ffprobe',
-
'-v', 'error',
-
'-show_format',
-
'-show_streams',
-
'-print_format', 'json',
-
'pipe:0'
-
]
-
proc = subprocess.run(cmd, input=bytes, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
-
if proc.returncode != 0:
-
raise RuntimeError(f"ffprobe failed: {proc.stderr.decode()}")
-
-
return json.loads(proc.stdout)
-
-
def compress_image(image_bytes: bytes):
-
cmd = [
-
'ffmpeg',
-
'-f', 'image2pipe',
-
'-i', 'pipe:0',
-
'-c:v', 'webp',
-
'-q:v', '90',
-
'-f', 'image2pipe',
-
'pipe:1'
-
]
-
-
proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
out_bytes, err = proc.communicate(input=image_bytes)
-
-
if proc.returncode != 0:
-
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
-
-
return out_bytes
-
-
def download_blob(url: str, max_bytes: int = 5_000_000) -> bytes | None:
-
response = requests.get(url, stream=True, timeout=20)
-
if response.status_code != 200:
-
click.echo(f"Failed to download {url}! {response}")
-
return None
-
-
downloaded_bytes = b""
-
current_size = 0
-
-
for chunk in response.iter_content(chunk_size=8192):
-
if not chunk:
-
continue
-
-
current_size += len(chunk)
-
if current_size > max_bytes:
-
click.echo(f"Failed to download {url}, file too large!")
-
response.close()
-
return None
-
-
downloaded_bytes += chunk
-
-
return downloaded_bytes
-
-
-
def get_video_meta(video_bytes: bytes):
-
probe = probe_bytes(video_bytes)
-
video_streams = [s for s in probe['streams'] if s['codec_type'] == 'video']
-
if not video_streams:
-
raise ValueError("No video stream found")
-
-
video = video_streams[0]
-
return {
-
'width': int(video['width']),
-
'height': int(video['height']),
-
'duration': float(video.get('duration', probe['format'].get('duration', -1)))
-
}
-
-
def get_image_meta(image_bytes: bytes):
-
probe = probe_bytes(image_bytes)
-
stream = next((s for s in probe['streams'] if s['codec_type'] == 'video'), None)
-
-
if not stream:
-
raise ValueError("No video stream found")
-
-
return {
-
'width': int(stream['width']),
-
'height': int(stream['height'])
-
}
+54
misskey/common.py
···
+
import cross
+
from util.media import MediaInfo
+
+
+
class MisskeyPost(cross.Post):
+
def __init__(
+
self,
+
instance_url: str,
+
note: dict,
+
tokens: list[cross.Token],
+
files: list[MediaInfo],
+
) -> None:
+
super().__init__()
+
self.note = note
+
self.id = note["id"]
+
self.parent_id = note.get("replyId")
+
self.tokens = tokens
+
self.timestamp = note["createdAt"]
+
self.media_attachments = files
+
self.spoiler = note.get("cw")
+
self.sensitive = any(
+
[a.get("isSensitive", False) for a in note.get("files", [])]
+
)
+
self.url = instance_url + "/notes/" + note["id"]
+
+
def get_id(self) -> str:
+
return self.id
+
+
def get_parent_id(self) -> str | None:
+
return self.parent_id
+
+
def get_tokens(self) -> list[cross.Token]:
+
return self.tokens
+
+
def get_text_type(self) -> str:
+
return "text/x.misskeymarkdown"
+
+
def get_timestamp(self) -> str:
+
return self.timestamp
+
+
def get_attachments(self) -> list[MediaInfo]:
+
return self.media_attachments
+
+
def get_spoiler(self) -> str | None:
+
return self.spoiler
+
+
def get_languages(self) -> list[str]:
+
return []
+
+
def is_sensitive(self) -> bool:
+
return self.sensitive or (self.spoiler is not None and self.spoiler != "")
+
+
def get_post_url(self) -> str | None:
+
return self.url
+202
misskey/input.py
···
+
import asyncio
+
import json
+
import re
+
import uuid
+
from typing import Any, Callable
+
+
import requests
+
import websockets
+
+
import cross
+
import util.database as database
+
import util.md_util as md_util
+
from misskey.common import MisskeyPost
+
from util.media import MediaInfo, download_media
+
from util.util import LOGGER, as_envvar
+
+
ALLOWED_VISIBILITY = ["public", "home"]
+
+
+
class MisskeyInputOptions:
+
def __init__(self, o: dict) -> None:
+
self.allowed_visibility = ALLOWED_VISIBILITY
+
self.filters = [re.compile(f) for f in o.get("regex_filters", [])]
+
+
allowed_visibility = o.get("allowed_visibility")
+
if allowed_visibility is not None:
+
if any([v not in ALLOWED_VISIBILITY for v in allowed_visibility]):
+
raise ValueError(
+
f"'allowed_visibility' only accepts {', '.join(ALLOWED_VISIBILITY)}, got: {allowed_visibility}"
+
)
+
self.allowed_visibility = allowed_visibility
+
+
+
class MisskeyInput(cross.Input):
+
def __init__(self, settings: dict, db: database.DataBaseWorker) -> None:
+
self.options = MisskeyInputOptions(settings.get("options", {}))
+
self.token = as_envvar(settings.get("token")) or (_ for _ in ()).throw(
+
ValueError("'token' is required")
+
)
+
instance: str = as_envvar(settings.get("instance")) or (_ for _ in ()).throw(
+
ValueError("'instance' is required")
+
)
+
+
service = instance[:-1] if instance.endswith("/") else instance
+
+
LOGGER.info("Verifying %s credentails...", service)
+
responce = requests.post(
+
f"{instance}/api/i",
+
json={"i": self.token},
+
headers={"Content-Type": "application/json"},
+
)
+
if responce.status_code != 200:
+
LOGGER.error("Failed to validate user credentials!")
+
responce.raise_for_status()
+
return
+
+
super().__init__(service, responce.json()["id"], settings, db)
+
+
def _on_note(self, outputs: list[cross.Output], note: dict):
+
if note["userId"] != self.user_id:
+
return
+
+
if note.get("visibility") not in self.options.allowed_visibility:
+
LOGGER.info(
+
"Skipping '%s'! '%s' visibility..", note["id"], note.get("visibility")
+
)
+
return
+
+
# TODO polls not supported on bsky. maybe 3rd party? skip for now
+
# we don't handle reblogs. possible with bridgy(?) and self
+
if note.get("poll"):
+
LOGGER.info("Skipping '%s'! Contains a poll..", note["id"])
+
return
+
+
renote: dict | None = note.get("renote")
+
if renote:
+
if note.get("text") is not None:
+
LOGGER.info("Skipping '%s'! Quote..", note["id"])
+
return
+
+
if renote.get("userId") != self.user_id:
+
LOGGER.info("Skipping '%s'! Reblog of other user..", note["id"])
+
return
+
+
success = database.try_insert_repost(
+
self.db, note["id"], renote["id"], self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info(
+
"Skipping '%s' as renoted note was not found in db!", note["id"]
+
)
+
return
+
+
for output in outputs:
+
output.accept_repost(note["id"], renote["id"])
+
return
+
+
reply_id: str | None = note.get("replyId")
+
if reply_id:
+
if note.get("reply", {}).get("userId") != self.user_id:
+
LOGGER.info("Skipping '%s'! Reply to other user..", note["id"])
+
return
+
+
success = database.try_insert_post(
+
self.db, note["id"], reply_id, self.user_id, self.service
+
)
+
if not success:
+
LOGGER.info("Skipping '%s' as parent note was not found in db!", note["id"])
+
return
+
+
mention_handles: dict = note.get("mentionHandles") or {}
+
tags: list[str] = note.get("tags") or []
+
+
handles: list[tuple[str, str]] = []
+
for value in mention_handles.values():
+
handles.append((value, value))
+
+
tokens = md_util.tokenize_markdown(note.get("text", ""), tags, handles)
+
if not cross.test_filters(tokens, self.options.filters):
+
LOGGER.info("Skipping '%s'. Matched a filter!", note["id"])
+
return
+
+
LOGGER.info("Crossposting '%s'...", note["id"])
+
+
media_attachments: list[MediaInfo] = []
+
for attachment in note.get("files", []):
+
LOGGER.info("Downloading %s...", attachment["url"])
+
info = download_media(attachment["url"], attachment.get("comment") or "")
+
if not info:
+
LOGGER.error("Skipping '%s'. Failed to download media!", note["id"])
+
return
+
media_attachments.append(info)
+
+
cross_post = MisskeyPost(self.service, note, tokens, media_attachments)
+
for output in outputs:
+
output.accept_post(cross_post)
+
+
def _on_delete(self, outputs: list[cross.Output], note: dict):
+
# TODO handle deletes
+
pass
+
+
def _on_message(self, outputs: list[cross.Output], data: dict):
+
if data["type"] == "channel":
+
type: str = data["body"]["type"]
+
if type == "note" or type == "reply":
+
note_body = data["body"]["body"]
+
self._on_note(outputs, note_body)
+
return
+
+
pass
+
+
async def _send_keepalive(self, ws: websockets.WebSocketClientProtocol):
+
while ws.open:
+
try:
+
await asyncio.sleep(120)
+
if ws.open:
+
await ws.send("h")
+
LOGGER.debug("Sent keepalive h..")
+
else:
+
LOGGER.info("WebSocket is closed, stopping keepalive task.")
+
break
+
except Exception as e:
+
LOGGER.error(f"Error sending keepalive: {e}")
+
break
+
+
async def _subscribe_to_home(self, ws: websockets.WebSocketClientProtocol):
+
await ws.send(
+
json.dumps(
+
{
+
"type": "connect",
+
"body": {"channel": "homeTimeline", "id": str(uuid.uuid4())},
+
}
+
)
+
)
+
LOGGER.info("Subscribed to 'homeTimeline' channel...")
+
+
async def listen(
+
self, outputs: list[cross.Output], submit: Callable[[Callable[[], Any]], Any]
+
):
+
streaming: str = f"wss://{self.service.split('://', 1)[1]}"
+
url: str = f"{streaming}/streaming?i={self.token}"
+
+
async for ws in websockets.connect(
+
url, extra_headers={"User-Agent": "XPost/0.0.3"}
+
):
+
try:
+
LOGGER.info("Listening to %s...", streaming)
+
await self._subscribe_to_home(ws)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
# TODO listen to deletes somehow
+
submit(lambda msg=msg: self._on_message(outputs, json.loads(msg)))  # bind msg; the task runs later on the worker thread
+
+
keepalive = asyncio.create_task(self._send_keepalive(ws))
+
listen = asyncio.create_task(listen_for_messages())
+
+
await asyncio.gather(keepalive, listen)
+
except websockets.ConnectionClosedError as e:
+
LOGGER.error(e, stack_info=True, exc_info=True)
+
LOGGER.info("Reconnecting to %s...", streaming)
+
continue
+38
misskey/mfm_util.py
···
+
import re
+
+
import cross
+
+
MFM_PATTERN = re.compile(r"\$\[([^\[\]]+)\]")
+
+
+
def strip_mfm(tokens: list[cross.Token]) -> tuple[list[cross.Token], bool]:
+
modified = False
+
+
for tk in tokens:
+
if isinstance(tk, cross.TextToken):
+
original = tk.text
+
cleaned = __strip_mfm(original)
+
if cleaned != original:
+
modified = True
+
tk.text = cleaned
+
+
elif isinstance(tk, cross.LinkToken):
+
original = tk.label
+
cleaned = __strip_mfm(original)
+
if cleaned != original:
+
modified = True
+
tk.label = cleaned
+
+
return tokens, modified
+
+
+
def __strip_mfm(text: str) -> str:
+
def match_contents(match: re.Match[str]):
+
content = match.group(1).strip()
+
parts = content.split(" ", 1)
+
return parts[1] if len(parts) > 1 else ""
+
+
while MFM_PATTERN.search(text):
+
text = MFM_PATTERN.sub(match_contents, text)
+
+
return text
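For example (a hypothetical check): nested markup like $[x2 $[sparkle hi]] collapses to "hi", because each pass rewrites the innermost $[name content] span to its content and the loop repeats until no spans remain:

tokens = [cross.TextToken("$[x2 $[sparkle hi]]")]
tokens, modified = strip_mfm(tokens)
# tokens[0].text == "hi", modified == True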
+3 -1
pyproject.toml
···
[project]
name = "xpost"
-
version = "0.0.1"
+
version = "0.0.3"
description = "mastodon -> bluesky crossposting tool"
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"atproto>=0.0.61",
"click>=8.2.1",
+
"python-magic>=0.4.27",
"requests>=2.32.3",
+
"websockets>=13.1",
]
+290
util/database.py
···
+
import json
+
import queue
+
import sqlite3
+
import threading
+
from concurrent.futures import Future
+
+
+
class DataBaseWorker:
+
def __init__(self, database: str) -> None:
+
super(DataBaseWorker, self).__init__()
+
self.database = database
+
self.queue = queue.Queue()
+
self.thread = threading.Thread(target=self._run, daemon=True)
+
self.shutdown_event = threading.Event()
+
self.conn = sqlite3.connect(self.database, check_same_thread=False)
+
self.lock = threading.Lock()
+
self.thread.start()
+
+
def _run(self):
+
while not self.shutdown_event.is_set():
+
try:
+
task, future = self.queue.get(timeout=1)
+
try:
+
with self.lock:
+
result = task(self.conn)
+
future.set_result(result)
+
except Exception as e:
+
future.set_exception(e)
+
finally:
+
self.queue.task_done()
+
except queue.Empty:
+
continue
+
+
def execute(self, sql: str, params=()):
+
def task(conn: sqlite3.Connection):
+
cursor = conn.execute(sql, params)
+
conn.commit()
+
return cursor.fetchall()
+
+
future = Future()
+
self.queue.put((task, future))
+
return future.result()
+
+
def close(self):
+
self.shutdown_event.set()
+
self.thread.join()
+
with self.lock:
+
self.conn.close()
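A minimal usage sketch (hypothetical, in-memory database): every execute() call is marshalled onto the single worker thread and blocks the caller until the Future resolves, which serializes all SQLite access across threads:

worker = DataBaseWorker(":memory:")
worker.execute("CREATE TABLE kv (k TEXT, v TEXT);")
worker.execute("INSERT INTO kv VALUES (?, ?);", ("a", "1"))
rows = worker.execute("SELECT v FROM kv WHERE k = ?;", ("a",))  # [("1",)]
worker.close()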
+
+
+
def try_insert_repost(
+
db: DataBaseWorker,
+
post_id: str,
+
reposted_id: str,
+
input_user: str,
+
input_service: str,
+
) -> bool:
+
reposted = find_post(db, reposted_id, input_user, input_service)
+
if not reposted:
+
return False
+
+
insert_repost(db, post_id, reposted["id"], input_user, input_service)
+
return True
+
+
+
def try_insert_post(
+
db: DataBaseWorker,
+
post_id: str,
+
in_reply: str | None,
+
input_user: str,
+
input_service: str,
+
) -> bool:
+
root_id = None
+
parent_id = None
+
+
if in_reply:
+
parent_post = find_post(db, in_reply, input_user, input_service)
+
if not parent_post:
+
return False
+
+
root_id = parent_post["id"]
+
parent_id = root_id
+
if parent_post["root_id"]:
+
root_id = parent_post["root_id"]
+
+
if root_id and parent_id:
+
insert_reply(db, post_id, input_user, input_service, parent_id, root_id)
+
else:
+
insert_post(db, post_id, input_user, input_service)
+
+
return True
+
+
+
def insert_repost(
+
db: DataBaseWorker, identifier: str, reposted_id: int, user_id: str, service: str
+
) -> int:
+
db.execute(
+
"""
+
INSERT INTO posts (user_id, service, identifier, reposted_id)
+
VALUES (?, ?, ?, ?);
+
""",
+
(user_id, service, identifier, reposted_id),
+
)
+
return db.execute("SELECT last_insert_rowid();", ())[0][0]
+
+
+
def insert_post(db: DataBaseWorker, identifier: str, user_id: str, service: str) -> int:
+
db.execute(
+
"""
+
INSERT INTO posts (user_id, service, identifier)
+
VALUES (?, ?, ?);
+
""",
+
(user_id, service, identifier),
+
)
+
return db.execute("SELECT last_insert_rowid();", ())[0][0]
+
+
+
def insert_reply(
+
db: DataBaseWorker,
+
identifier: str,
+
user_id: str,
+
service: str,
+
parent: int,
+
root: int,
+
) -> int:
+
db.execute(
+
"""
+
INSERT INTO posts (user_id, service, identifier, parent_id, root_id)
+
VALUES (?, ?, ?, ?, ?);
+
""",
+
(user_id, service, identifier, parent, root),
+
)
+
return db.execute("SELECT last_insert_rowid();", ())[0][0]
+
+
+
def insert_mapping(db: DataBaseWorker, original: int, mapped: int):
+
db.execute(
+
"""
+
INSERT INTO mappings (original_post_id, mapped_post_id)
+
VALUES (?, ?);
+
""",
+
(original, mapped),
+
)
+
+
+
def delete_post(db: DataBaseWorker, identifier: str, user_id: str, service: str):
+
db.execute(
+
"""
+
DELETE FROM posts
+
WHERE identifier = ?
+
AND service = ?
+
AND user_id = ?
+
""",
+
(identifier, service, user_id),
+
)
+
+
+
def fetch_data(db: DataBaseWorker, identifier: str, user_id: str, service: str) -> dict:
+
result = db.execute(
+
"""
+
SELECT extra_data
+
FROM posts
+
WHERE identifier = ?
+
AND user_id = ?
+
AND service = ?
+
""",
+
(identifier, user_id, service),
+
)
+
if not result or not result[0][0]:
+
return {}
+
return json.loads(result[0][0])
+
+
+
def store_data(
+
db: DataBaseWorker, identifier: str, user_id: str, service: str, extra_data: dict
+
) -> None:
+
db.execute(
+
"""
+
UPDATE posts
+
SET extra_data = ?
+
WHERE identifier = ?
+
AND user_id = ?
+
AND service = ?
+
""",
+
(json.dumps(extra_data), identifier, user_id, service),
+
)
+
+
+
def find_mappings(
+
db: DataBaseWorker, original_post: int, service: str, user_id: str
+
) -> list[tuple[str]]:
+
return db.execute(
+
"""
+
SELECT p.identifier
+
FROM posts AS p
+
JOIN mappings AS m
+
ON p.id = m.mapped_post_id
+
WHERE m.original_post_id = ?
+
AND p.service = ?
+
AND p.user_id = ?
+
ORDER BY p.id;
+
""",
+
(original_post, service, user_id),
+
)
+
+
+
def find_post_by_id(db: DataBaseWorker, id: int) -> dict | None:
+
result = db.execute(
+
"""
+
SELECT user_id, service, identifier, parent_id, root_id, reposted_id
+
FROM posts
+
WHERE id = ?
+
""",
+
(id,),
+
)
+
if not result:
+
return None
+
user_id, service, identifier, parent_id, root_id, reposted_id = result[0]
+
return {
+
"user_id": user_id,
+
"service": service,
+
"identifier": identifier,
+
"parent_id": parent_id,
+
"root_id": root_id,
+
"reposted_id": reposted_id,
+
}
+
+
+
def find_post(
+
db: DataBaseWorker, identifier: str, user_id: str, service: str
+
) -> dict | None:
+
result = db.execute(
+
"""
+
SELECT id, parent_id, root_id, reposted_id
+
FROM posts
+
WHERE identifier = ?
+
AND user_id = ?
+
AND service = ?
+
""",
+
(identifier, user_id, service),
+
)
+
if not result:
+
return None
+
id, parent_id, root_id, reposted_id = result[0]
+
return {
+
"id": id,
+
"parent_id": parent_id,
+
"root_id": root_id,
+
"reposted_id": reposted_id,
+
}
+
+
+
def find_mapped_thread(
+
db: DataBaseWorker,
+
parent_id: str,
+
input_user: str,
+
input_service: str,
+
output_user: str,
+
output_service: str,
+
):
+
reply_data: dict | None = find_post(db, parent_id, input_user, input_service)
+
if not reply_data:
+
return None
+
+
reply_mappings: list[tuple[str]] = find_mappings(
+
db, reply_data["id"], output_service, output_user
+
)
+
if not reply_mappings:
+
return None
+
+
reply_identifier: tuple[str] = reply_mappings[-1]
+
root_identifier: tuple[str] = reply_mappings[0]
+
if reply_data["root_id"]:
+
root_data = find_post_by_id(db, reply_data["root_id"])
+
if not root_data:
+
return None
+
+
root_mappings = find_mappings(
+
db, reply_data["root_id"], output_service, output_user
+
)
+
if not root_mappings:
+
return None
+
root_identifier = root_mappings[0]
+
+
return (
+
root_identifier[0], # real ids
+
reply_identifier[0],
+
reply_data["root_id"], # db ids
+
reply_data["id"],
+
)
+172
util/html_util.py
···
+
from html.parser import HTMLParser
+
+
import cross
+
+
+
class HTMLPostTokenizer(HTMLParser):
+
def __init__(self) -> None:
+
super().__init__()
+
self.tokens: list[cross.Token] = []
+
+
self.mentions: list[tuple[str, str]] = []
+
self.tags: list[str] = []
+
+
self.in_pre = False
+
self.in_code = False
+
+
self.current_tag_stack = []
+
self.list_stack = []
+
+
self.anchor_stack = []
+
self.anchor_data = []
+
+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+
attrs_dict = dict(attrs)
+
+
def append_newline():
+
if self.tokens:
+
last_token = self.tokens[-1]
+
if isinstance(
+
last_token, cross.TextToken
+
) and not last_token.text.endswith("\n"):
+
self.tokens.append(cross.TextToken("\n"))
+
+
match tag:
+
case "br":
+
self.tokens.append(cross.TextToken(" \n"))
+
case "a":
+
href = attrs_dict.get("href", "")
+
self.anchor_stack.append(href)
+
case "strong", "b":
+
self.tokens.append(cross.TextToken("**"))
+
case "em", "i":
+
self.tokens.append(cross.TextToken("*"))
+
case "del", "s":
+
self.tokens.append(cross.TextToken("~~"))
+
case "code":
+
if not self.in_pre:
+
self.tokens.append(cross.TextToken("`"))
+
self.in_code = True
+
case "pre":
+
append_newline()
+
self.tokens.append(cross.TextToken("```\n"))
+
self.in_pre = True
+
case "blockquote":
+
append_newline()
+
self.tokens.append(cross.TextToken("> "))
+
case "ul", "ol":
+
self.list_stack.append(tag)
+
append_newline()
+
case "li":
+
indent = " " * (len(self.list_stack) - 1)
+
if self.list_stack and self.list_stack[-1] == "ul":
+
self.tokens.append(cross.TextToken(f"{indent}- "))
+
elif self.list_stack and self.list_stack[-1] == "ol":
+
self.tokens.append(cross.TextToken(f"{indent}1. "))
+
case _:
+
if tag in {"h1", "h2", "h3", "h4", "h5", "h6"}:
+
level = int(tag[1])
+
self.tokens.append(cross.TextToken("\n" + "#" * level + " "))
+
+
self.current_tag_stack.append(tag)
+
+
def handle_data(self, data: str) -> None:
+
if self.anchor_stack:
+
self.anchor_data.append(data)
+
else:
+
self.tokens.append(cross.TextToken(data))
+
+
def handle_endtag(self, tag: str) -> None:
+
if not self.current_tag_stack:
+
return
+
+
if tag in self.current_tag_stack:
+
self.current_tag_stack.remove(tag)
+
+
match tag:
+
case "p":
+
self.tokens.append(cross.TextToken("\n\n"))
+
case "a":
+
href = self.anchor_stack.pop()
+
anchor_data = "".join(self.anchor_data)
+
self.anchor_data = []
+
+
if anchor_data.startswith("#"):
+
as_tag = anchor_data[1:].lower()
+
if any(as_tag == block for block in self.tags):
+
self.tokens.append(cross.TagToken(anchor_data[1:]))
+
elif anchor_data.startswith("@"):
+
match = next(
+
(pair for pair in self.mentions if anchor_data in pair), None
+
)
+
+
if match:
+
self.tokens.append(cross.MentionToken(match[1], ""))
+
else:
+
self.tokens.append(cross.LinkToken(href, anchor_data))
+
case "strong", "b":
+
self.tokens.append(cross.TextToken("**"))
+
case "em", "i":
+
self.tokens.append(cross.TextToken("*"))
+
case "del", "s":
+
self.tokens.append(cross.TextToken("~~"))
+
case "code":
+
if not self.in_pre and self.in_code:
+
self.tokens.append(cross.TextToken("`"))
+
self.in_code = False
+
case "pre":
+
self.tokens.append(cross.TextToken("\n```\n"))
+
self.in_pre = False
+
case "blockquote":
+
self.tokens.append(cross.TextToken("\n"))
+
case "ul", "ol":
+
if self.list_stack:
+
self.list_stack.pop()
+
self.tokens.append(cross.TextToken("\n"))
+
case "li":
+
self.tokens.append(cross.TextToken("\n"))
+
case _:
+
if tag in ["h1", "h2", "h3", "h4", "h5", "h6"]:
+
self.tokens.append(cross.TextToken("\n"))
+
+
def get_tokens(self) -> list[cross.Token]:
+
if not self.tokens:
+
return []
+
+
combined: list[cross.Token] = []
+
buffer: list[str] = []
+
+
def flush_buffer():
+
if buffer:
+
merged = "".join(buffer)
+
combined.append(cross.TextToken(text=merged))
+
buffer.clear()
+
+
for token in self.tokens:
+
if isinstance(token, cross.TextToken):
+
buffer.append(token.text)
+
else:
+
flush_buffer()
+
combined.append(token)
+
+
flush_buffer()
+
+
if combined and isinstance(combined[-1], cross.TextToken):
+
if combined[-1].text.endswith("\n\n"):
+
combined[-1] = cross.TextToken(combined[-1].text[:-2])
+
return combined
+
+
def reset(self):
+
"""Reset the parser state for reuse."""
+
super().reset()
+
self.tokens = []
+
+
self.mentions = []
+
self.tags = []
+
+
self.in_pre = False
+
self.in_code = False
+
+
self.current_tag_stack = []
+
self.anchor_stack = []
+
self.list_stack = []
+
self.anchor_data = []
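+
# A minimal usage sketch (hypothetical handles and tags, not real config values):
+
#   parser = HTMLPostTokenizer()
+
#   parser.mentions = [("@alice", "https://example.social/@alice")]
+
#   parser.tags = ["art"]
+
#   parser.feed('<p>Hi <a href="#art">#art</a></p>')
+
#   tokens = parser.get_tokens()  # [TextToken("Hi "), TagToken("art"), ...]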
+123
util/md_util.py
···
+
import re
+
+
import cross
+
import util.html_util as html_util
+
import util.util as util
+
+
URL = re.compile(r"(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+", re.IGNORECASE)
+
MD_INLINE_LINK = re.compile(
+
r"\[([^\]]+)\]\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)",
+
re.IGNORECASE,
+
)
+
MD_AUTOLINK = re.compile(
+
r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>", re.IGNORECASE
+
)
+
HASHTAG = re.compile(r"(?<!\w)#(\w+)")
+
FEDIVERSE_HANDLE = re.compile(r"(?<![\w@])@([\w\.-]+)(?:@([\w\.-]+\.[\w\.-]+))?")
+
+
+
def tokenize_markdown(
+
text: str, tags: list[str], handles: list[tuple[str, str]]
+
) -> list[cross.Token]:
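+
"""Tokenize post text: strip HTML via HTMLPostTokenizer, then re-scan plain text for markdown links, hashtags, handles, and bare URLs."""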
+
if not text:
+
return []
+
+
tokenizer = html_util.HTMLPostTokenizer()
+
tokenizer.mentions = handles
+
tokenizer.tags = tags
+
tokenizer.feed(text)
+
html_tokens = tokenizer.get_tokens()
+
+
tokens: list[cross.Token] = []
+
+
for tk in html_tokens:
+
if isinstance(tk, cross.TextToken):
+
tokens.extend(__tokenize_md(tk.text, tags, handles))
+
elif isinstance(tk, cross.LinkToken):
+
if not tk.label or util.canonical_label(tk.label, tk.href):
+
tokens.append(tk)
+
continue
+
+
tokens.extend(__tokenize_md(f"[{tk.label}]({tk.href})", tags, handles))
+
else:
+
tokens.append(tk)
+
+
return tokens
+
+
+
def __tokenize_md(
+
text: str, tags: list[str], handles: list[tuple[str, str]]
+
) -> list[cross.Token]:
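+
"""Single left-to-right scan of plain text for inline links, autolinks, hashtags, handles, and bare URLs."""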
+
index: int = 0
+
total: int = len(text)
+
buffer: list[str] = []
+
+
tokens: list[cross.Token] = []
+
+
def flush():
+
nonlocal buffer
+
if buffer:
+
tokens.append(cross.TextToken("".join(buffer)))
+
buffer = []
+
+
while index < total:
+
if text[index] == "[":
+
md_inline = MD_INLINE_LINK.match(text, index)
+
if md_inline:
+
flush()
+
label = md_inline.group(1)
+
href = md_inline.group(2)
+
tokens.append(cross.LinkToken(href, label))
+
index = md_inline.end()
+
continue
+
+
if text[index] == "<":
+
md_auto = MD_AUTOLINK.match(text, index)
+
if md_auto:
+
flush()
+
href = md_auto.group(1)
+
tokens.append(cross.LinkToken(href, href))
+
index = md_auto.end()
+
continue
+
+
if text[index] == "#":
+
tag = HASHTAG.match(text, index)
+
if tag:
+
tag_text = tag.group(1)
+
if tag_text.lower() in tags:
+
flush()
+
tokens.append(cross.TagToken(tag_text))
+
index = tag.end()
+
continue
+
+
if text[index] == "@":
+
handle = FEDIVERSE_HANDLE.match(text, index)
+
if handle:
+
handle_text = handle.group(0)
+
stripped_handle = handle_text.strip()
+
+
match = next(
+
(pair for pair in handles if stripped_handle in pair), None
+
)
+
+
if match:
+
flush()
+
tokens.append(
+
cross.MentionToken(match[1], "")
+
) # TODO: Misskey doesn't provide a URI
+
index = handle.end()
+
continue
+
+
url = URL.match(text, index)
+
if url:
+
flush()
+
href = url.group(0)
+
tokens.append(cross.LinkToken(href, href))
+
index = url.end()
+
continue
+
+
buffer.append(text[index])
+
index += 1
+
+
flush()
+
return tokens
+160
util/media.py
···
+
import json
+
import os
+
import re
+
import subprocess
+
import urllib.parse
+
+
import magic
+
import requests
+
+
from util.util import LOGGER
+
+
FILENAME = re.compile(r'filename="?([^\";]*)"?')
+
MAGIC = magic.Magic(mime=True)
+
+
+
class MediaInfo:
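+
"""A downloaded attachment: source URL, filename, MIME type, alt text, and raw bytes."""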
+
def __init__(self, url: str, name: str, mime: str, alt: str, io: bytes) -> None:
+
self.url = url
+
self.name = name
+
self.mime = mime
+
self.alt = alt
+
self.io = io
+
+
+
def download_media(url: str, alt: str) -> MediaInfo | None:
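+
"""Fetch an attachment (up to 100 MB), sniff its MIME type, and wrap it in MediaInfo."""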
+
name = get_filename_from_url(url)
+
io = download_blob(url, max_bytes=100_000_000)
+
if not io:
+
LOGGER.error("Failed to download media attachment! %s", url)
+
return None
+
mime = MAGIC.from_buffer(io)
+
if not mime:
+
mime = "application/octet-stream"
+
return MediaInfo(url, name, mime, alt, io)
+
+
+
def get_filename_from_url(url: str) -> str:
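+
"""Prefer the Content-Disposition filename, then fall back to the URL's last path segment."""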
+
try:
+
response = requests.head(url, allow_redirects=True, timeout=10)  # bounded so a stalled HEAD can't hang the worker
+
disposition = response.headers.get("Content-Disposition")
+
if disposition:
+
filename = FILENAME.findall(disposition)
+
if filename:
+
return filename[0]
+
except requests.RequestException:
+
pass
+
+
parsed_url = urllib.parse.urlparse(url)
+
base_name = os.path.basename(parsed_url.path)
+
+
# hardcoded fix to return the cid for pds
+
if base_name == "com.atproto.sync.getBlob":
+
qs = urllib.parse.parse_qs(parsed_url.query)
+
if qs and qs.get("cid"):
+
return qs["cid"][0]
+
+
return base_name
+
+
+
def probe_bytes(data: bytes) -> dict:
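+
"""Run ffprobe on in-memory bytes and return its parsed JSON output."""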
+
cmd = [
+
"ffprobe",
+
"-v", "error",
+
"-show_format",
+
"-show_streams",
+
"-print_format", "json",
+
"pipe:0",
+
]
+
proc = subprocess.run(
+
cmd, input=data, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffprobe failed: {proc.stderr.decode()}")
+
+
return json.loads(proc.stdout)
+
+
+
def convert_to_mp4(video_bytes: bytes) -> bytes:
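+
"""Re-encode to fragmented MP4 (h264 + aac); the movflags keep the muxer pipe-safe."""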
+
cmd = [
+
"ffmpeg",
+
"-i", "pipe:0",
+
"-c:v", "libx264",
+
"-crf", "30",
+
"-preset", "slow",
+
"-c:a", "aac",
+
"-b:a", "128k",
+
"-movflags", "frag_keyframe+empty_moov+default_base_moof",
+
"-f", "mp4",
+
"pipe:1",
+
]
+
+
proc = subprocess.Popen(
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
out_bytes, err = proc.communicate(input=video_bytes)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
+
+
return out_bytes
+
+
+
def compress_image(image_bytes: bytes, quality: int = 90) -> bytes:
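+
"""Re-encode an image to WebP at the given quality via ffmpeg."""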
+
cmd = [
+
"ffmpeg",
+
"-f", "image2pipe",
+
"-i", "pipe:0",
+
"-c:v", "webp",
+
"-q:v", str(quality),
+
"-f", "image2pipe",
+
"pipe:1",
+
]
+
+
proc = subprocess.Popen(
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
out_bytes, err = proc.communicate(input=image_bytes)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
+
+
return out_bytes
+
+
+
def download_blob(url: str, max_bytes: int = 5_000_000) -> bytes | None:
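+
"""Stream a download, returning None if it fails or exceeds max_bytes."""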
+
response = requests.get(url, stream=True, timeout=20)
+
if response.status_code != 200:
+
LOGGER.info("Failed to download %s! %s", url, response.text)
+
return None
+
+
downloaded_bytes = b""
+
current_size = 0
+
+
for chunk in response.iter_content(chunk_size=8192):
+
if not chunk:
+
continue
+
+
current_size += len(chunk)
+
if current_size > max_bytes:
+
response.close()
+
return None
+
+
downloaded_bytes.extend(chunk)
+
+
return bytes(downloaded_bytes)
+
+
+
def get_media_meta(data: bytes) -> dict:
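+
"""Return width, height, and duration of the first video stream (-1 when unknown)."""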
+
probe = probe_bytes(data)
+
streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
+
if not streams:
+
raise ValueError("No video stream found")
+
+
media = streams[0]
+
return {
+
"width": int(media["width"]),
+
"height": int(media["height"]),
+
"duration": float(media.get("duration", probe["format"].get("duration", -1))),
+
}
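+
# Sketch of the intended flow (hypothetical URL and alt text):
+
#   info = download_media("https://example.com/clip.webm", alt="a cat")
+
#   if info and info.mime.startswith("video/"):
+
#       meta = get_media_meta(info.io)  # width/height/duration for the upload API
+
#       mp4 = convert_to_mp4(info.io)   # fragmented MP4 bytes, safe to pipe/upload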
+43
util/util.py
···
+
import json
+
import logging
+
import os
+
import sys
+
+
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+
LOGGER = logging.getLogger("XPost")
+
+
+
def as_json(obj, indent=None, sort_keys=False) -> str:
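+
"""Serialize to JSON, using an object's __json__ hook when present and __dict__ otherwise."""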
+
return json.dumps(
+
obj.__dict__ if not isinstance(obj, dict) else obj,
+
default=lambda o: o.__json__() if hasattr(o, "__json__") else o.__dict__,
+
indent=indent,
+
sort_keys=sort_keys,
+
)
+
+
+
def canonical_label(label: str | None, href: str) -> bool:
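+
"""True when a link label is redundant with its href (identical, or identical minus the scheme)."""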
+
if not label or label == href:
+
return True
+
+
split = href.split("://", 1)
+
if len(split) > 1 and split[1] == label:
+
return True
+
+
return False
+
+
+
def safe_get(obj: dict, key: str, default):
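+
"""Like dict.get, but also substitutes the default for falsy stored values."""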
+
val = obj.get(key, default)
+
return val if val else default
+
+
+
def as_envvar(text: str | None) -> str | None:
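+
"""Resolve the 'env:NAME' indirection (empty string when unset); other strings pass through."""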
+
if not text:
+
return None
+
+
if text.startswith("env:"):
+
return os.environ.get(text[4:], "")
+
+
return text
-92
util.py
···
-
import re, html
-
-
NEWLINE = re.compile(r'</p>|<br\s*/?>', re.IGNORECASE)
-
NON_ANCHORS = re.compile(r'(?i)</?(?!a\b)[a-z][^>]*>')
-
ANCHORS = re.compile(r'<a\s+[^>]*href=["\'](.*?)["\'][^>]*>(.*?)</a>', re.IGNORECASE)
-
-
DEFAULT_SETTINGS: dict = {
-
'bluesky': {
-
'quote_gate': False,
-
'thread_gate': [
-
'everybody'
-
]
-
}
-
}
-
-
def tokenize_html(content: str) -> list[dict]:
-
text = content.replace('<p>', '')
-
text = NEWLINE.sub('\n', text)
-
text = html.unescape(text)
-
text = NON_ANCHORS.sub('', text)
-
text = text.rstrip('\n')
-
-
tokens = []
-
pos = 0
-
-
for anchor in ANCHORS.finditer(text):
-
start, end = anchor.span()
-
-
if start > pos:
-
tokens.append({"type": "text", "value": text[pos:start]})
-
-
href = anchor.group(1).strip()
-
label = anchor.group(2).strip()
-
-
if label.startswith("#"):
-
tokens.append({"type": "hashtag", "value": label})
-
elif label.startswith("@"):
-
tokens.append({"type": "mention", "value": label})
-
else:
-
tokens.append({"type": "link", "value": href, "label": label})
-
-
pos = end
-
-
if pos < len(text):
-
tokens.append({"type": "text", "value": text[pos:]})
-
-
return tokens
-
-
def split_tokens(tokens: list[dict], max_chars: int) -> list[list[dict]]:
-
chunks = []
-
current_chunk = []
-
current_length = 0
-
-
for token in tokens:
-
token_type = token["type"]
-
value = token["value"]
-
-
val_len = len(value)
-
-
if token_type != "text":
-
if current_length + val_len > max_chars:
-
if current_chunk:
-
chunks.append(current_chunk)
-
current_chunk = [token]
-
current_length = val_len
-
else:
-
current_chunk.append(token)
-
current_length += val_len
-
else:
-
start = 0
-
while start < val_len:
-
space_left = max_chars - current_length
-
if space_left == 0:
-
chunks.append(current_chunk)
-
current_chunk = []
-
current_length = 0
-
space_left = max_chars
-
-
end = min(start + space_left, val_len)
-
piece = value[start:end]
-
current_chunk.append({"type": "text", "value": piece})
-
current_length += len(piece)
-
start = end
-
-
if current_chunk:
-
chunks.append(current_chunk)
-
-
return chunks
-
-
def safe_get(obj: dict, key: str, default):
-
val = obj.get(key, default)
-
return val if val else default
+14 -1
uv.lock
···
]
[[package]]
+
name = "python-magic"
+
version = "0.4.27"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/da/db/0b3e28ac047452d079d375ec6798bf76a036a08182dbb39ed38116a49130/python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b", size = 14677, upload-time = "2022-06-07T20:16:59.508Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" },
+
]
+
+
[[package]]
name = "requests"
version = "2.32.3"
source = { registry = "https://pypi.org/simple" }
···
[[package]]
name = "xpost"
-
version = "0.1.0"
+
version = "0.0.3"
source = { virtual = "." }
dependencies = [
{ name = "atproto" },
{ name = "click" },
+
{ name = "python-magic" },
{ name = "requests" },
+
{ name = "websockets" },
]
[package.metadata]
requires-dist = [
{ name = "atproto", specifier = ">=0.0.61" },
{ name = "click", specifier = ">=8.2.1" },
+
{ name = "python-magic", specifier = ">=0.4.27" },
{ name = "requests", specifier = ">=2.32.3" },
+
{ name = "websockets", specifier = ">=13.1" },
]