social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

+15
.tangled/workflows/run-tests.yml
···
+
when:
+
- event: ["push", "manual"]
+
branch: ["next"]
+
+
engine: nixery
+
+
dependencies:
+
nixpkgs:
+
- uv
+
- python312
+
+
steps:
+
- name: run tests
+
command: |
+
uv run --python python3.12 pytest -vv
+8 -3
README.md
···
-
# XPost
+
# xpost next
+
+
> [!NOTE]
+
> this is the dev branch for xpost next, a full rewrite of xpost. the older version is available on the master branch.
+
>
+
> planned work for this branch can be found and tracked here: https://tangled.org/@zenfyr.dev/xpost/issues/1
-
XPost is a social media cross-posting tool that differs from others by using streaming APIs to allow instant, zero-input cross-posting. this means you can continue posting on your preferred platform without using special apps.
+
xpost is a social media cross-posting tool that differs from others by using streaming APIs to allow instant, zero-input cross-posting. this means you can continue posting on your preferred platform without using special apps.
-
XPost tries to support as many features as possible. for example, when cross-posting from mastodon to bluesky, unsupported file types will be attached as links. posts with mixed media or too many files will be split and spread across text.
+
xpost tries to support as many features as possible. for example, when cross-posting from mastodon to bluesky, unsupported file types will be attached as links. posts with mixed media or too many files will be split and spread across text.
+164
atproto/identity.py
···
+
from pathlib import Path
+
from typing import Any, override
+
+
import dns.resolver
+
import requests
+
+
import env
+
from util.cache import Cacheable, TTLCache
+
from util.util import LOGGER, normalize_service_url, shutdown_hook
+
+
+
class DidDocument():
+
def __init__(self, raw_doc: dict[str, Any]) -> None:
+
self.raw: dict[str, Any] = raw_doc
+
self.atproto_pds: str | None = None
+
+
def get_atproto_pds(self) -> str | None:
+
if self.atproto_pds:
+
return self.atproto_pds
+
+
services = self.raw.get("service")
+
if not services:
+
return None
+
+
for service in services:
+
if (
+
service.get("id") == "#atproto_pds"
+
and service.get("type") == "AtprotoPersonalDataServer"
+
):
+
endpoint = service.get("serviceEndpoint")
+
if endpoint:
+
url = normalize_service_url(endpoint)
+
self.atproto_pds = url
+
return url
+
self.atproto_pds = ""
+
return None
+
+
+
class DidResolver(Cacheable):
+
def __init__(self, plc_host: str) -> None:
+
self.plc_host: str = plc_host
+
self.__cache: TTLCache[str, DidDocument] = TTLCache(ttl_seconds=12 * 60 * 60)
+
+
def try_resolve_plc(self, did: str) -> DidDocument | None:
+
url = f"{self.plc_host}/{did}"
+
response = requests.get(url, timeout=10, allow_redirects=True)
+
+
if response.status_code == 200:
+
return DidDocument(response.json())
+
elif response.status_code == 404 or response.status_code == 410:
+
return None # tombstone or not registered
+
else:
+
response.raise_for_status()
+
+
def try_resolve_web(self, did: str) -> DidDocument | None:
+
url = f"http://{did[len('did:web:') :]}/.well-known/did.json"
+
response = requests.get(url, timeout=10, allow_redirects=True)
+
+
if response.status_code == 200:
+
return DidDocument(response.json())
+
elif response.status_code == 404 or response.status_code == 410:
+
return None # tombstone or gone
+
else:
+
response.raise_for_status()
+
+
def resolve_did(self, did: str) -> DidDocument:
+
cached = self.__cache.get(did)
+
if cached:
+
return cached
+
+
if did.startswith("did:plc:"):
+
from_plc = self.try_resolve_plc(did)
+
if from_plc:
+
self.__cache.set(did, from_plc)
+
return from_plc
+
elif did.startswith("did:web:"):
+
from_web = self.try_resolve_web(did)
+
if from_web:
+
self.__cache.set(did, from_web)
+
return from_web
+
raise Exception(f"Failed to resolve {did}!")
+
+
@override
+
def dump_cache(self, path: Path):
+
self.__cache.dump_cache(path)
+
+
@override
+
def load_cache(self, path: Path):
+
self.__cache.load_cache(path)
+
+
class HandleResolver(Cacheable):
+
def __init__(self) -> None:
+
self.__cache: TTLCache[str, str] = TTLCache(ttl_seconds=12 * 60 * 60)
+
+
def try_resolve_dns(self, handle: str) -> str | None:
+
try:
+
dns_query = f"_atproto.{handle}"
+
answers = dns.resolver.resolve(dns_query, "TXT")
+
+
for rdata in answers:
+
for txt_data in rdata.strings:
+
did = txt_data.decode("utf-8").strip()
+
if did.startswith("did="):
+
return did[4:]
+
except dns.resolver.NXDOMAIN:
+
LOGGER.debug(f"DNS record not found for _atproto.{handle}")
+
return None
+
except dns.resolver.NoAnswer:
+
LOGGER.debug(f"No TXT records found for _atproto.{handle}")
+
return None
+
+
def try_resolve_http(self, handle: str) -> str | None:
+
url = f"http://{handle}/.well-known/atproto-did"
+
response = requests.get(url, timeout=10, allow_redirects=True)
+
+
if response.status_code == 200:
+
did = response.text.strip()
+
if did.startswith("did:"):
+
return did
+
else:
+
raise ValueError(f"Got invalid did: from {url} = {did}!")
+
else:
+
response.raise_for_status()
+
+
def resolve_handle(self, handle: str) -> str:
+
cached = self.__cache.get(handle)
+
if cached:
+
return cached
+
+
from_dns = self.try_resolve_dns(handle)
+
if from_dns:
+
self.__cache.set(handle, from_dns)
+
return from_dns
+
+
from_http = self.try_resolve_http(handle)
+
if from_http:
+
self.__cache.set(handle, from_http)
+
return from_http
+
+
raise Exception(f"Failed to resolve handle {handle}!")
+
+
@override
+
def dump_cache(self, path: Path):
+
self.__cache.dump_cache(path)
+
+
@override
+
def load_cache(self, path: Path):
+
self.__cache.load_cache(path)
+
+
+
handle_resolver = HandleResolver()
+
did_resolver = DidResolver(env.PLC_HOST)
+
+
did_cache = env.CACHE_DIR.joinpath('did.cache')
+
handle_cache = env.CACHE_DIR.joinpath('handle.cache')
+
+
did_resolver.load_cache(did_cache)
+
handle_resolver.load_cache(handle_cache)
+
+
def cache_dump():
+
did_resolver.dump_cache(did_cache)
+
handle_resolver.dump_cache(handle_cache)
+
+
shutdown_hook.append(cache_dump)
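
For illustration, a minimal sketch of how the module-level resolvers above fit together (the handle is hypothetical):

```python
from atproto.identity import did_resolver, handle_resolver

# handle -> DID via DNS TXT (_atproto.<handle>), falling back to the HTTP well-known route
did = handle_resolver.resolve_handle("alice.example.com")
# DID -> PDS endpoint taken from the did:plc / did:web document (results cached for 12h)
pds = did_resolver.resolve_did(did).get_atproto_pds()
print(did, pds)
```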
+11
atproto/util.py
···
+
URI = "at://"
+
URI_LEN = len(URI)
+
+
+
class AtUri:
+
@classmethod
+
def record_uri(cls, uri: str) -> tuple[str, str, str]:
+
did, collection, rid = uri[URI_LEN:].split("/")
+
if not (did and collection and rid):
+
raise ValueError(f"Ivalid record uri {uri}!")
+
return did, collection, rid
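
A quick usage sketch of AtUri.record_uri (the record URI below is made up):

```python
did, collection, rkey = AtUri.record_uri(
    "at://did:plc:abc123/app.bsky.feed.post/3k44deefg2a"
)
# did == "did:plc:abc123", collection == "app.bsky.feed.post", rkey == "3k44deefg2a"
```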
+50
bluesky/info.py
···
+
from abc import ABC, abstractmethod
+
from typing import Any, override
+
+
from atproto.identity import did_resolver, handle_resolver
+
from cross.service import Service
+
from util.util import normalize_service_url
+
+
SERVICE = "https://bsky.app"
+
+
+
def validate_and_transform(data: dict[str, Any]):
+
if not data["handle"] and not data["did"]:
+
raise KeyError("no 'handle' or 'did' specified for bluesky input!")
+
+
if "did" in data:
+
did = str(data["did"]) # only did:web and did:plc are supported
+
if not did.startswith("did:plc:") and not did.startswith("did:web:"):
+
raise ValueError(f"Invalid handle {did}!")
+
+
if "pds" in data:
+
data["pds"] = normalize_service_url(data["pds"])
+
+
+
class BlueskyService(ABC, Service):
+
pds: str
+
did: str
+
+
def _init_identity(self) -> None:
+
handle, did, pds = self.get_identity_options()
+
if did:
+
self.did = did
+
if pds:
+
self.pds = pds
+
+
if not did:
+
if not handle:
+
raise KeyError("No did: or atproto handle provided!")
+
self.log.info("Resolving ATP identity for %s...", handle)
+
self.did = handle_resolver.resolve_handle(handle)
+
+
if not pds:
+
self.log.info("Resolving PDS from %s DID document...", self.did)
+
atp_pds = did_resolver.resolve_did(self.did).get_atproto_pds()
+
if not atp_pds:
+
raise Exception("Failed to resolve atproto pds for %s")
+
self.pds = atp_pds
+
+
@abstractmethod
+
def get_identity_options(self) -> tuple[str | None, str | None, str | None]:
+
pass
+283
bluesky/input.py
···
+
import asyncio
+
import json
+
import re
+
from abc import ABC
+
from dataclasses import dataclass, field
+
from typing import Any, cast, override
+
+
import websockets
+
+
from atproto.util import AtUri
+
from bluesky.tokens import tokenize_post
+
from bluesky.info import SERVICE, BlueskyService, validate_and_transform
+
from cross.attachments import (
+
LabelsAttachment,
+
LanguagesAttachment,
+
MediaAttachment,
+
QuoteAttachment,
+
RemoteUrlAttachment,
+
)
+
from cross.media import Blob, download_blob
+
from cross.post import Post
+
from cross.service import InputService
+
from database.connection import DatabasePool
+
from util.util import normalize_service_url
+
+
+
@dataclass(kw_only=True)
+
class BlueskyInputOptions:
+
handle: str | None = None
+
did: str | None = None
+
pds: str | None = None
+
filters: list[re.Pattern[str]] = field(default_factory=lambda: [])
+
+
@classmethod
+
def from_dict(cls, data: dict[str, Any]) -> "BlueskyInputOptions":
+
validate_and_transform(data)
+
+
if "filters" in data:
+
data["filters"] = [re.compile(r) for r in data["filters"]]
+
+
return BlueskyInputOptions(**data)
+
+
+
@dataclass(kw_only=True)
+
class BlueskyJetstreamInputOptions(BlueskyInputOptions):
+
jetstream: str = "wss://jetstream2.us-west.bsky.network/subscribe"
+
+
@classmethod
+
def from_dict(cls, data: dict[str, Any]) -> "BlueskyJetstreamInputOptions":
+
jetstream = data.pop("jetstream", None)
+
+
base = BlueskyInputOptions.from_dict(data).__dict__.copy()
+
if jetstream:
+
base["jetstream"] = normalize_service_url(jetstream)
+
+
return BlueskyJetstreamInputOptions(**base)
+
+
+
class BlueskyBaseInputService(BlueskyService, InputService, ABC):
+
def __init__(self, db: DatabasePool) -> None:
+
super().__init__(SERVICE, db)
+
+
def _on_post(self, record: dict[str, Any]):
+
post_uri = cast(str, record["$xpost.strongRef"]["uri"])
+
post_cid = cast(str, record["$xpost.strongRef"]["cid"])
+
+
parent_uri = cast(
+
str, None if not record.get("reply") else record["reply"]["parent"]["uri"]
+
)
+
parent = None
+
if parent_uri:
+
parent = self._get_post(self.url, self.did, parent_uri)
+
if not parent:
+
self.log.info(
+
"Skipping %s, parent %s not found in db", post_uri, parent_uri
+
)
+
return
+
+
tokens = tokenize_post(record["text"], record.get("facets", []))
+
post = Post(id=post_uri, parent_id=parent_uri, tokens=tokens)
+
+
did, _, rid = AtUri.record_uri(post_uri)
+
post.attachments.put(
+
RemoteUrlAttachment(url=f"https://bsky.app/profile/{did}/post/{rid}")
+
)
+
+
embed: dict[str, Any] = record.get("embed", {})
+
blob_urls: list[tuple[str, str, str | None]] = []
+
def handle_embeds(embed: dict[str, Any]) -> str | None:
+
nonlocal blob_urls, post
+
match cast(str, embed["$type"]):
+
case "app.bsky.embed.record" | "app.bsky.embed.recordWithMedia":
+
rcrd = embed['record']['record'] if embed['record'].get('record') else embed['record']
+
did, collection, _ = AtUri.record_uri(rcrd["uri"])
+
if collection != "app.bsky.feed.post":
+
return f"Unhandled record collection {collection}"
+
if did != self.did:
+
return ""
+
+
rquote = self._get_post(self.url, did, rcrd["uri"])
+
if not rquote:
+
return f"Quote {rcrd["uri"]} not found in the db"
+
post.attachments.put(QuoteAttachment(quoted_id=rcrd["uri"], quoted_user=did))
+
+
if embed.get('media'):
+
return handle_embeds(embed["media"])
+
case "app.bsky.embed.images":
+
for image in embed["images"]:
+
blob_cid = image["image"]["ref"]["$link"]
+
url = f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.did}&cid={blob_cid}"
+
blob_urls.append((url, blob_cid, image.get("alt")))
+
case "app.bsky.embed.video":
+
blob_cid = embed["video"]["ref"]["$link"]
+
url = f"{self.pds}/xrpc/com.atproto.sync.getBlob?did={self.did}&cid={blob_cid}"
+
blob_urls.append((url, blob_cid, embed.get("alt")))
+
case _:
+
self.log.warning(f"Unhandled embed type {embed['$type']}")
+
+
if embed:
+
fexit = handle_embeds(embed)
+
if fexit is not None:
+
self.log.info("Skipping %s! %s", post_uri, fexit)
+
return
+
+
if blob_urls:
+
blobs: list[Blob] = []
+
for url, cid, alt in blob_urls:
+
self.log.info("Downloading %s...", cid)
+
blob: Blob | None = download_blob(url, alt)
+
if not blob:
+
self.log.error(
+
"Skipping %s! Failed to download blob %s.", post_uri, cid
+
)
+
return
+
blobs.append(blob)
+
post.attachments.put(MediaAttachment(blobs=blobs))
+
+
if "langs" in record:
+
post.attachments.put(LanguagesAttachment(langs=record["langs"]))
+
if "labels" in record:
+
post.attachments.put(
+
LabelsAttachment(
+
labels=[
+
label["val"].replace("-", " ") for label in record["values"]
+
]
+
),
+
)
+
+
if parent:
+
self._insert_post(
+
{
+
"user": self.did,
+
"service": self.url,
+
"identifier": post_uri,
+
"parent": parent["id"],
+
"root": parent["id"] if not parent["root"] else parent["root"],
+
"extra_data": json.dumps({"cid": post_cid}),
+
}
+
)
+
else:
+
self._insert_post(
+
{
+
"user": self.did,
+
"service": self.url,
+
"identifier": post_uri,
+
"extra_data": json.dumps({"cid": post_cid}),
+
}
+
)
+
+
for out in self.outputs:
+
self.submitter(lambda: out.accept_post(post))
+
+
def _on_repost(self, record: dict[str, Any]):
+
post_uri = cast(str, record["$xpost.strongRef"]["uri"])
+
post_cid = cast(str, record["$xpost.strongRef"]["cid"])
+
+
reposted_uri = cast(str, record["subject"]["uri"])
+
reposted = self._get_post(self.url, self.did, reposted_uri)
+
if not reposted:
+
self.log.info(
+
"Skipping repost '%s' as reposted post '%s' was not found in the db.",
+
post_uri,
+
reposted_uri,
+
)
+
return
+
+
self._insert_post(
+
{
+
"user": self.did,
+
"service": self.url,
+
"identifier": post_uri,
+
"reposted": reposted["id"],
+
"extra_data": json.dumps({"cid": post_cid}),
+
}
+
)
+
+
for out in self.outputs:
+
self.submitter(lambda: out.accept_repost(post_uri, reposted_uri))
+
+
def _on_delete_post(self, post_id: str, repost: bool):
+
post = self._get_post(self.url, self.did, post_id)
+
if not post:
+
return
+
+
if repost:
+
for output in self.outputs:
+
self.submitter(lambda: output.delete_repost(post_id))
+
else:
+
for output in self.outputs:
+
self.submitter(lambda: output.delete_post(post_id))
+
self._delete_post_by_id(post["id"])
+
+
+
class BlueskyJetstreamInputService(BlueskyBaseInputService):
+
def __init__(self, db: DatabasePool, options: BlueskyJetstreamInputOptions) -> None:
+
super().__init__(db)
+
self.options: BlueskyJetstreamInputOptions = options
+
self._init_identity()
+
+
@override
+
def get_identity_options(self) -> tuple[str | None, str | None, str | None]:
+
return (self.options.handle, self.options.did, self.options.pds)
+
+
def _accept_msg(self, msg: websockets.Data) -> None:
+
data: dict[str, Any] = cast(dict[str, Any], json.loads(msg))
+
if data.get("did") != self.did:
+
return
+
commit: dict[str, Any] | None = data.get("commit")
+
if not commit:
+
return
+
+
commit_type: str = cast(str, commit["operation"])
+
match commit_type:
+
case "create":
+
record: dict[str, Any] = cast(dict[str, Any], commit["record"])
+
record["$xpost.strongRef"] = {
+
"cid": commit["cid"],
+
"uri": f"at://{self.did}/{commit['collection']}/{commit['rkey']}",
+
}
+
+
match cast(str, commit["collection"]):
+
case "app.bsky.feed.post":
+
self._on_post(record)
+
case "app.bsky.feed.repost":
+
self._on_repost(record)
+
case _:
+
pass
+
case "delete":
+
post_id: str = (
+
f"at://{self.did}/{commit['collection']}/{commit['rkey']}"
+
)
+
match cast(str, commit["collection"]):
+
case "app.bsky.feed.post":
+
self._on_delete_post(post_id, False)
+
case "app.bsky.feed.repost":
+
self._on_delete_post(post_id, True)
+
case _:
+
pass
+
case _:
+
pass
+
+
@override
+
async def listen(self):
+
url = self.options.jetstream + "?"
+
url += "wantedCollections=app.bsky.feed.post"
+
url += "&wantedCollections=app.bsky.feed.repost"
+
url += f"&wantedDids={self.did}"
+
+
async for ws in websockets.connect(url):
+
try:
+
self.log.info("Listening to %s...", self.options.jetstream)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
self.submitter(lambda: self._accept_msg(msg))
+
+
listen = asyncio.create_task(listen_for_messages())
+
+
_ = await asyncio.gather(listen)
+
except websockets.ConnectionClosedError as e:
+
self.log.error(e, stack_info=True, exc_info=True)
+
self.log.info("Reconnecting to %s...", self.options.jetstream)
+
continue
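
For reference, a hypothetical options dict as BlueskyJetstreamInputOptions.from_dict would accept it (the handle and filter are made up; a did/pds pair can be supplied instead of a handle):

```python
opts = BlueskyJetstreamInputOptions.from_dict({
    "handle": "alice.example.com",   # hypothetical; resolved to a DID at startup
    "filters": [r"^#nocrosspost"],   # compiled to re.Pattern objects by from_dict
})
# jetstream defaults to wss://jetstream2.us-west.bsky.network/subscribe
```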
+95
bluesky/tokens.py
···
+
from cross.tokens import LinkToken, MentionToken, TagToken, TextToken, Token
+
+
+
def tokenize_post(text: str, facets: list[dict]) -> list[Token]:
+
def decode(ut8: bytes) -> str:
+
return ut8.decode(encoding="utf-8")
+
+
if not text:
+
return []
+
ut8_text = text.encode(encoding="utf-8")
+
if not facets:
+
return [TextToken(text=decode(ut8_text))]
+
+
slices: list[tuple[int, int, str, str]] = []
+
+
for facet in facets:
+
features: list[dict] = facet.get("features", [])
+
if not features:
+
continue
+
+
# we don't support overlapping facets/features
+
feature = features[0]
+
feature_type = feature["$type"]
+
index = facet["index"]
+
match feature_type:
+
case "app.bsky.richtext.facet#tag":
+
slices.append(
+
(index["byteStart"], index["byteEnd"], "tag", feature["tag"])
+
)
+
case "app.bsky.richtext.facet#link":
+
slices.append(
+
(index["byteStart"], index["byteEnd"], "link", feature["uri"])
+
)
+
case "app.bsky.richtext.facet#mention":
+
slices.append(
+
(index["byteStart"], index["byteEnd"], "mention", feature["did"])
+
)
+
+
if not slices:
+
return [TextToken(text=decode(ut8_text))]
+
+
slices.sort(key=lambda s: s[0])
+
unique: list[tuple[int, int, str, str]] = []
+
current_end = 0
+
for start, end, ttype, val in slices:
+
if start >= current_end:
+
unique.append((start, end, ttype, val))
+
current_end = end
+
+
if not unique:
+
return [TextToken(text=decode(ut8_text))]
+
+
tokens: list[Token] = []
+
prev = 0
+
+
for start, end, ttype, val in unique:
+
if start > prev:
+
# text between facets
+
tokens.append(TextToken(text=decode(ut8_text[prev:start])))
+
# facet token
+
match ttype:
+
case "link":
+
label = decode(ut8_text[start:end])
+
+
# try to unflatten links
+
split = val.split("://", 1)
+
if len(split) > 1:
+
if split[1].startswith(label):
+
tokens.append(LinkToken(href=val))
+
prev = end
+
continue
+
+
if label.endswith("...") and split[1].startswith(label[:-3]):
+
tokens.append(LinkToken(href=val))
+
prev = end
+
continue
+
+
tokens.append(LinkToken(href=val, label=label))
+
case "tag":
+
tag = decode(ut8_text[start:end])
+
tokens.append(TagToken(tag=tag[1:] if tag.startswith("#") else tag))
+
case "mention":
+
mention = decode(ut8_text[start:end])
+
tokens.append(
+
MentionToken(
+
username=mention[1:] if mention.startswith("@") else mention,
+
uri=val,
+
)
+
)
+
prev = end
+
+
if prev < len(ut8_text):
+
tokens.append(TextToken(text=decode(ut8_text[prev:])))
+
+
return tokens
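
A small worked example of how facets map to tokens (offsets are UTF-8 byte positions, as in the Bluesky richtext schema):

```python
text = "hello #world"
facets = [{
    "index": {"byteStart": 6, "byteEnd": 12},
    "features": [{"$type": "app.bsky.richtext.facet#tag", "tag": "world"}],
}]
tokenize_post(text, facets)
# -> [TextToken(text="hello "), TagToken(tag="world")]
```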
+23 -8
cross/attachments.py
···
from dataclasses import dataclass
+
from cross.media import Blob
-
@dataclass
-
class Attachment():
+
+
@dataclass(kw_only=True)
+
class Attachment:
pass
-
@dataclass
-
class SpoilerAttachment(Attachment):
-
spoiler: str
-
@dataclass
+
@dataclass(kw_only=True)
+
class LabelsAttachment(Attachment):
+
labels: list[str]
+
+
+
@dataclass(kw_only=True)
class LanguagesAttachment(Attachment):
langs: list[str]
-
@dataclass
+
+
@dataclass(kw_only=True)
class SensitiveAttachment(Attachment):
sensitive: bool
-
@dataclass
+
+
@dataclass(kw_only=True)
class RemoteUrlAttachment(Attachment):
url: str
+
+
@dataclass(kw_only=True)
+
class MediaAttachment(Attachment):
+
blobs: list[Blob]
+
+
@dataclass(kw_only=True)
+
class QuoteAttachment(Attachment):
+
quoted_id: str
+
quoted_user: str
-18
cross/fragments.py
···
-
from dataclasses import dataclass
-
-
@dataclass
-
class Fragment:
-
start: int
-
end: int
-
-
@dataclass
-
class LinkFragment(Fragment):
-
url: str
-
-
@dataclass
-
class TagFragment(Fragment):
-
tag: str
-
-
@dataclass
-
class MentionFragment(Fragment):
-
uri: str
+170
cross/media.py
···
+
from dataclasses import dataclass, field
+
+
import json
+
import re
+
import os
+
from typing import Any, cast
+
import magic
+
import subprocess
+
import urllib.parse
+
+
import requests
+
+
FILENAME = re.compile(r'filename="?([^\";]*)"?')
+
MAGIC = magic.Magic(mime=True)
+
+
+
@dataclass
+
class Blob:
+
url: str
+
mime: str
+
io: bytes = field(repr=False)
+
name: str | None = None
+
alt: str | None = None
+
+
+
@dataclass
+
class MediaInfo:
+
width: int
+
height: int
+
duration: float | None = None
+
+
+
def mime_from_bytes(io: bytes) -> str:
+
mime = MAGIC.from_buffer(io)
+
if not mime:
+
mime = "application/octet-stream"
+
return mime
+
+
def download_blob(url: str, alt: str | None = None, max_bytes: int = 100_000_000) -> Blob | None:
+
name = get_filename_from_url(url)
+
io = download_chunked(url, max_bytes)
+
if not io:
+
return None
+
return Blob(url, mime_from_bytes(io), io, name, alt)
+
+
def download_chunked(url: str, max_bytes: int = 100_000_000) -> bytes | None:
+
response = requests.get(url, stream=True, timeout=20)
+
if response.status_code != 200:
+
return None
+
+
downloaded_bytes = b""
+
current_size = 0
+
+
for chunk in response.iter_content(chunk_size=8192):
+
if not chunk:
+
continue
+
+
current_size += len(chunk)
+
if current_size > max_bytes:
+
response.close()
+
return None
+
+
downloaded_bytes += chunk
+
+
return downloaded_bytes
+
+
+
def get_filename_from_url(url: str) -> str:
+
try:
+
response = requests.head(url, timeout=5, allow_redirects=True)
+
disposition = response.headers.get("Content-Disposition")
+
if disposition:
+
filename = FILENAME.findall(disposition)
+
if filename:
+
return filename[0]
+
except requests.RequestException:
+
pass
+
+
parsed_url = urllib.parse.urlparse(url)
+
base_name = os.path.basename(parsed_url.path)
+
+
# hardcoded fix to return the cid for pds blobs
+
if base_name == "com.atproto.sync.getBlob":
+
qs = urllib.parse.parse_qs(parsed_url.query)
+
if qs and qs.get("cid"):
+
return qs["cid"][0]
+
+
return base_name
+
+
+
def convert_to_mp4(video: Blob) -> Blob:
+
cmd = [
+
"ffmpeg",
+
"-i", "pipe:0",
+
"-c:v", "libx264",
+
"-crf", "30",
+
"-preset", "slow",
+
"-c:a", "aac",
+
"-b:a", "128k",
+
"-movflags", "frag_keyframe+empty_moov+default_base_moof",
+
"-f", "mp4",
+
"pipe:1",
+
]
+
+
proc = subprocess.Popen(
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
out_bytes, err = proc.communicate(input=video.io)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
+
+
return Blob(video.url, mime_from_bytes(out_bytes), out_bytes, video.name, video.alt)
+
+
+
def compress_image(image: Blob, quality: int = 95) -> Blob:
+
cmd = [
+
"ffmpeg",
+
"-f", "image2pipe",
+
"-i", "pipe:0",
+
"-c:v", "webp",
+
"-q:v", str(quality),
+
"-f", "image2pipe",
+
"pipe:1",
+
]
+
+
proc = subprocess.Popen(
+
cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
out_bytes, err = proc.communicate(input=image.io)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffmpeg compress failed: {err.decode()}")
+
+
return Blob(image.url, "image/webp", out_bytes, image.name, image.alt)
+
+
+
def probe_bytes(bytes: bytes) -> dict[str, Any]:
+
cmd = [
+
"ffprobe",
+
"-v",
+
"error",
+
"-show_format",
+
"-show_streams",
+
"-print_format",
+
"json",
+
"pipe:0",
+
]
+
proc = subprocess.run(
+
cmd, input=bytes, stdout=subprocess.PIPE, stderr=subprocess.PIPE
+
)
+
+
if proc.returncode != 0:
+
raise RuntimeError(f"ffprobe failed: {proc.stderr.decode()}")
+
+
return json.loads(proc.stdout)
+
+
+
def get_media_meta(bytes: bytes) -> MediaInfo:
+
probe = probe_bytes(bytes)
+
streams = [s for s in probe["streams"] if s["codec_type"] == "video"]
+
if not streams:
+
raise ValueError("No video stream found")
+
+
media: dict[str, Any] = cast(dict[str, Any], streams[0])
+
return MediaInfo(
+
width=media["width"],
+
height=media["height"],
+
duration=media.get("duration", probe["format"].get("duration")),
+
)
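
A usage sketch (the URL is hypothetical); download_blob caps the transfer at max_bytes and get_media_meta shells out to ffprobe:

```python
blob = download_blob("https://example.com/cat.webm", alt="a cat", max_bytes=50_000_000)
if blob:
    info = get_media_meta(blob.io)
    print(blob.mime, info.width, info.height, info.duration)
```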
+14 -8
cross/post.py
···
from dataclasses import dataclass, field
+
from typing import TypeVar
+
from cross.attachments import Attachment
-
from cross.fragments import Fragment
-
from typing import TypeVar
+
from cross.tokens import Token
+
+
T = TypeVar("T", bound=Attachment)
-
T = TypeVar('T', bound=Attachment)
class AttachmentKeeper:
def __init__(self) -> None:
self._map: dict[type, Attachment] = {}
-
def put(self, cls: type[T], attachment: T) -> None:
-
self._map[cls] = attachment
+
def put(self, attachment: Attachment) -> None:
+
self._map[attachment.__class__] = attachment
def get(self, cls: type[T]) -> T | None:
instance = self._map.get(cls)
···
raise TypeError(f"Expected {cls.__name__}, got {type(instance).__name__}")
return instance
+
def __repr__(self) -> str:
+
return f"AttachmentKeeper(_map={self._map.values()})"
+
+
@dataclass
class Post:
id: str
parent_id: str | None
-
text: bytes # utf-8 text bytes
-
attachments: AttachmentKeeper
-
fragments: list[Fragment] = field(default_factory=list)
+
tokens: list[Token]
+
text_type: str = "text/plain"
+
attachments: AttachmentKeeper = field(default_factory=AttachmentKeeper)
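
A sketch of how a Post and its AttachmentKeeper are used by the input services in this change:

```python
from cross.post import Post
from cross.tokens import TextToken
from cross.attachments import LanguagesAttachment

post = Post(id="12345", parent_id=None, tokens=[TextToken(text="hello")])
post.attachments.put(LanguagesAttachment(langs=["en"]))   # keyed by attachment class
langs = post.attachments.get(LanguagesAttachment)         # typed lookup, None if absent
```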
+140 -10
cross/service.py
···
-
from pathlib import Path
+
import logging
import sqlite3
-
from typing import cast
+
from abc import ABC, abstractmethod
+
from typing import Any, Callable, cast
-
from database.connection import get_conn
+
from cross.post import Post
+
from database.connection import DatabasePool
+
+
columns: list[str] = [
+
"user",
+
"service",
+
"identifier",
+
"parent",
+
"root",
+
"reposted",
+
"extra_data",
+
]
+
placeholders: str = ", ".join(["?" for _ in columns])
+
column_names: str = ", ".join(columns)
class Service:
-
def __init__(self, url: str, db: Path) -> None:
+
def __init__(self, url: str, db: DatabasePool) -> None:
self.url: str = url
-
self.conn: sqlite3.Connection = get_conn(db)
+
self.db: DatabasePool = db
+
self.log: logging.Logger = logging.getLogger(self.__class__.__name__)
+
# self._lock: threading.Lock = threading.Lock()
-
def get_post(self, url: str, user: str, identifier: str) -> sqlite3.Row | None:
-
cursor = self.conn.cursor()
+
def _get_post(self, url: str, user: str, identifier: str) -> sqlite3.Row | None:
+
cursor = self.db.get_conn().cursor()
_ = cursor.execute(
"""
SELECT * FROM posts
WHERE service = ?
-
AND user_id = ?
+
AND user = ?
AND identifier = ?
""",
(url, user, identifier),
)
return cast(sqlite3.Row, cursor.fetchone())
-
def get_post_by_id(self, id: int) -> sqlite3.Row | None:
-
cursor = self.conn.cursor()
+
def _get_post_by_id(self, id: int) -> sqlite3.Row | None:
+
cursor = self.db.get_conn().cursor()
_ = cursor.execute("SELECT * FROM posts WHERE id = ?", (id,))
return cast(sqlite3.Row, cursor.fetchone())
+
+
def _get_mappings(
+
self, original: int, service: str, user: str
+
) -> list[sqlite3.Row]:
+
cursor = self.db.get_conn().cursor()
+
_ = cursor.execute(
+
"""
+
SELECT *
+
FROM posts AS p
+
JOIN mappings AS m
+
ON p.id = m.mapped
+
WHERE m.original = ?
+
AND p.service = ?
+
AND p.user = ?
+
ORDER BY p.id;
+
""",
+
(original, service, user),
+
)
+
return cursor.fetchall()
+
+
def _find_mapped_thread(
+
self, parent: str, iservice: str, iuser: str, oservice: str, ouser: str
+
):
+
reply_data = self._get_post(iservice, iuser, parent)
+
if not reply_data:
+
return None
+
+
reply_mappings: list[sqlite3.Row] | None = self._get_mappings(
+
reply_data["id"], oservice, ouser
+
)
+
if not reply_mappings:
+
return None
+
+
reply_identifier: sqlite3.Row = reply_mappings[-1]
+
root_identifier: sqlite3.Row = reply_mappings[0]
+
+
if reply_data["root_id"]:
+
root_data = self._get_post_by_id(reply_data["root"])
+
if not root_data:
+
return None
+
+
root_mappings = self._get_mappings(reply_data["root"], oservice, ouser)
+
if not root_mappings:
+
return None
+
root_identifier = root_mappings[0]
+
+
return (
+
root_identifier[0], # real ids
+
reply_identifier[0],
+
reply_data["root_id"], # db ids
+
reply_data["id"],
+
)
+
+
def _insert_post(self, post_data: dict[str, Any]):
+
values = [post_data.get(col) for col in columns]
+
cursor = self.db.get_conn().cursor()
+
_ = cursor.execute(
+
f"INSERT INTO posts ({column_names}) VALUES ({placeholders})", values
+
)
+
+
def _insert_post_mapping(self, original: int, mapped: int):
+
cursor = self.db.get_conn().cursor()
+
_ = cursor.execute(
+
"INSERT OR IGNORE INTO mappings (original, mapped) VALUES (?, ?);",
+
(original, mapped),
+
)
+
_ = cursor.execute(
+
"INSERT OR IGNORE INTO mappings (original, mapped) VALUES (?, ?);",
+
(mapped, original),
+
)
+
+
def _delete_post(self, url: str, user: str, identifier: str):
+
cursor = self.db.get_conn().cursor()
+
_ = cursor.execute(
+
"""
+
DELETE FROM posts
+
WHERE identifier = ?
+
AND service = ?
+
AND user = ?
+
""",
+
(identifier, url, user),
+
)
+
+
def _delete_post_by_id(self, id: int):
+
cursor = self.db.get_conn().cursor()
+
_ = cursor.execute("DELETE FROM posts WHERE id = ?", (id,))
+
+
+
class OutputService(Service):
+
def accept_post(self, service: str, user: str, post: Post):
+
self.log.warning("NOT IMPLEMENTED (%s), accept_post %s", self.url, post.id)
+
+
def delete_post(self, service: str, user: str, post_id: str):
+
self.log.warning("NOT IMPLEMENTED (%s), delete_post %s", self.url, post_id)
+
+
def accept_repost(self, service: str, user: str, repost_id: str, reposted_id: str):
+
self.log.warning(
+
"NOT IMPLEMENTED (%s), accept_repost %s of %s",
+
self.url,
+
repost_id,
+
reposted_id,
+
)
+
+
def delete_repost(self, service: str, user: str, repost_id: str):
+
self.log.warning("NOT IMPLEMENTED (%s), delete_repost %s", self.url, repost_id)
+
+
+
class InputService(ABC, Service):
+
outputs: list[OutputService]
+
submitter: Callable[[Callable[[], None]], None]
+
+
@abstractmethod
+
async def listen(self):
+
pass
+23
cross/tokens.py
···
+
from dataclasses import dataclass
+
+
@dataclass(kw_only=True)
+
class Token:
+
pass
+
+
@dataclass(kw_only=True)
+
class TextToken(Token):
+
text: str
+
+
@dataclass(kw_only=True)
+
class LinkToken(Token):
+
href: str
+
label: str | None = None
+
+
@dataclass(kw_only=True)
+
class TagToken(Token):
+
tag: str
+
+
@dataclass(kw_only=True)
+
class MentionToken(Token):
+
username: str
+
uri: str | None = None
+20 -2
database/connection.py
···
+
import sqlite3
+
import threading
from pathlib import Path
-
import sqlite3
+
+
+
class DatabasePool:
+
def __init__(self, db: Path) -> None:
+
self.db: Path = db
+
self._local: threading.local = threading.local()
+
self._conns: list[sqlite3.Connection] = []
+
+
def get_conn(self) -> sqlite3.Connection:
+
if getattr(self._local, 'conn', None) is None:
+
self._local.conn = get_conn(self.db)
+
self._conns.append(self._local.conn)
+
return self._local.conn
+
+
def close(self):
+
for c in self._conns:
+
c.close()
def get_conn(db: Path) -> sqlite3.Connection:
-
conn = sqlite3.connect(db, autocommit=True)
+
conn = sqlite3.connect(db, autocommit=True, check_same_thread=False)
conn.row_factory = sqlite3.Row
_ = conn.executescript("""
PRAGMA journal_mode = WAL;
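
A sketch of the intended pooling behaviour: each thread lazily gets its own connection, and close() tears all of them down:

```python
from pathlib import Path
from database.connection import DatabasePool

pool = DatabasePool(Path("./data/data.db"))
conn = pool.get_conn()            # one sqlite3.Connection per calling thread
row = conn.execute("SELECT 1").fetchone()
pool.close()                      # closes every connection handed out so far
```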
+13 -27
database/migrations.py
···
import sqlite3
from pathlib import Path
+
from typing import Callable
-
from util.util import LOGGER
from database.connection import get_conn
-
+
from util.util import LOGGER
class DatabaseMigrator:
def __init__(self, db_path: Path, migrations_folder: Path) -> None:
self.db_path: Path = db_path
self.migrations_folder: Path = migrations_folder
self.conn: sqlite3.Connection = get_conn(db_path)
+
_ = self.conn.execute("PRAGMA foreign_keys = OFF;")
+
self.conn.autocommit = False
def close(self):
self.conn.close()
···
_ = cursor.execute(f"PRAGMA user_version = {version}")
self.conn.commit()
-
def get_migrations(self) -> list[tuple[int, Path]]:
-
if not self.migrations_folder.exists():
-
return []
-
-
files: list[tuple[int, Path]] = []
-
for f in self.migrations_folder.glob("*.sql"):
-
try:
-
version = int(f.stem.split("_")[0])
-
files.append((version, f))
-
except (ValueError, IndexError):
-
LOGGER.warning("Warning: Skipping invalid migration file: %", f.name)
-
-
return sorted(files, key=lambda x: x[0])
-
-
def apply_migration(self, version: int, path: Path):
-
with open(path, "r") as f:
-
sql = f.read()
-
-
cursor = self.conn.cursor()
+
def apply_migration(self, version: int, filename: str, migration: Callable[[sqlite3.Connection], None]):
try:
-
_ = cursor.executescript(sql)
+
_ = migration(self.conn)
self.set_version(version)
-
LOGGER.info("Applied migration: %s", path.name)
+
self.conn.commit()
+
LOGGER.info("Applied migration: %s..", filename)
except sqlite3.Error as e:
self.conn.rollback()
-
raise Exception(f"Error applying migration {version}: {e}")
+
raise Exception(f"Error applying migration {filename}: {e}")
def migrate(self):
current_version = self.get_version()
-
migrations = self.get_migrations()
+
from migrations._registry import load_migrations
+
migrations = load_migrations(self.migrations_folder)
if not migrations:
LOGGER.warning("No migration files found.")
···
LOGGER.info("No pending migrations.")
return
-
for version, filepath in pending:
-
self.apply_migration(version, filepath)
+
for version, filename, migration in pending:
+
self.apply_migration(version, filename, migration)
+11 -2
env.py
···
import os
+
from pathlib import Path
-
DATA_DIR = os.environ.get('DATA_DIR') or "./data"
-
MIGRATIONS_DIR = os.environ.get('MIGRATIONS_DIR') or "./migrations"
+
DEV = bool(os.environ.get("DEV")) or False
+
+
DATA_DIR = Path(os.environ.get("DATA_DIR") or "./data")
+
CACHE_DIR = Path(os.environ.get("CACHE_DIR") or DATA_DIR.joinpath("cache"))
+
SETTINGS_DIR = Path(os.environ.get("SETTINGS_DIR") or DATA_DIR.joinpath("settings.json"))
+
DATABASE_DIR = Path(os.environ.get("DATABASE_DIR") or DATA_DIR.joinpath("data.db"))
+
+
MIGRATIONS_DIR = Path(os.environ.get("MIGRATIONS_DIR") or "./migrations")
+
+
PLC_HOST = os.environ.get("PLC_HOST") or "https://plc.directory"
+40 -18
main.py
···
+
import asyncio
+
import json
import queue
import threading
from pathlib import Path
-
from time import sleep
from typing import Callable
+
from database.connection import DatabasePool
import env
from database.migrations import DatabaseMigrator
-
from util.util import LOGGER
+
from registry import create_input_service, create_output_service
+
from registry_bootstrap import bootstrap
+
from util.util import LOGGER, read_env, shutdown_hook
def main() -> None:
-
data = Path(env.DATA_DIR)
+
if not env.DATA_DIR.exists():
+
env.DATA_DIR.mkdir(parents=True)
-
if not data.exists():
-
data.mkdir(parents=True)
-
-
settings = data.joinpath("settings.json")
-
database = data.joinpath("db.sqlite")
-
-
if not settings.exists():
-
LOGGER.info("First launch detected! Creating %s and exiting!", settings)
+
if not env.SETTINGS_DIR.exists():
+
LOGGER.info("First launch detected! Creating %s and exiting!", env.SETTINGS_DIR)
return
-
LOGGER.info("Loading settings...")
-
# TODO
-
-
migrator = DatabaseMigrator(database, Path(env.MIGRATIONS_DIR))
+
migrator = DatabaseMigrator(env.DATABASE_DIR, env.MIGRATIONS_DIR)
try:
migrator.migrate()
except Exception:
LOGGER.exception("Failed to migrate database!")
+
return
finally:
migrator.close()
+
db_pool = DatabasePool(env.DATABASE_DIR)
+
+
LOGGER.info("Bootstrapping registries...")
+
bootstrap()
+
+
LOGGER.info("Loading settings...")
+
+
with open(env.SETTINGS_DIR) as f:
+
settings = json.load(f)
+
read_env(settings)
+
+
if "input" not in settings:
+
raise KeyError("No `input` sepcified in settings!")
+
if "outputs" not in settings:
+
raise KeyError("No `outputs` spicified in settings!")
+
+
input = create_input_service(db_pool, settings["input"])
+
outputs = [create_output_service(db_pool, data) for data in settings["outputs"]]
+
LOGGER.info("Starting task worker...")
def worker(task_queue: queue.Queue[Callable[[], None] | None]):
···
thread = threading.Thread(target=worker, args=(task_queue,), daemon=True)
thread.start()
-
LOGGER.info("Connecting to %s...", 'TODO') # TODO
+
LOGGER.info("Connecting to %s...", input.url)
+
input.outputs = outputs
+
input.submitter = lambda c: task_queue.put(c)
try:
-
task_queue.put(lambda: print("hi"))
-
sleep(10) # TODO
+
asyncio.run(input.listen())
except KeyboardInterrupt:
LOGGER.info("Stopping...")
task_queue.join()
task_queue.put(None)
thread.join()
+
db_pool.close()
+
+
for shook in shutdown_hook:
+
shook()
+
if __name__ == "__main__":
main()
+109
mastodon/info.py
···
+
from abc import ABC, abstractmethod
+
from dataclasses import dataclass
+
from typing import Any
+
+
import requests
+
+
from cross.service import Service
+
from util.util import normalize_service_url
+
+
+
def validate_and_transform(data: dict[str, Any]):
+
if "token" not in data or "instance" not in data:
+
raise KeyError("Missing required values 'token' or 'instance'")
+
+
data["instance"] = normalize_service_url(data["instance"])
+
+
+
@dataclass(kw_only=True)
+
class InstanceInfo:
+
max_characters: int = 500
+
max_media_attachments: int = 4
+
characters_reserved_per_url: int = 23
+
+
image_size_limit: int = 16777216
+
video_size_limit: int = 103809024
+
+
text_format: str = "text/plain"
+
+
@classmethod
+
def from_api(cls, data: dict[str, Any]) -> "InstanceInfo":
+
config: dict[str, Any] = {}
+
+
if "statuses" in data:
+
statuses_config: dict[str, Any] = data.get("statuses", {})
+
if "max_characters" in statuses_config:
+
config["max_characters"] = statuses_config["max_characters"]
+
if "max_media_attachments" in statuses_config:
+
config["max_media_attachments"] = statuses_config[
+
"max_media_attachments"
+
]
+
if "characters_reserved_per_url" in statuses_config:
+
config["characters_reserved_per_url"] = statuses_config[
+
"characters_reserved_per_url"
+
]
+
+
# glitch content type
+
if "supported_mime_types" in statuses_config:
+
text_mimes: list[str] = statuses_config["supported_mime_types"]
+
+
if "text/x.misskeymarkdown" in text_mimes:
+
config["text_format"] = "text/x.misskeymarkdown"
+
elif "text/markdown" in text_mimes:
+
config["text_format"] = "text/markdown"
+
+
if "media_attachments" in data:
+
media_config: dict[str, Any] = data["media_attachments"]
+
if "image_size_limit" in media_config:
+
config["image_size_limit"] = media_config["image_size_limit"]
+
if "video_size_limit" in media_config:
+
config["video_size_limit"] = media_config["video_size_limit"]
+
+
# *oma extensions
+
if "max_toot_chars" in data:
+
config["max_characters"] = data["max_toot_chars"]
+
if "upload_limit" in data:
+
config["image_size_limit"] = data["upload_limit"]
+
config["video_size_limit"] = data["upload_limit"]
+
+
if "pleroma" in data:
+
pleroma: dict[str, Any] = data["pleroma"]
+
if "metadata" in pleroma:
+
metadata: dict[str, Any] = pleroma["metadata"]
+
if "post_formats" in metadata:
+
post_formats: list[str] = metadata["post_formats"]
+
+
if "text/x.misskeymarkdown" in post_formats:
+
config["text_format"] = "text/x.misskeymarkdown"
+
elif "text/markdown" in post_formats:
+
config["text_format"] = "text/markdown"
+
+
return InstanceInfo(**config)
+
+
+
class MastodonService(ABC, Service):
+
def verify_credentials(self):
+
token = self._get_token()
+
response = requests.get(
+
f"{self.url}/api/v1/accounts/verify_credentials",
+
headers={"Authorization": f"Bearer {token}"},
+
)
+
if response.status_code != 200:
+
self.log.error("Failed to validate user credentials!")
+
response.raise_for_status()
+
return dict(response.json())
+
+
def fetch_instance_info(self):
+
token = self._get_token()
+
response = requests.get(
+
f"{self.url}/api/v1/instance",
+
headers={"Authorization": f"Bearer {token}"},
+
)
+
if response.status_code != 200:
+
self.log.error("Failed to get instance info!")
+
response.raise_for_status()
+
return dict(response.json())
+
+
@abstractmethod
+
def _get_token(self) -> str:
+
pass
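
InstanceInfo.from_api tolerates the vanilla Mastodon shape as well as *oma extensions; a small hypothetical example:

```python
info = InstanceInfo.from_api({
    "statuses": {"max_characters": 500, "max_media_attachments": 4},
    "max_toot_chars": 5000,   # *oma-style override wins over the statuses block
})
assert info.max_characters == 5000
```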
+232
mastodon/input.py
···
+
import asyncio
+
import json
+
import re
+
from dataclasses import dataclass, field
+
from typing import Any, cast, override
+
+
import websockets
+
+
from cross.attachments import (
+
LabelsAttachment,
+
LanguagesAttachment,
+
MediaAttachment,
+
QuoteAttachment,
+
RemoteUrlAttachment,
+
SensitiveAttachment,
+
)
+
from cross.media import Blob, download_blob
+
from cross.post import Post
+
from cross.service import InputService
+
from database.connection import DatabasePool
+
from mastodon.info import MastodonService, validate_and_transform
+
from mastodon.parser import StatusParser
+
+
ALLOWED_VISIBILITY: list[str] = ["public", "unlisted"]
+
+
+
@dataclass(kw_only=True)
+
class MastodonInputOptions:
+
token: str
+
instance: str
+
allowed_visibility: list[str] = field(
+
default_factory=lambda: ALLOWED_VISIBILITY.copy()
+
)
+
filters: list[re.Pattern[str]] = field(default_factory=lambda: [])
+
+
@classmethod
+
def from_dict(cls, data: dict[str, Any]) -> "MastodonInputOptions":
+
validate_and_transform(data)
+
+
if "allowed_visibility" in data:
+
for vis in data.get("allowed_visibility", []):
+
if vis not in ALLOWED_VISIBILITY:
+
raise ValueError(f"Invalid visibility option {vis}!")
+
+
if "filters" in data:
+
data["filters"] = [re.compile(r) for r in data["filters"]]
+
+
return MastodonInputOptions(**data)
+
+
+
class MastodonInputService(MastodonService, InputService):
+
def __init__(self, db: DatabasePool, options: MastodonInputOptions) -> None:
+
super().__init__(options.instance, db)
+
self.options: MastodonInputOptions = options
+
+
self.log.info("Verifying %s credentails...", self.url)
+
response = self.verify_credentials()
+
self.user_id: str = response["id"]
+
+
self.log.info("Getting %s configuration...", self.url)
+
response = self.fetch_instance_info()
+
self.streaming_url: str = response["urls"]["streaming_api"]
+
+
@override
+
def _get_token(self) -> str:
+
return self.options.token
+
+
def _on_create_post(self, status: dict[str, Any]):
+
if status["account"]["id"] != self.user_id:
+
return
+
+
if status["visibility"] not in self.options.allowed_visibility:
+
return
+
+
reblog: dict[str, Any] | None = status.get("reblog")
+
if reblog:
+
if reblog["account"]["id"] != self.user_id:
+
return
+
self._on_reblog(status, reblog)
+
return
+
+
if status.get("poll"):
+
self.log.info("Skipping '%s'! Contains a poll..", status["id"])
+
return
+
+
quote: dict[str, Any] | None = status.get("quote")
+
if quote:
+
quote = quote['quoted_status'] if quote.get('quoted_status') else quote
+
if not quote or quote["account"]["id"] != self.user_id:
+
return
+
+
rquote = self._get_post(self.url, self.user_id, quote['id'])
+
if not rquote:
+
self.log.info(
+
"Skipping %s, parent %s not found in db", status["id"], quote['id']
+
)
+
return
+
+
in_reply: str | None = status.get("in_reply_to_id")
+
in_reply_to: str | None = status.get("in_reply_to_account_id")
+
if in_reply_to and in_reply_to != self.user_id:
+
return
+
+
parent = None
+
if in_reply:
+
parent = self._get_post(self.url, self.user_id, in_reply)
+
if not parent:
+
self.log.info(
+
"Skipping %s, parent %s not found in db", status["id"], in_reply
+
)
+
return
+
parser = StatusParser(status)
+
parser.feed(status["content"])
+
tokens = parser.get_result()
+
+
post = Post(id=status["id"], parent_id=in_reply, tokens=tokens)
+
+
if quote:
+
post.attachments.put(QuoteAttachment(quoted_id=quote['id'], quoted_user=self.user_id))
+
if status.get("url"):
+
post.attachments.put(RemoteUrlAttachment(url=status["url"]))
+
if status.get("sensitive"):
+
post.attachments.put(SensitiveAttachment(sensitive=True))
+
if status.get("language"):
+
post.attachments.put(LanguagesAttachment(langs=[status["language"]]))
+
if status.get("spoiler"):
+
post.attachments.put(LabelsAttachment(labels=[status["spoiler"]]))
+
+
blobs: list[Blob] = []
+
for media in status.get("media_attachments", []):
+
self.log.info("Downloading %s...", media["url"])
+
blob: Blob | None = download_blob(media["url"], media.get("description"))
+
if not blob:
+
self.log.error(
+
"Skipping %s! Failed to download media %s.",
+
status["id"],
+
media["url"],
+
)
+
return
+
blobs.append(blob)
+
+
if blobs:
+
post.attachments.put(MediaAttachment(blobs=blobs))
+
+
if parent:
+
self._insert_post(
+
{
+
"user": self.user_id,
+
"service": self.url,
+
"identifier": status["id"],
+
"parent": parent["id"],
+
"root": parent["id"] if not parent["root"] else parent["root"],
+
}
+
)
+
else:
+
self._insert_post(
+
{
+
"user": self.user_id,
+
"service": self.url,
+
"identifier": status["id"],
+
}
+
)
+
+
for out in self.outputs:
+
self.submitter(lambda: out.accept_post(post))
+
+
def _on_reblog(self, status: dict[str, Any], reblog: dict[str, Any]):
+
reposted = self._get_post(self.url, self.user_id, reblog["id"])
+
if not reposted:
+
self.log.info(
+
"Skipping repost '%s' as reposted post '%s' was not found in the db.",
+
status["id"],
+
reblog["id"],
+
)
+
return
+
+
self._insert_post(
+
{
+
"user": self.user_id,
+
"service": self.url,
+
"identifier": status["id"],
+
"reposted": reposted["id"],
+
}
+
)
+
+
for out in self.outputs:
+
self.submitter(lambda: out.accept_repost(status["id"], reblog["id"]))
+
+
def _on_delete_post(self, status_id: str):
+
post = self._get_post(self.url, self.user_id, status_id)
+
if not post:
+
return
+
+
if post["reposted_id"]:
+
for output in self.outputs:
+
self.submitter(lambda: output.delete_repost(status_id))
+
else:
+
for output in self.outputs:
+
self.submitter(lambda: output.delete_post(status_id))
+
self._delete_post_by_id(post["id"])
+
+
def _accept_msg(self, msg: websockets.Data) -> None:
+
data: dict[str, Any] = cast(dict[str, Any], json.loads(msg))
+
event: str = cast(str, data["event"])
+
payload: str = cast(str, data["payload"])
+
+
if event == "update":
+
self._on_create_post(json.loads(payload))
+
elif event == "delete":
+
self._on_delete_post(payload)
+
+
@override
+
async def listen(self):
+
url = f"{self.streaming_url}/api/v1/streaming?stream=user"
+
+
async for ws in websockets.connect(
+
url, additional_headers={"Authorization": f"Bearer {self.options.token}"}
+
):
+
try:
+
self.log.info("Listening to %s...", self.streaming_url)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
self.submitter(lambda: self._accept_msg(msg))
+
+
listen = asyncio.create_task(listen_for_messages())
+
+
_ = await asyncio.gather(listen)
+
except websockets.ConnectionClosedError as e:
+
self.log.error(e, stack_info=True, exc_info=True)
+
self.log.info("Reconnecting to %s...", self.streaming_url)
+
continue
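
And the equivalent hypothetical options dict for the Mastodon input (token and instance are placeholders):

```python
opts = MastodonInputOptions.from_dict({
    "token": "xxxxxxxx",                      # placeholder access token
    "instance": "https://mastodon.example",   # run through normalize_service_url
    "allowed_visibility": ["public"],         # must be a subset of ["public", "unlisted"]
})
```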
+178
mastodon/output.py
···
+
from dataclasses import dataclass
+
from typing import Any, override
+
+
import requests
+
+
from cross.attachments import (
+
LanguagesAttachment,
+
QuoteAttachment,
+
RemoteUrlAttachment,
+
SensitiveAttachment,
+
)
+
from cross.post import Post
+
from cross.service import OutputService
+
from database.connection import DatabasePool
+
from mastodon.info import InstanceInfo, MastodonService, validate_and_transform
+
+
ALLOWED_POSTING_VISIBILITY: list[str] = ["public", "unlisted", "private"]
+
+
+
@dataclass(kw_only=True)
+
class MastodonOutputOptions:
+
token: str
+
instance: str
+
visibility: str = "public"
+
+
@classmethod
+
def from_dict(cls, data: dict[str, Any]) -> "MastodonOutputOptions":
+
validate_and_transform(data)
+
+
if "visibility" in data:
+
if data["visibility"] not in ALLOWED_POSTING_VISIBILITY:
+
raise ValueError(f"Invalid visibility option {data['visibility']}!")
+
+
return MastodonOutputOptions(**data)
+
+
+
# TODO
+
class MastodonOutputService(MastodonService, OutputService):
+
def __init__(self, db: DatabasePool, options: MastodonOutputOptions) -> None:
+
super().__init__(options.instance, db)
+
self.options: MastodonOutputOptions = options
+
+
self.log.info("Verifying %s credentails...", self.url)
+
response = self.verify_credentials()
+
self.user_id: str = response["id"]
+
+
self.log.info("Getting %s configuration...", self.url)
+
response = self.fetch_instance_info()
+
self.instance_info: InstanceInfo = InstanceInfo.from_api(response)
+
+
def accept_post(self, service: str, user: str, post: Post):
+
new_root_id: int | None = None
+
new_parent_id: int | None = None
+
+
reply_ref: str | None = None
+
if post.parent_id:
+
thread = self._find_mapped_thread(
+
post.parent_id, service, user, self.url, self.user_id
+
)
+
+
if not thread:
+
self.log.error("Failed to find thread tuple in the database!")
+
return
+
_, reply_ref, new_root_id, new_parent_id = thread
+
+
quote = post.attachments.get(QuoteAttachment)
+
if quote:
+
if quote.quoted_user != user:
+
self.log.info("Quoted other user, skipping!")
+
return
+
+
quoted_post = self._get_post(service, user, quote.quoted_id)
+
if not quoted_post:
+
self.log.error("Failed to find quoted post in the database!")
+
return
+
+
quoted_mappings = self._get_mappings(quoted_post["id"], self.url, self.user_id)
+
if not quoted_mappings:
+
self.log.error("Failed to find mappings for quoted post!")
+
return
+
+
quoted_local_id = quoted_mappings[-1][0]
+
# TODO resolve service identifier
+
+
post_tokens = post.tokens.copy()
+
+
remote_url = post.attachments.get(RemoteUrlAttachment)
+
if remote_url and remote_url.url and post.text_type == "text/x.misskeymarkdown":
+
# TODO strip mfm
+
pass
+
+
raw_statuses = [] # TODO split tokens and media across posts
+
if not raw_statuses:
+
self.log.error("Failed to split post into statuses!")
+
return
+
+
langs = post.attachments.get(LanguagesAttachment)
+
sensitive = post.attachments.get(SensitiveAttachment)
+
+
if langs and langs.langs:
+
pass # TODO
+
+
if sensitive and sensitive.sensitive:
+
pass # TODO
+
+
def delete_post(self, service: str, user: str, post_id: str):
+
post = self._get_post(service, user, post_id)
+
if not post:
+
self.log.info("Post not found in db, skipping delete..")
+
return
+
+
mappings = self._get_mappings(post["id"], self.url, self.user_id)
+
for mapping in mappings[::-1]:
+
self.log.info("Deleting '%s'...", mapping["identifier"])
+
requests.delete(
+
f"{self.url}/api/v1/statuses/{mapping['identifier']}",
+
headers={"Authorization": f"Bearer {self._get_token()}"},
+
)
+
self._delete_post_by_id(mapping["id"])
+
+
def accept_repost(self, service: str, user: str, repost_id: str, reposted_id: str):
+
reposted = self._get_post(service, user, reposted_id)
+
if not reposted:
+
self.log.info("Post not found in db, skipping repost..")
+
return
+
+
mappings = self._get_mappings(reposted["id"], self.url, self.user_id)
+
if mappings:
+
rsp = requests.post(
+
f"{self.url}/api/v1/statuses/{mappings[0]['identifier']}/reblog",
+
headers={"Authorization": f"Bearer {self._get_token()}"},
+
)
+
+
if rsp.status_code != 200:
+
self.log.error(
+
"Failed to boost status! status_code: %s, msg: %s",
+
rsp.status_code,
+
rsp.content,
+
)
+
return
+
+
self._insert_post(
+
{
+
"user": self.user_id,
+
"service": self.url,
+
"identifier": rsp.json()["id"],
+
"reposted": mappings[0]["id"],
+
}
+
)
+
inserted = self._get_post(self.url, self.user_id, rsp.json()["id"])
+
if not inserted:
+
raise ValueError("Inserted post not found!")
+
self._insert_post_mapping(reposted["id"], inserted["id"])
+
+
def delete_repost(self, service: str, user: str, repost_id: str):
+
repost = self._get_post(service, user, repost_id)
+
if not repost:
+
self.log.info("Repost not found in db, skipping delete..")
+
return
+
+
mappings = self._get_mappings(repost["id"], self.url, self.user_id)
+
rmappings = self._get_mappings(repost["reposted"], self.url, self.user_id)
+
+
if mappings and rmappings:
+
self.log.info(
+
"Removing '%s' Repost of '%s'...",
+
mappings[0]["identifier"],
+
rmappings[0]["identifier"],
+
)
+
requests.post(
+
f"{self.url}/api/v1/statuses/{rmappings[0]['identifier']}/unreblog",
+
headers={"Authorization": f"Bearer {self._get_token()}"},
+
)
+
self._delete_post_by_id(mappings[0]["id"])
+
+
@override
+
def _get_token(self) -> str:
+
return self.options.token
+31
mastodon/parser.py
···
+
from typing import Any, override
+
+
from cross.tokens import LinkToken, MentionToken, TagToken
+
from util.html import HTMLToTokensParser
+
+
+
class StatusParser(HTMLToTokensParser):
+
def __init__(self, status: dict[str, Any]) -> None:
+
super().__init__()
+
self.tags: set[str] = set(tag["url"] for tag in status.get("tags", []))
+
self.mentions: set[str] = set(m["url"] for m in status.get("mentions", []))
+
+
@override
+
def handle_a_endtag(self):
+
label, _attr = self._tag_stack.pop("a")
+
+
href = _attr.get("href")
+
if href:
+
cls = _attr.get("class", "")
+
if cls:
+
if "hashtag" in cls and href in self.tags:
+
tag = label[1:] if label.startswith("#") else label
+
+
self.tokens.append(TagToken(tag=tag))
+
return
+
if "mention" in cls and href in self.mentions:
+
username = label[1:] if label.startswith("@") else label
+
+
self.tokens.append(MentionToken(username=username, uri=href))
+
return
+
self.tokens.append(LinkToken(href=href, label=label))
-13
migrations/001_initdb.sql
···
-
CREATE TABLE IF NOT EXISTS posts (
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
-
user_id TEXT NOT NULL,
-
service TEXT NOT NULL,
-
identifier TEXT NOT NULL,
-
parent_id INTEGER NULL REFERENCES posts(id),
-
root_id INTEGER NULL REFERENCES posts(id)
-
);
-
-
CREATE TABLE IF NOT EXISTS mappings (
-
original_post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
-
mapped_post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE
-
);
+21
migrations/001_initdb_v1.py
···
+
import sqlite3
+
+
+
def migrate(conn: sqlite3.Connection):
+
_ = conn.execute("""
+
CREATE TABLE IF NOT EXISTS posts (
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
+
user_id TEXT NOT NULL,
+
service TEXT NOT NULL,
+
identifier TEXT NOT NULL,
+
parent_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL,
+
root_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
+
);
+
""")
+
_ = conn.execute("""
+
CREATE TABLE IF NOT EXISTS mappings (
+
original_post_id INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
+
mapped_post_id INTEGER NOT NULL
+
);
+
""")
+
pass
-2
migrations/002_add_reposted_column.sql
···
-
ALTER TABLE posts
-
ADD COLUMN reposted_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL;
+11
migrations/002_add_reposted_column_v1.py
···
+
import sqlite3
+
+
+
def migrate(conn: sqlite3.Connection):
+
columns = conn.execute("PRAGMA table_info(posts)")
+
column_names = [col[1] for col in columns]
+
if "reposted_id" not in column_names:
+
_ = conn.execute("""
+
ALTER TABLE posts
+
ADD COLUMN reposted_id INTEGER NULL REFERENCES posts(id) ON DELETE SET NULL
+
""")
-2
migrations/003_add_extra_data.sql
···
-
ALTER TABLE posts
-
ADD COLUMN extra_data TEXT NULL;
+22
migrations/003_add_extra_data_column_v1.py
···
+
import json
+
import sqlite3
+
+
+
def migrate(conn: sqlite3.Connection):
+
columns = conn.execute("PRAGMA table_info(posts)")
+
column_names = [col[1] for col in columns]
+
if "extra_data" not in column_names:
+
_ = conn.execute("""
+
ALTER TABLE posts
+
ADD COLUMN extra_data TEXT NULL
+
""")
+
+
# migrate old bsky identifiers from json to uri as id and cid in extra_data
+
data = conn.execute("SELECT id, identifier FROM posts WHERE service = 'https://bsky.app';").fetchall()
+
rewrites: list[tuple[str, str, int]] = []
+
for row in data:
+
if row[1][0] == '{' and row[1][-1] == '}':
+
data = json.loads(row[1])
+
rewrites.append((data['uri'], json.dumps({'cid': data['cid']}), row[0]))
+
if rewrites:
+
_ = conn.executemany("UPDATE posts SET identifier = ?, extra_data = ? WHERE id = ?;", rewrites)
+52
migrations/004_initdb_next.py
···
+
import sqlite3
+
+
+
def migrate(conn: sqlite3.Connection):
+
cursor = conn.cursor()
+
+
old_posts = cursor.execute("SELECT * FROM posts;").fetchall()
+
old_mappings = cursor.execute("SELECT * FROM mappings;").fetchall()
+
+
_ = cursor.execute("DROP TABLE posts;")
+
_ = cursor.execute("DROP TABLE mappings;")
+
+
_ = cursor.execute("""
+
CREATE TABLE posts (
+
id INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT,
+
user TEXT NOT NULL,
+
service TEXT NOT NULL,
+
identifier TEXT NOT NULL,
+
parent INTEGER NULL REFERENCES posts(id),
+
root INTEGER NULL REFERENCES posts(id),
+
reposted INTEGER NULL REFERENCES posts(id),
+
extra_data TEXT NULL
+
);
+
""")
+
+
_ = cursor.execute("""
+
CREATE TABLE mappings (
+
original INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
+
mapped INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
+
UNIQUE(original, mapped)
+
);
+
""")
+
+
for old_post in old_posts:
+
_ = cursor.execute(
+
"""
+
INSERT INTO posts (id, user, service, identifier, parent, root, reposted, extra_data)
+
VALUES (:id, :user_id, :service, :identifier, :parent_id, :root_id, :reposted_id, :extra_data)
+
""",
+
dict(old_post),
+
)
+
+
for mapping in old_mappings:
+
original, mapped = mapping["original_post_id"], mapping["mapped_post_id"]
+
_ = cursor.execute(
+
"INSERT OR IGNORE INTO mappings (original, mapped) VALUES (?, ?)",
+
(original, mapped),
+
)
+
_ = cursor.execute(
+
"INSERT OR IGNORE INTO mappings (original, mapped) VALUES (?, ?)",
+
(mapped, original),
+
)
+12
migrations/005_add_indexes.py
···
+
import sqlite3
+
+
+
def migrate(conn: sqlite3.Connection):
+
_ = conn.execute("""
+
CREATE INDEX IF NOT EXISTS idx_posts_service_user_identifier
+
ON posts (service, user, identifier);
+
""")
+
_ = conn.execute("""
+
CREATE UNIQUE INDEX IF NOT EXISTS ux_mappings_original_mapped
+
ON mappings (original, mapped);
+
""")
+35
migrations/_registry.py
···
+
import importlib.util
+
from pathlib import Path
+
import sqlite3
+
from typing import Callable
+
+
+
def load_migrations(path: Path) -> list[tuple[int, str, Callable[[sqlite3.Connection], None]]]:
+
migrations: list[tuple[int, str, Callable[[sqlite3.Connection], None]]] = []
+
migration_files = sorted(
+
[f for f in path.glob("*.py") if not f.stem.startswith("_")]
+
)
+
+
for filepath in migration_files:
+
filename = filepath.stem
+
version_str = filename.split("_")[0]
+
+
try:
+
version = int(version_str)
+
except ValueError:
+
raise ValueError(f"Migration {filepath.name} must start with a numeric version!")
+
+
spec = importlib.util.spec_from_file_location(filepath.stem, filepath)
+
if not spec or not spec.loader:
+
raise Exception(f"Failed to load spec from file: {filepath}")
+
+
module = importlib.util.module_from_spec(spec)
+
spec.loader.exec_module(module)
+
+
if hasattr(module, "migrate"):
+
migrations.append((version, filename, module.migrate))
+
else:
+
raise ValueError(f"Migration {filepath.name} missing 'migrate' function")
+
+
migrations.sort(key=lambda x: x[0])
+
return migrations
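
`load_migrations` only discovers and orders the migration modules; applying them is left to the caller, which isn't part of this diff. a minimal sketch of such a runner, assuming (hypothetically) that the applied version is tracked with sqlite's `user_version` pragma:

```python
# hypothetical runner for load_migrations(); the actual caller is not shown in
# this diff, and PRAGMA user_version is only an assumption for the example.
import sqlite3
from pathlib import Path

from migrations._registry import load_migrations


def apply_migrations(conn: sqlite3.Connection, migrations_dir: Path) -> None:
    current: int = conn.execute("PRAGMA user_version").fetchone()[0]
    for version, _name, migrate in load_migrations(migrations_dir):
        if version <= current:
            continue  # already applied
        migrate(conn)
        _ = conn.execute(f"PRAGMA user_version = {version}")
        conn.commit()
```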
+22
misskey/info.py
···
+
from abc import ABC, abstractmethod
+
+
import requests
+
+
from cross.service import Service
+
+
+
class MisskeyService(ABC, Service):
+
def verify_credentials(self):
+
response = requests.post(
+
f"{self.url}/api/i",
+
json={"i": self._get_token()},
+
headers={"Content-Type": "application/json"},
+
)
+
if response.status_code != 200:
+
self.log.error("Failed to validate user credentials!")
+
response.raise_for_status()
+
return dict(response.json())
+
+
@abstractmethod
+
def _get_token(self) -> str:
+
pass
+227
misskey/input.py
···
+
import asyncio
+
import json
+
import re
+
import uuid
+
from dataclasses import dataclass, field
+
from typing import Any, cast, override
+
+
import websockets
+
+
from cross.attachments import (
+
LabelsAttachment,
+
MediaAttachment,
+
QuoteAttachment,
+
RemoteUrlAttachment,
+
SensitiveAttachment,
+
)
+
from cross.media import Blob, download_blob
+
from cross.post import Post
+
from cross.service import InputService
+
from database.connection import DatabasePool
+
from misskey.info import MisskeyService
+
from util.markdown import MarkdownParser
+
from util.util import normalize_service_url
+
+
ALLOWED_VISIBILITY = ["public", "home"]
+
+
+
@dataclass
+
class MisskeyInputOptions:
+
token: str
+
instance: str
+
allowed_visibility: list[str] = field(
+
default_factory=lambda: ALLOWED_VISIBILITY.copy()
+
)
+
filters: list[re.Pattern[str]] = field(default_factory=lambda: [])
+
+
@classmethod
+
def from_dict(cls, data: dict[str, Any]) -> "MisskeyInputOptions":
+
data["instance"] = normalize_service_url(data["instance"])
+
+
if "allowed_visibility" in data:
+
for vis in data.get("allowed_visibility", []):
+
if vis not in ALLOWED_VISIBILITY:
+
raise ValueError(f"Invalid visibility option {vis}!")
+
+
if "filters" in data:
+
data["filters"] = [re.compile(r) for r in data["filters"]]
+
+
return MisskeyInputOptions(**data)
+
+
+
class MisskeyInputService(MisskeyService, InputService):
+
def __init__(self, db: DatabasePool, options: MisskeyInputOptions) -> None:
+
super().__init__(options.instance, db)
+
self.options: MisskeyInputOptions = options
+
+
self.log.info("Verifying %s credentails...", self.url)
+
response = self.verify_credentials()
+
self.user_id: str = response["id"]
+
+
@override
+
def _get_token(self) -> str:
+
return self.options.token
+
+
def _on_note(self, note: dict[str, Any]):
+
if note["userId"] != self.user_id:
+
return
+
+
if note["visibility"] not in self.options.allowed_visibility:
+
return
+
+
if note.get("poll"):
+
self.log.info("Skipping '%s'! Contains a poll..", note["id"])
+
return
+
+
renote: dict[str, Any] | None = note.get("renote")
+
if renote:
+
if note.get("text") is None:
+
self._on_renote(note, renote)
+
return
+
+
if renote["userId"] != self.user_id:
+
return
+
+
rrenote = self._get_post(self.url, self.user_id, renote["id"])
+
if not rrenote:
+
self.log.info(
+
"Skipping %s, quote %s not found in db", note["id"], renote["id"]
+
)
+
return
+
+
reply: dict[str, Any] | None = note.get("reply")
+
if reply:
+
if reply.get("userId") != self.user_id:
+
self.log.info("Skipping '%s'! Reply to other user..", note["id"])
+
return
+
+
parent = None
+
if reply:
+
parent = self._get_post(self.url, self.user_id, reply["id"])
+
if not parent:
+
self.log.info(
+
"Skipping %s, parent %s not found in db", note["id"], reply["id"]
+
)
+
return
+
+
mention_handles: dict[str, str] = note.get("mentionHandles") or {}
+
tags: list[str] = note.get("tags") or []
+
+
handles: list[tuple[str, str]] = []
+
for value in mention_handles.values():
+
handles.append((value, value))
+
+
parser = MarkdownParser() # TODO MFM parser
+
tokens = parser.parse(note.get("text", ""), tags, handles)
+
post = Post(id=note["id"], parent_id=reply["id"] if reply else None, tokens=tokens)
+
+
post.attachments.put(RemoteUrlAttachment(url=self.url + "/notes/" + note["id"]))
+
if renote:
+
post.attachments.put(QuoteAttachment(quoted_id=renote['id'], quoted_user=self.user_id))
+
if any([a.get("isSensitive", False) for a in note.get("files", [])]):
+
post.attachments.put(SensitiveAttachment(sensitive=True))
+
if note.get("cw"):
+
post.attachments.put(LabelsAttachment(labels=[note["cw"]]))
+
+
blobs: list[Blob] = []
+
for media in note.get("files", []):
+
self.log.info("Downloading %s...", media["url"])
+
blob: Blob | None = download_blob(media["url"], media.get("comment", ""))
+
if not blob:
+
self.log.error(
+
"Skipping %s! Failed to download media %s.",
+
note["id"],
+
media["url"],
+
)
+
return
+
blobs.append(blob)
+
+
if blobs:
+
post.attachments.put(MediaAttachment(blobs=blobs))
+
+
if parent:
+
self._insert_post(
+
{
+
"user": self.user_id,
+
"service": self.url,
+
"identifier": note["id"],
+
"parent": parent["id"],
+
"root": parent["id"] if not parent["root"] else parent["root"],
+
}
+
)
+
else:
+
self._insert_post(
+
{
+
"user": self.user_id,
+
"service": self.url,
+
"identifier": note["id"],
+
}
+
)
+
+
for out in self.outputs:
+
self.submitter(lambda out=out: out.accept_post(post))  # bind out per iteration; a plain lambda would capture the last output
+
+
def _on_renote(self, note: dict[str, Any], renote: dict[str, Any]):
+
reposted = self._get_post(self.url, self.user_id, renote["id"])
+
if not reposted:
+
self.log.info(
+
"Skipping repost '%s' as reposted post '%s' was not found in the db.",
+
note["id"],
+
renote["id"],
+
)
+
return
+
+
self._insert_post(
+
{
+
"user": self.user_id,
+
"service": self.url,
+
"identifier": note["id"],
+
"reposted": reposted["id"],
+
}
+
)
+
+
for out in self.outputs:
+
self.submitter(lambda out=out: out.accept_repost(note["id"], renote["id"]))  # bind out per iteration
+
+
def _accept_msg(self, msg: websockets.Data) -> None:
+
data: dict[str, Any] = cast(dict[str, Any], json.loads(msg))
+
+
if data["type"] == "channel":
+
type: str = cast(str, data["body"]["type"])
+
if type == "note" or type == "reply":
+
note_body = data["body"]["body"]
+
self._on_note(note_body)
+
return
+
+
async def _subscribe_to_home(self, ws: websockets.ClientConnection) -> None:
+
await ws.send(
+
json.dumps(
+
{
+
"type": "connect",
+
"body": {"channel": "homeTimeline", "id": str(uuid.uuid4())},
+
}
+
)
+
)
+
self.log.info("Subscribed to 'homeTimeline' channel...")
+
+
@override
+
async def listen(self):
+
streaming: str = f"{'wss' if self.url.startswith('https') else 'ws'}://{self.url.split('://', 1)[1]}"
+
url: str = f"{streaming}/streaming?i={self.options.token}"
+
+
async for ws in websockets.connect(url):
+
try:
+
self.log.info("Listening to %s...", streaming)
+
await self._subscribe_to_home(ws)
+
+
async def listen_for_messages():
+
async for msg in ws:
+
self.submitter(lambda msg=msg: self._accept_msg(msg))  # bind msg so a deferred call doesn't see a later message
+
+
listen = asyncio.create_task(listen_for_messages())
+
+
_ = await asyncio.gather(listen)
+
except websockets.ConnectionClosedError as e:
+
self.log.error(e, stack_info=True, exc_info=True)
+
self.log.info("Reconnecting to %s...", streaming)
+
continue
+10
pyproject.toml
···
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
+
"dnspython>=2.8.0",
+
"grapheme>=0.6.0",
"python-magic>=0.4.27",
"requests>=2.32.5",
"websockets>=15.0.1",
]
+
+
[dependency-groups]
+
dev = [
+
"pytest>=8.4.2",
+
]
+
+
[tool.pytest.ini_options]
+
pythonpath = ["."]
+32
registry.py
···
+
from pathlib import Path
+
from typing import Any, Callable
+
+
from cross.service import InputService, OutputService
+
from database.connection import DatabasePool
+
+
input_factories: dict[str, Callable[[DatabasePool, dict[str, Any]], InputService]] = {}
+
output_factories: dict[str, Callable[[DatabasePool, dict[str, Any]], OutputService]] = {}
+
+
+
def create_input_service(db: DatabasePool, data: dict[str, Any]) -> InputService:
+
if "type" not in data:
+
raise ValueError("No `type` field in input data!")
+
type: str = str(data["type"])
+
del data["type"]
+
+
factory = input_factories.get(type)
+
if not factory:
+
raise KeyError(f"No such input service {type}!")
+
return factory(db, data)
+
+
+
def create_output_service(db: DatabasePool, data: dict[str, Any]) -> OutputService:
+
if "type" not in data:
+
raise ValueError("No `type` field in input data!")
+
type: str = str(data["type"])
+
del data["type"]
+
+
factory = output_factories.get(type)
+
if not factory:
+
raise KeyError(f"No such output service {type}!")
+
return factory(db, data)
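
for context, the factories are only useful once `registry_bootstrap.bootstrap()` has populated them. a small illustrative sketch; the config shape shown here is an assumption based on the available options classes:

```python
# illustrative glue code, not the real entry point of xpost
from typing import Any

from database.connection import DatabasePool
from registry import create_input_service
from registry_bootstrap import bootstrap


def build_input(db: DatabasePool, entry: dict[str, Any]):
    bootstrap()  # register the lazy factories before looking one up
    # entry carries a "type" key plus the service's own options, e.g.
    # {"type": "misskey-wss", "instance": "https://example.tld", "token": "..."}
    return create_input_service(db, entry)
```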
+33
registry_bootstrap.py
···
+
from typing import Any
+
+
from database.connection import DatabasePool
+
from registry import input_factories, output_factories
+
+
+
class LazyFactory:
+
def __init__(self, module_path: str, class_name: str, options_class_name: str):
+
self.module_path: str = module_path
+
self.class_name: str = class_name
+
self.options_class_name: str = options_class_name
+
+
def __call__(self, db: DatabasePool, d: dict[str, Any]):
+
module = __import__(
+
self.module_path, fromlist=[self.class_name, self.options_class_name]
+
)
+
service_class = getattr(module, self.class_name)
+
options_class = getattr(module, self.options_class_name)
+
return service_class(db, options_class.from_dict(d))
+
+
def bootstrap():
+
input_factories["mastodon-wss"] = LazyFactory(
+
"mastodon.input", "MastodonInputService", "MastodonInputOptions"
+
)
+
input_factories["misskey-wss"] = LazyFactory(
+
"misskey.input", "MisskeyInputService", "MisskeyInputOptions"
+
)
+
input_factories["bluesky-jetstream"] = LazyFactory(
+
"bluesky.input", "BlueskyJetstreamInputService", "BlueskyJetstreamInputOptions"
+
)
+
output_factories['stderr'] = LazyFactory(
+
"util.dummy", "StderrOutputService", "DummyOptions"
+
)
+61
tests/util/util_test.py
···
+
import util.util as u
+
from unittest.mock import patch
+
import pytest
+
+
+
def test_normalize_service_url_http():
+
assert u.normalize_service_url("http://example.com") == "http://example.com"
+
assert u.normalize_service_url("http://example.com/") == "http://example.com"
+
+
+
def test_normalize_service_url_invalid_schemes():
+
with pytest.raises(ValueError, match="Invalid service url"):
+
_ = u.normalize_service_url("ftp://example.com")
+
with pytest.raises(ValueError, match="Invalid service url"):
+
_ = u.normalize_service_url("example.com")
+
with pytest.raises(ValueError, match="Invalid service url"):
+
_ = u.normalize_service_url("//example.com")
+
+
+
def test_read_env_missing_env_var():
+
data = {"token": "env:MISSING_VAR", "keep": "value"}
+
with patch.dict("os.environ", {}, clear=True):
+
u.read_env(data)
+
assert data == {"keep": "value"}
+
assert "token" not in data
+
+
+
def test_read_env_no_env_prefix():
+
data = {"token": "literal_value", "number": 123}
+
u.read_env(data)
+
assert data == {"token": "literal_value", "number": 123}
+
+
+
def test_read_env_deeply_nested():
+
data = {"level1": {"level2": {"token": "env:DEEP_TOKEN"}}}
+
with patch.dict("os.environ", {"DEEP_TOKEN": "deep_secret"}):
+
u.read_env(data)
+
assert data["level1"]["level2"]["token"] == "deep_secret"
+
+
+
def test_read_env_mixed_types():
+
data = {
+
"string": "env:TOKEN",
+
"number": 42,
+
"list": [1, 2, 3],
+
"none": None,
+
"bool": True,
+
}
+
with patch.dict("os.environ", {"TOKEN": "secret"}):
+
u.read_env(data)
+
assert data["string"] == "secret"
+
assert data["number"] == 42
+
assert data["list"] == [1, 2, 3]
+
assert data["none"] is None
+
assert data["bool"] is True
+
+
+
def test_read_env_empty_dict():
+
data = {}
+
u.read_env(data)
+
assert data == {}
+49
util/cache.py
···
+
from abc import ABC, abstractmethod
+
from pathlib import Path
+
import time
+
from typing import Generic, TypeVar, override
+
import pickle
+
+
K = TypeVar("K")
+
V = TypeVar("V")
+
+
class Cacheable(ABC):
+
@abstractmethod
+
def dump_cache(self, path: Path):
+
pass
+
+
@abstractmethod
+
def load_cache(self, path: Path):
+
pass
+
+
class TTLCache(Generic[K, V], Cacheable):
+
def __init__(self, ttl_seconds: int = 3600) -> None:
+
self.ttl: int = ttl_seconds
+
self.__cache: dict[K, tuple[V, float]] = {}
+
+
def get(self, key: K) -> V | None:
+
if key in self.__cache:
+
value, timestamp = self.__cache[key]
+
if time.time() - timestamp < self.ttl:
+
return value
+
else:
+
del self.__cache[key]
+
return None
+
+
def set(self, key: K, value: V) -> None:
+
self.__cache[key] = (value, time.time())
+
+
def clear(self) -> None:
+
self.__cache.clear()
+
+
@override
+
def dump_cache(self, path: Path) -> None:
+
path.parent.mkdir(parents=True, exist_ok=True)
+
with open(path, 'wb') as f:
+
pickle.dump(self.__cache, f)
+
+
@override
+
def load_cache(self, path: Path):
+
if path.exists():
+
with open(path, 'rb') as f:
+
self.__cache = pickle.load(f)
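
a quick usage sketch for `TTLCache`; the keys, values and cache path below are made up:

```python
from pathlib import Path

from util.cache import TTLCache

cache: TTLCache[str, str] = TTLCache(ttl_seconds=60)
cache.set("did:plc:example", "https://pds.example.tld")
assert cache.get("did:plc:example") == "https://pds.example.tld"
assert cache.get("unknown") is None  # misses and expired entries return None

# entries can be pickled to disk and restored on the next run
cache.dump_cache(Path("cache/dids.pickle"))
cache.load_cache(Path("cache/dids.pickle"))
```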
+29
util/dummy.py
···
+
from typing import override
+
from cross.post import Post
+
from cross.service import OutputService
+
from database.connection import DatabasePool
+
+
class DummyOptions:
+
@classmethod
+
def from_dict(cls, obj) -> 'DummyOptions':
+
return DummyOptions()
+
+
class StderrOutputService(OutputService):
+
def __init__(self, db: DatabasePool, options: DummyOptions) -> None:
+
super().__init__("http://localhost", db)
+
+
@override
+
def accept_post(self, post: Post):
+
self.log.info("%s", post)
+
+
@override
+
def accept_repost(self, repost_id: str, reposted_id: str):
+
self.log.info("%s, %s", repost_id, reposted_id)
+
+
@override
+
def delete_post(self, post_id: str):
+
self.log.info("%s", post_id)
+
+
@override
+
def delete_repost(self, repost_id: str):
+
self.log.info("%s", repost_id)
+150
util/html.py
···
+
from html.parser import HTMLParser
+
from typing import override
+
+
from cross.tokens import LinkToken, TextToken, Token
+
from util.splitter import canonical_label
+
+
+
class HTMLToTokensParser(HTMLParser):
+
def __init__(self) -> None:
+
super().__init__()
+
self.tokens: list[Token] = []
+
+
self._tag_stack: dict[str, tuple[str, dict[str, str | None]]] = {}
+
self.in_pre: bool = False
+
self.in_code: bool = False
+
self.invisible: bool = False
+
+
def handle_a_endtag(self):
+
label, _attr = self._tag_stack.pop("a")
+
+
href = _attr.get("href")
+
if href:
+
if canonical_label(label, href):
+
self.tokens.append(LinkToken(href=href))
+
else:
+
self.tokens.append(LinkToken(href=href, label=label))
+
+
def append_text(self, text: str):
+
self.tokens.append(TextToken(text=text))
+
+
def append_newline(self):
+
if self.tokens:
+
last_token = self.tokens[-1]
+
if isinstance(last_token, TextToken) and not last_token.text.endswith("\n"):
+
self.tokens.append(TextToken(text="\n"))
+
+
@override
+
def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
+
_attr = dict(attrs)
+
+
if self.invisible:
+
return
+
+
match tag:
+
case "p":
+
cls = _attr.get("class", "")
+
if cls and "quote-inline" in cls:
+
self.invisible = True
+
case "a":
+
self._tag_stack["a"] = ("", _attr)
+
case "code":
+
if not self.in_pre:
+
self.append_text("`")
+
self.in_code = True
+
case "pre":
+
self.append_newline()
+
self.append_text("```\n")
+
self.in_pre = True
+
case "blockquote":
+
self.append_newline()
+
self.append_text("> ")
+
case "strong" | "b":
+
self.append_text("**")
+
case "em" | "i":
+
self.append_text("*")
+
case "del" | "s":
+
self.append_text("~~")
+
case "br":
+
self.append_text("\n")
+
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
+
level = int(tag[1])
+
self.append_text("\n" + "#" * level + " ")
+
case _:
+
# self.builder.extend(f"<{tag}>".encode("utf-8"))
+
pass
+
+
@override
+
def handle_endtag(self, tag: str) -> None:
+
if self.invisible:
+
if tag == "p":
+
self.invisible = False
+
return
+
+
match tag:
+
case "a":
+
if "a" in self._tag_stack:
+
self.handle_a_endtag()
+
case "code":
+
if not self.in_pre and self.in_code:
+
self.append_text("`")
+
self.in_code = False
+
case "pre":
+
self.append_newline()
+
self.append_text("```\n")
+
self.in_pre = False
+
case "blockquote":
+
self.append_text("\n")
+
case "strong" | "b":
+
self.append_text("**")
+
case "em" | "i":
+
self.append_text("*")
+
case "del" | "s":
+
self.append_text("~~")
+
case "p":
+
self.append_text("\n\n")
+
case "h1" | "h2" | "h3" | "h4" | "h5" | "h6":
+
self.append_text("\n")
+
case _:
+
# self.builder.extend(f"</{tag}>".encode("utf-8"))
+
pass
+
+
@override
+
def handle_data(self, data: str) -> None:
+
if self.invisible:
+
return
+
+
if self._tag_stack.get('a'):
+
label, _attr = self._tag_stack.pop("a")
+
self._tag_stack["a"] = (label + data, _attr)
+
return
+
# plain text outside of a link is emitted directly as a text token
+
self.append_text(data)
+
+
def get_result(self) -> list[Token]:
+
if not self.tokens:
+
return []
+
+
combined: list[Token] = []
+
buffer: list[str] = []
+
+
def flush_buffer():
+
if buffer:
+
merged = "".join(buffer)
+
combined.append(TextToken(text=merged))
+
buffer.clear()
+
+
for token in self.tokens:
+
if isinstance(token, TextToken):
+
buffer.append(token.text)
+
else:
+
flush_buffer()
+
combined.append(token)
+
+
flush_buffer()
+
+
if combined and isinstance(combined[-1], TextToken):
+
if combined[-1].text.endswith("\n\n"):
+
combined[-1] = TextToken(text=combined[-1].text[:-2])
+
+
if combined[-1].text.endswith("\n"):
+
combined[-1] = TextToken(text=combined[-1].text[:-1])
+
return combined
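
a quick usage sketch for the HTML tokenizer; the sample markup is made up but mimics fediverse post HTML:

```python
from util.html import HTMLToTokensParser

parser = HTMLToTokensParser()
parser.feed(
    '<p>release notes: <a href="https://example.tld/notes/1">'
    "https://example.tld/notes/1</a></p>"
)
for token in parser.get_result():
    # expected shape: a TextToken for the prose, then a LinkToken whose label
    # was recognised as canonical (label == href) and therefore dropped
    print(token)
```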
+126
util/markdown.py
···
+
import re
+
+
from cross.tokens import LinkToken, MentionToken, TagToken, TextToken, Token
+
from util.html import HTMLToTokensParser
+
from util.splitter import canonical_label
+
+
URL = re.compile(r"(?:(?:[A-Za-z][A-Za-z0-9+.-]*://)|mailto:)[^\s]+", re.IGNORECASE)
+
MD_INLINE_LINK = re.compile(
+
r"\[([^\]]+)\]\(\s*((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s\)]+)\s*\)",
+
re.IGNORECASE,
+
)
+
MD_AUTOLINK = re.compile(
+
r"<((?:(?:[A-Za-z][A-Za-z0-9+.\-]*://)|mailto:)[^\s>]+)>", re.IGNORECASE
+
)
+
HASHTAG = re.compile(r"(?<!\w)\#([\w]+)")
+
FEDIVERSE_HANDLE = re.compile(r"(?<![\w@])@([\w\.-]+)(?:@([\w\.-]+\.[\w\.-]+))?")
+
+
REGEXES = [URL, MD_INLINE_LINK, MD_AUTOLINK, HASHTAG, FEDIVERSE_HANDLE]
+
+
+
# TODO autolinks are broken by the html parser
+
class MarkdownParser:
+
def parse(
+
self, text: str, tags: list[str], handles: list[tuple[str, str]]
+
) -> list[Token]:
+
if not text:
+
return []
+
+
tokenizer = HTMLToTokensParser()
+
tokenizer.feed(text)
+
html_tokens = tokenizer.get_result()
+
+
tokens: list[Token] = []
+
+
for tk in html_tokens:
+
if isinstance(tk, TextToken):
+
tokens.extend(self.__tokenize_md(tk.text, tags, handles))
+
elif isinstance(tk, LinkToken):
+
if not tk.label or canonical_label(tk.label, tk.href):
+
tokens.append(tk)
+
continue
+
+
tokens.extend(
+
self.__tokenize_md(f"[{tk.label}]({tk.href})", tags, handles)
+
)
+
else:
+
tokens.append(tk)
+
+
return tokens
+
+
def __tokenize_md(
+
self, text: str, tags: list[str], handles: list[tuple[str, str]]
+
) -> list[Token]:
+
index: int = 0
+
total: int = len(text)
+
buffer: list[str] = []
+
+
tokens: list[Token] = []
+
+
def flush():
+
nonlocal buffer
+
if buffer:
+
tokens.append(TextToken(text="".join(buffer)))
+
buffer = []
+
+
while index < total:
+
if text[index] == "[":
+
md_inline = MD_INLINE_LINK.match(text, index)
+
if md_inline:
+
flush()
+
label = md_inline.group(1)
+
href = md_inline.group(2)
+
tokens.append(LinkToken(href=href, label=label))
+
index = md_inline.end()
+
continue
+
+
if text[index] == "<":
+
md_auto = MD_AUTOLINK.match(text, index)
+
if md_auto:
+
flush()
+
href = md_auto.group(1)
+
tokens.append(LinkToken(href=href, label=None))
+
index = md_auto.end()
+
continue
+
+
if text[index] == "#":
+
tag = HASHTAG.match(text, index)
+
if tag:
+
tag_text = tag.group(1)
+
if tag_text.lower() in tags:
+
flush()
+
tokens.append(TagToken(tag=tag_text))
+
index = tag.end()
+
continue
+
+
if text[index] == "@":
+
handle = FEDIVERSE_HANDLE.match(text, index)
+
if handle:
+
handle_text = handle.group(0)
+
stripped_handle = handle_text.strip()
+
+
match = next(
+
(pair for pair in handles if stripped_handle in pair), None
+
)
+
+
if match:
+
flush()
+
tokens.append(
+
MentionToken(username=match[1], uri=None)
+
) # TODO: misskey doesn't provide a uri
+
index = handle.end()
+
continue
+
+
url = URL.match(text, index)
+
if url:
+
flush()
+
href = url.group(0)
+
tokens.append(LinkToken(href=href, label=None))
+
index = url.end()
+
continue
+
+
buffer.append(text[index])
+
index += 1
+
+
flush()
+
return tokens
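
an illustrative call; in misskey/input.py the tag and handle lists come from the note payload (note["tags"] and note["mentionHandles"]), the values below are made up:

```python
from util.markdown import MarkdownParser

parser = MarkdownParser()
tokens = parser.parse(
    "new build is up #release, thanks @alice@example.tld "
    "[changelog](https://example.tld/changelog)",
    tags=["release"],
    handles=[("@alice@example.tld", "@alice@example.tld")],
)
# expect roughly: TextToken, TagToken, TextToken, MentionToken, TextToken, LinkToken
for token in tokens:
    print(token)
```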
+120
util/splitter.py
···
+
import re
+
from dataclasses import replace
+
+
import grapheme
+
+
from cross.tokens import LinkToken, TagToken, TextToken, Token
+
+
+
def canonical_label(label: str | None, href: str):
+
if not label or label == href:
+
return True
+
+
split = href.split("://", 1)
+
if len(split) > 1:
+
if split[1] == label:
+
return True
+
+
return False
+
+
+
ALTERNATE = re.compile(r"\S+|\s+")
+
+
+
def split_tokens(
+
tokens: list[Token],
+
max_chars: int,
+
max_link_len: int = 35,
+
) -> list[list[Token]]:
+
def new_block() -> None:
+
nonlocal blocks, block, length
+
if block:
+
blocks.append(block)
+
block, length = [], 0
+
+
def append_text(text: str) -> None:
+
nonlocal block
+
if block and isinstance(block[-1], TextToken):
+
block[-1] = replace(block[-1], text=block[-1].text + text)
+
else:
+
block.append(TextToken(text=text))
+
+
blocks: list[list[Token]] = []
+
block: list[Token] = []
+
length: int = 0
+
+
for tk in tokens:
+
if isinstance(tk, TagToken):
+
tag_len = 1 + grapheme.length(tk.tag)
+
if length + tag_len > max_chars:
+
new_block()
+
block.append(tk)
+
length += tag_len
+
continue
+
if isinstance(tk, LinkToken):
+
label_text = tk.label or ""
+
link_len = grapheme.length(label_text)
+
+
if canonical_label(tk.label, tk.href):
+
link_len = min(link_len, max_link_len)
+
+
if length + link_len <= max_chars:
+
block.append(tk)
+
length += link_len
+
continue
+
+
if length:
+
new_block()
+
+
remaining = label_text
+
while remaining:
+
room = (
+
max_chars
+
- length
+
- (0 if grapheme.length(remaining) <= max_chars else 1)
+
)
+
chunk = grapheme.slice(remaining, 0, room)
+
if grapheme.length(remaining) > room:
+
chunk += "-"
+
+
block.append(replace(tk, label=chunk))
+
length += grapheme.length(chunk)
+
+
remaining = grapheme.slice(remaining, room, grapheme.length(remaining))
+
if remaining:
+
new_block()
+
continue
+
if isinstance(tk, TextToken):
+
for seg in ALTERNATE.findall(tk.text):
+
seg_len = grapheme.length(seg)
+
+
if length + seg_len <= max_chars - (0 if seg.isspace() else 1):
+
append_text(seg)
+
length += seg_len
+
continue
+
+
if length:
+
new_block()
+
+
if not seg.isspace():
+
while grapheme.length(seg) > max_chars - 1:
+
chunk = grapheme.slice(seg, 0, max_chars - 1) + "-"
+
append_text(chunk)
+
new_block()
+
seg = grapheme.slice(seg, max_chars - 1, grapheme.length(seg))
+
else:
+
while grapheme.length(seg) > max_chars:
+
chunk = grapheme.slice(seg, 0, max_chars)
+
append_text(chunk)
+
new_block()
+
seg = grapheme.slice(seg, max_chars, grapheme.length(seg))
+
+
if seg:
+
append_text(seg)
+
length = grapheme.length(seg)
+
continue
+
block.append(tk)
+
if block:
+
blocks.append(block)
+
+
return blocks
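
a usage sketch for `split_tokens`: split an over-long post into blocks of at most 100 graphemes. the tokens themselves are made up:

```python
from cross.tokens import LinkToken, TextToken
from util.splitter import split_tokens

tokens = [
    TextToken(text="word " * 40),  # roughly 200 characters of prose
    LinkToken(
        href="https://example.tld/a/fairly/long/path",
        label="https://example.tld/a/fairly/long/path",
    ),  # canonical label, so it counts as at most max_link_len (35) characters
]
for i, block in enumerate(split_tokens(tokens, max_chars=100)):
    print(i, block)
```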
+30 -1
util/util.py
···
import logging
import sys
+
import os
+
from typing import Any, Callable
-
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
+
import env
+
+
shutdown_hook: list[Callable[[], None]] = []
+
+
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG if env.DEV else logging.INFO)
LOGGER = logging.getLogger("XPost")
+
+
def normalize_service_url(url: str) -> str:
+
if not url.startswith("https://") and not url.startswith("http://"):
+
raise ValueError(f"Invalid service url {url}! Only http/https are supported.")
+
+
return url[:-1] if url.endswith('/') else url
+
+
def read_env(data: dict[str, Any]) -> None:
+
keys = list(data.keys())
+
for key in keys:
+
val = data[key]
+
match val:
+
case str():
+
if val.startswith('env:'):
+
envval = os.environ.get(val[4:])
+
if envval is None:
+
del data[key]
+
else:
+
data[key] = envval
+
case dict():
+
read_env(val)
+
case _:
+
pass
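
for reference, this mirrors the behaviour covered in tests/util/util_test.py; the variable names below are made up:

```python
import os

from util.util import read_env

os.environ["MISSKEY_TOKEN"] = "secret"
config = {
    "token": "env:MISSKEY_TOKEN",      # replaced with the variable's value
    "missing": "env:XPOST_UNSET_VAR",  # dropped because the variable is unset
    "nested": {"instance": "https://example.tld"},  # dicts are handled recursively
}
read_env(config)
assert config["token"] == "secret"
assert "missing" not in config
```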
+88
uv.lock
···
]
[[package]]
+
name = "colorama"
+
version = "0.4.6"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
+
]
+
+
[[package]]
+
name = "dnspython"
+
version = "2.8.0"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
+
]
+
+
[[package]]
+
name = "grapheme"
+
version = "0.6.0"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/ce/e7/bbaab0d2a33e07c8278910c1d0d8d4f3781293dfbc70b5c38197159046bf/grapheme-0.6.0.tar.gz", hash = "sha256:44c2b9f21bbe77cfb05835fec230bd435954275267fea1858013b102f8603cca", size = 207306, upload-time = "2020-03-07T17:13:55.492Z" }
+
+
[[package]]
name = "idna"
version = "3.11"
source = { registry = "https://pypi.org/simple" }
···
]
[[package]]
+
name = "iniconfig"
+
version = "2.3.0"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" },
+
]
+
+
[[package]]
+
name = "packaging"
+
version = "25.0"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" },
+
]
+
+
[[package]]
+
name = "pluggy"
+
version = "1.6.0"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" },
+
]
+
+
[[package]]
+
name = "pygments"
+
version = "2.19.2"
+
source = { registry = "https://pypi.org/simple" }
+
sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
+
]
+
+
[[package]]
+
name = "pytest"
+
version = "8.4.2"
+
source = { registry = "https://pypi.org/simple" }
+
dependencies = [
+
{ name = "colorama", marker = "sys_platform == 'win32'" },
+
{ name = "iniconfig" },
+
{ name = "packaging" },
+
{ name = "pluggy" },
+
{ name = "pygments" },
+
]
+
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
+
wheels = [
+
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
+
]
+
+
[[package]]
name = "python-magic"
version = "0.4.27"
source = { registry = "https://pypi.org/simple" }
···
version = "0.1.0"
source = { virtual = "." }
dependencies = [
+
{ name = "dnspython" },
+
{ name = "grapheme" },
{ name = "python-magic" },
{ name = "requests" },
{ name = "websockets" },
]
+
[package.dev-dependencies]
+
dev = [
+
{ name = "pytest" },
+
]
+
[package.metadata]
requires-dist = [
+
{ name = "dnspython", specifier = ">=2.8.0" },
+
{ name = "grapheme", specifier = ">=0.6.0" },
{ name = "python-magic", specifier = ">=0.4.27" },
{ name = "requests", specifier = ">=2.32.5" },
{ name = "websockets", specifier = ">=15.0.1" },
]
+
+
[package.metadata.requires-dev]
+
dev = [{ name = "pytest", specifier = ">=8.4.2" }]