social media crossposting tool. 3rd time's the charm
mastodon misskey crossposting bluesky

get post handling of the bluesky input service in place. fragments and attachments left

zenfyr.dev bde55533 6f8993c0

+14
atproto/util.py
···
+URI = "at://"
+URI_LEN = len(URI)
+
+
+class AtUri:
+    @classmethod
+    def record_uri(cls, uri: str) -> tuple[str, str, str]:
+        did, collection, rid = uri[URI_LEN:].split("/")
+        if not (did and collection and rid):
+            raise ValueError(f"Invalid record uri {uri}!")
+        return did, collection, rid
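For reference, a quick sketch of what record_uri hands back (the DID and record key are invented):

    did, collection, rid = AtUri.record_uri("at://did:plc:example123/app.bsky.feed.post/3kabc123xyz")
    # did        -> "did:plc:example123"
    # collection -> "app.bsky.feed.post"
    # rid        -> "3kabc123xyz"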
+4 -4
bluesky/info.py
···
from abc import ABC, abstractmethod
-from typing import Any
+from typing import Any, override
from atproto.identity import did_resolver, handle_resolver
from cross.service import Service
-from util.util import LOGGER, normalize_service_url
+from util.util import normalize_service_url
SERVICE = "https://bsky.app"
···
        if not did:
            if not handle:
                raise KeyError("No did: or atproto handle provided!")
-            LOGGER.info("Resolving ATP identity for %s...", handle)
+            self.log.info("Resolving ATP identity for %s...", handle)
            self.did = handle_resolver.resolve_handle(handle)
        if not pds:
-            LOGGER.info("Resolving PDS from %s DID document...", did)
+            self.log.info("Resolving PDS from %s DID document...", did)
            atp_pds = did_resolver.resolve_did(self.did).get_atproto_pds()
            if not atp_pds:
                raise Exception("Failed to resolve atproto pds for %s")
+102 -7
bluesky/input.py
···
import websockets
+from atproto.util import AtUri
from bluesky.info import SERVICE, BlueskyService, validate_and_transform
+from cross.attachments import LabelsAttachment, LanguagesAttachment, RemoteUrlAttachment
+from cross.post import Post
from cross.service import InputService
from database.connection import DatabasePool
-from util.util import LOGGER, normalize_service_url
+from util.util import normalize_service_url
@dataclass(kw_only=True)
···
        super().__init__(SERVICE, db)
    def _on_post(self, record: dict[str, Any]):
-        LOGGER.info(record)  # TODO
+        post_uri = cast(str, record["$xpost.strongRef"]["uri"])
+        post_cid = cast(str, record["$xpost.strongRef"]["cid"])
+
+        parent_uri = cast(
+            str | None,
+            None if not record.get("reply") else record["reply"]["parent"]["uri"]
+        )
+        parent = None
+        if parent_uri:
+            parent = self._get_post(self.url, self.did, parent_uri)
+            if not parent:
+                self.log.info("Skipping %s, parent %s not found in db", post_uri, parent_uri)
+                return
+
+        post = Post(id=post_uri, parent_id=parent_uri, text=record["text"])
+        did, _, rid = AtUri.record_uri(post_uri)
+        post.attachments.put(RemoteUrlAttachment(url=f"https://bsky.app/profile/{did}/post/{rid}"))
+
+        # TODO Media Attachments
+        embed = record.get("embed", {})
+        if embed:
+            match cast(str, embed["$type"]):
+                case "app.bsky.embed.record" | "app.bsky.embed.recordWithMedia":
+                    _, collection, _ = AtUri.record_uri(
+                        cast(str, embed["record"]["uri"])
+                    )
+                    if collection == "app.bsky.feed.post":
+                        self.log.info("Skipping '%s': quote post.", post_uri)
+                        return
+                case _:
+                    self.log.warning("Unhandled embed type %s", embed["$type"])
+
+        if "langs" in record:
+            post.attachments.put(
+                LanguagesAttachment(langs=record["langs"])
+            )
+        if "labels" in record:
+            post.attachments.put(
+                LabelsAttachment(
+                    labels=[
+                        label["val"].replace("-", " ") for label in record["labels"]["values"]
+                    ]
+                ),
+            )
+
+        if parent:
+            self._insert_post({
+                "user": self.did,
+                "service": self.url,
+                "identifier": post_uri,
+                "parent": parent['id'],
+                "root": parent['id'] if not parent['root'] else parent['root'],
+                "extra_data": json.dumps({'cid': post_cid})
+            })
+        else:
+            self._insert_post({
+                "user": self.did,
+                "service": self.url,
+                "identifier": post_uri,
+                "extra_data": json.dumps({'cid': post_cid})
+            })
+
+        for out in self.outputs:
+            # bind `out` eagerly so each submitted task targets its own output
+            self.submitter(lambda out=out: out.accept_post(post))
    def _on_repost(self, record: dict[str, Any]):
-        LOGGER.info(record)  # TODO
+        post_uri = cast(str, record["$xpost.strongRef"]["uri"])
+        post_cid = cast(str, record["$xpost.strongRef"]["cid"])
+
+        reposted_uri = cast(str, record["subject"]["uri"])
+        reposted = self._get_post(self.url, self.did, reposted_uri)
+        if not reposted:
+            self.log.info(
+                "Skipping repost '%s' as reposted post '%s' was not found in the db.",
+                post_uri,
+                reposted_uri,
+            )
+            return
+
+        self._insert_post({
+            "user": self.did,
+            "service": self.url,
+            "identifier": post_uri,
+            "reposted": reposted['id'],
+            "extra_data": json.dumps({'cid': post_cid})
+        })
+
+        for out in self.outputs:
+            self.submitter(lambda out=out: out.accept_repost(post_uri, reposted_uri))
    def _on_delete_post(self, post_id: str, repost: bool):
-        LOGGER.info("%s | %s", post_id, repost)  # TODO
+        post = self._get_post(self.url, self.did, post_id)
+        if not post:
+            return
+
+        if repost:
+            for output in self.outputs:
+                self.submitter(lambda output=output: output.delete_repost(post_id))
+        else:
+            for output in self.outputs:
+                self.submitter(lambda output=output: output.delete_post(post_id))
+        self._delete_post_by_id(post['id'])
class BlueskyJetstreamInputService(BlueskyBaseInputService):
···
        async for ws in websockets.connect(url):
            try:
-                LOGGER.info("Listening to %s...", self.options.jetstream)
+                self.log.info("Listening to %s...", self.options.jetstream)
                async def listen_for_messages():
                    async for msg in ws:
···
                _ = await asyncio.gather(listen)
            except websockets.ConnectionClosedError as e:
-                LOGGER.error(e, stack_info=True, exc_info=True)
-                LOGGER.info("Reconnecting to %s...", self.options.jetstream)
+                self.log.error(e, stack_info=True, exc_info=True)
+                self.log.info("Reconnecting to %s...", self.options.jetstream)
                continue
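For orientation, a rough sketch of the record shape _on_post expects; the $xpost.strongRef key mirrors what the handler reads above (presumably merged in by the Jetstream listener before dispatch), the other fields follow app.bsky.feed.post, and every concrete value below is invented:

    record = {
        "$xpost.strongRef": {
            "uri": "at://did:plc:example123/app.bsky.feed.post/3kabc123xyz",
            "cid": "bafyexamplecid",
        },
        "text": "hello fediverse",
        "langs": ["en"],
        "labels": {"values": [{"val": "graphic-media"}]},
    }
    # a reply would additionally carry record["reply"]["parent"]["uri"];
    # quote posts arrive as an app.bsky.embed.record embed and are skipped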
+8 -8
cross/attachments.py
···
from dataclasses import dataclass
-@dataclass
+@dataclass(kw_only=True)
class Attachment:
    pass
-@dataclass
-class SpoilerAttachment(Attachment):
-    spoiler: str
+@dataclass(kw_only=True)
+class LabelsAttachment(Attachment):
+    labels: list[str]
-@dataclass
+@dataclass(kw_only=True)
class LanguagesAttachment(Attachment):
    langs: list[str]
-@dataclass
+@dataclass(kw_only=True)
class SensitiveAttachment(Attachment):
    sensitive: bool
-@dataclass
+@dataclass(kw_only=True)
class RemoteUrlAttachment(Attachment):
    url: str
-@dataclass
+@dataclass(kw_only=True)
class QuoteAttachment(Attachment):
    quoted_id: str
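Since the attachments are now kw_only, they can only be constructed by keyword, which is how the bluesky input above builds them; a quick illustration:

    LanguagesAttachment(langs=["en"])   # ok
    LanguagesAttachment(["en"])         # TypeError, positional arguments are rejected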
+4 -4
cross/fragments.py
···
from dataclasses import dataclass
-@dataclass
+@dataclass(kw_only=True)
class Fragment:
    start: int
    end: int
-@dataclass
+@dataclass(kw_only=True)
class LinkFragment(Fragment):
    url: str
-@dataclass
+@dataclass(kw_only=True)
class TagFragment(Fragment):
    tag: str
-@dataclass
+@dataclass(kw_only=True)
class MentionFragment(Fragment):
    uri: str
+6 -3
cross/post.py
···
    def __init__(self) -> None:
        self._map: dict[type, Attachment] = {}
-    def put(self, cls: type[T], attachment: T) -> None:
-        self._map[cls] = attachment
+    def put(self, attachment: Attachment) -> None:
+        self._map[attachment.__class__] = attachment
    def get(self, cls: type[T]) -> T | None:
        instance = self._map.get(cls)
···
            raise TypeError(f"Expected {cls.__name__}, got {type(instance).__name__}")
        return instance
+    def __repr__(self) -> str:
+        return f"AttachmentKeeper(_map={self._map.values()})"
+
@dataclass
class Post:
    id: str
    parent_id: str | None
    text: str  # utf-8 text
-    attachments: AttachmentKeeper
+    attachments: AttachmentKeeper = field(default_factory=AttachmentKeeper)
    fragments: list[Fragment] = field(default_factory=list)
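A small usage sketch of the new put/get signatures together with the default-constructed keeper (the URI is an invented example):

    post = Post(id="at://did:plc:example123/app.bsky.feed.post/3kabc123xyz", parent_id=None, text="hello")
    post.attachments.put(LanguagesAttachment(langs=["en"]))
    langs = post.attachments.get(LanguagesAttachment)  # -> LanguagesAttachment(langs=['en']) or None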
+56 -9
cross/service.py
···
import sqlite3
from abc import ABC, abstractmethod
-from typing import Callable, cast
+from typing import Any, Callable, cast
+import logging
from cross.post import Post
from database.connection import DatabasePool
-from util.util import LOGGER
+
+columns: list[str] = [
+    "user",
+    "service",
+    "identifier",
+    "parent",
+    "root",
+    "reposted",
+    "extra_data",
+]
+placeholders: str = ", ".join(["?" for _ in columns])
+column_names: str = ", ".join(columns)
class Service:
    def __init__(self, url: str, db: DatabasePool) -> None:
        self.url: str = url
        self.db: DatabasePool = db
+        self.log: logging.Logger = logging.getLogger(self.__class__.__name__)
        # self._lock: threading.Lock = threading.Lock()
-    def get_post(self, url: str, user: str, identifier: str) -> sqlite3.Row | None:
+    def _get_post(self, url: str, user: str, identifier: str) -> sqlite3.Row | None:
        cursor = self.db.get_conn().cursor()
        _ = cursor.execute(
            """
            SELECT * FROM posts
            WHERE service = ?
-            AND user_id = ?
+            AND user = ?
            AND identifier = ?
            """,
            (url, user, identifier),
        )
        return cast(sqlite3.Row, cursor.fetchone())
-    def get_post_by_id(self, id: int) -> sqlite3.Row | None:
+    def _get_post_by_id(self, id: int) -> sqlite3.Row | None:
        cursor = self.db.get_conn().cursor()
        _ = cursor.execute("SELECT * FROM posts WHERE id = ?", (id,))
        return cast(sqlite3.Row, cursor.fetchone())
+    def _insert_post(self, post_data: dict[str, Any]):
+        values = [post_data.get(col) for col in columns]
+        cursor = self.db.get_conn().cursor()
+        _ = cursor.execute(
+            f"INSERT INTO posts ({column_names}) VALUES ({placeholders})", values
+        )
+
+    def _insert_post_mapping(self, original: int, mapped: int):
+        cursor = self.db.get_conn().cursor()
+        _ = cursor.execute(
+            "INSERT OR IGNORE INTO mappings (original, mapped) VALUES (?, ?);",
+            (original, mapped),
+        )
+        _ = cursor.execute(
+            "INSERT OR IGNORE INTO mappings (original, mapped) VALUES (?, ?);",
+            (mapped, original),
+        )
+
+    def _delete_post(self, url: str, user: str, identifier: str):
+        cursor = self.db.get_conn().cursor()
+        _ = cursor.execute(
+            """
+            DELETE FROM posts
+            WHERE identifier = ?
+            AND service = ?
+            AND user = ?
+            """,
+            (identifier, url, user),
+        )
+
+    def _delete_post_by_id(self, id: int):
+        cursor = self.db.get_conn().cursor()
+        _ = cursor.execute("DELETE FROM posts WHERE id = ?", (id,))
+
class OutputService(Service):
    def accept_post(self, post: Post):
-        LOGGER.warning("NOT IMPLEMENTED (%s), accept_post %s", self.url, post.id)
+        self.log.warning("NOT IMPLEMENTED (%s), accept_post %s", self.url, post.id)
    def delete_post(self, post_id: str):
-        LOGGER.warning("NOT IMPLEMENTED (%s), delete_post %s", self.url, post_id)
+        self.log.warning("NOT IMPLEMENTED (%s), delete_post %s", self.url, post_id)
    def accept_repost(self, repost_id: str, reposted_id: str):
-        LOGGER.warning(
+        self.log.warning(
            "NOT IMPLEMENTED (%s), accept_repost %s of %s",
            self.url,
            repost_id,
···
        )
    def delete_repost(self, repost_id: str):
-        LOGGER.warning("NOT IMPLEMENTED (%s), delete_repost %s", self.url, repost_id)
+        self.log.warning("NOT IMPLEMENTED (%s), delete_repost %s", self.url, repost_id)
class InputService(ABC, Service):
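Because _insert_post pulls values with post_data.get(col), any column missing from the dict is stored as NULL; the top-level-post branch of the bluesky input leans on that. Roughly (DID and CID invented):

    self._insert_post({
        "user": "did:plc:example123",
        "service": "https://bsky.app",
        "identifier": "at://did:plc:example123/app.bsky.feed.post/3kabc123xyz",
        "extra_data": json.dumps({"cid": "bafyexamplecid"}),
    })  # parent, root and reposted are absent and end up as NULL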
+3 -3
mastodon/info.py
···
import requests
from cross.service import Service
-from util.util import LOGGER, normalize_service_url
+from util.util import normalize_service_url
def validate_and_transform(data: dict[str, Any]):
    if 'token' not in data or 'instance' not in data:
···
            headers={"Authorization": f"Bearer {token}"},
        )
        if responce.status_code != 200:
-            LOGGER.error("Failed to validate user credentials!")
+            self.log.error("Failed to validate user credentials!")
        responce.raise_for_status()
        return dict(responce.json())
···
            headers={"Authorization": f"Bearer {token}"},
        )
        if responce.status_code != 200:
-            LOGGER.error("Failed to get instance info!")
+            self.log.error("Failed to get instance info!")
        responce.raise_for_status()
        return dict(responce.json())
+7 -8
mastodon/input.py
···
from cross.service import InputService
from database.connection import DatabasePool
from mastodon.info import MastodonService, validate_and_transform
-from util.util import LOGGER
ALLOWED_VISIBILITY: list[str] = ["public", "unlisted"]
···
        super().__init__(options.instance, db)
        self.options: MastodonInputOptions = options
-        LOGGER.info("Verifying %s credentails...", self.url)
+        self.log.info("Verifying %s credentials...", self.url)
        responce = self.verify_credentials()
        self.user_id: str = responce["id"]
-        LOGGER.info("Getting %s configuration...", self.url)
+        self.log.info("Getting %s configuration...", self.url)
        responce = self.fetch_instance_info()
        self.streaming_url: str = responce["urls"]["streaming_api"]
···
        return self.options.token
    def _on_create_post(self, status: dict[str, Any]):
-        LOGGER.info(status)  # TODO
+        self.log.info(status)  # TODO
    def _on_delete_post(self, status_id: str):
-        LOGGER.info(status_id)  # TODO
+        self.log.info(status_id)  # TODO
    def _accept_msg(self, msg: websockets.Data) -> None:
        data: dict[str, Any] = cast(dict[str, Any], json.loads(msg))
···
            url, additional_headers={"Authorization": f"Bearer {self.options.token}"}
        ):
            try:
-                LOGGER.info("Listening to %s...", self.streaming_url)
+                self.log.info("Listening to %s...", self.streaming_url)
                async def listen_for_messages():
                    async for msg in ws:
···
                _ = await asyncio.gather(listen)
            except websockets.ConnectionClosedError as e:
-                LOGGER.error(e, stack_info=True, exc_info=True)
-                LOGGER.info("Reconnecting to %s...", self.streaming_url)
+                self.log.error(e, stack_info=True, exc_info=True)
+                self.log.info("Reconnecting to %s...", self.streaming_url)
                continue
+2 -3
mastodon/output.py
···
from cross.service import OutputService
from database.connection import DatabasePool
from mastodon.info import InstanceInfo, MastodonService, validate_and_transform
-from util.util import LOGGER
ALLOWED_POSTING_VISIBILITY: list[str] = ["public", "unlisted", "private"]
···
        super().__init__(options.instance, db)
        self.options: MastodonOutputOptions = options
-        LOGGER.info("Verifying %s credentails...", self.url)
+        self.log.info("Verifying %s credentials...", self.url)
        responce = self.verify_credentials()
        self.user_id: str = responce["id"]
-        LOGGER.info("Getting %s configuration...", self.url)
+        self.log.info("Getting %s configuration...", self.url)
        responce = self.fetch_instance_info()
        self.instance_info: InstanceInfo = InstanceInfo.from_api(responce)
+3 -2
migrations/001_initdb.sql
···
CREATE TABLE IF NOT EXISTS posts (
-    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    id INTEGER UNIQUE PRIMARY KEY AUTOINCREMENT,
    user TEXT NOT NULL,
    service TEXT NOT NULL,
    identifier TEXT NOT NULL,
···
CREATE TABLE IF NOT EXISTS mappings (
    original INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
-    mapped INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE
+    mapped INTEGER NOT NULL REFERENCES posts(id) ON DELETE CASCADE,
+    UNIQUE(original, mapped)
);
+5
migrations/002_add_indexes.sql
···
+CREATE INDEX IF NOT EXISTS idx_posts_service_user_identifier
+    ON posts (service, user, identifier);
+
+CREATE UNIQUE INDEX IF NOT EXISTS ux_mappings_original_mapped
+    ON mappings (original, mapped);
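Together with the UNIQUE(original, mapped) constraint from 001, the INSERT OR IGNORE pair in _insert_post_mapping is idempotent. A standalone sketch against an in-memory SQLite connection (not the project's DatabasePool):

    import sqlite3

    conn = sqlite3.connect(":memory:")
    conn.execute("CREATE TABLE mappings (original INTEGER NOT NULL, mapped INTEGER NOT NULL, UNIQUE(original, mapped))")
    for original, mapped in [(1, 2), (2, 1), (1, 2)]:  # the third pair conflicts and is ignored
        conn.execute("INSERT OR IGNORE INTO mappings (original, mapped) VALUES (?, ?)", (original, mapped))
    print(conn.execute("SELECT COUNT(*) FROM mappings").fetchone()[0])  # -> 2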
+1 -2
misskey/info.py
···
import requests
from cross.service import Service
-from util.util import LOGGER
class MisskeyService(ABC, Service):
···
            headers={"Content-Type": "application/json"},
        )
        if responce.status_code != 200:
-            LOGGER.error("Failed to validate user credentials!")
+            self.log.error("Failed to validate user credentials!")
        responce.raise_for_status()
        return dict(responce.json())
+7 -7
misskey/input.py
···
from cross.service import InputService
from database.connection import DatabasePool
from misskey.info import MisskeyService
-from util.util import LOGGER, normalize_service_url
+from util.util import normalize_service_url
ALLOWED_VISIBILITY = ["public", "home"]
···
        super().__init__(options.instance, db)
        self.options: MisskeyInputOptions = options
-        LOGGER.info("Verifying %s credentails...", self.url)
+        self.log.info("Verifying %s credentials...", self.url)
        responce = self.verify_credentials()
        self.user_id: str = responce["id"]
···
        return self.options.token
    def _on_note(self, note: dict[str, Any]):
-        LOGGER.info(note)  # TODO
+        self.log.info(note)  # TODO
    def _accept_msg(self, msg: websockets.Data) -> None:
        data: dict[str, Any] = cast(dict[str, Any], json.loads(msg))
···
                }
            )
        )
-        LOGGER.info("Subscribed to 'homeTimeline' channel...")
+        self.log.info("Subscribed to 'homeTimeline' channel...")
    @override
    async def listen(self):
···
        async for ws in websockets.connect(url):
            try:
-                LOGGER.info("Listening to %s...", streaming)
+                self.log.info("Listening to %s...", streaming)
                await self._subscribe_to_home(ws)
                async def listen_for_messages():
···
                _ = await asyncio.gather(listen)
            except websockets.ConnectionClosedError as e:
-                LOGGER.error(e, stack_info=True, exc_info=True)
-                LOGGER.info("Reconnecting to %s...", streaming)
+                self.log.error(e, stack_info=True, exc_info=True)
+                self.log.info("Reconnecting to %s...", streaming)
                continue
+3 -1
registry_bootstrap.py
···
        options_class = getattr(module, self.options_class_name)
        return service_class(db, options_class.from_dict(d))
-
def bootstrap():
    input_factories["mastodon-wss"] = LazyFactory(
        "mastodon.input", "MastodonInputService", "MastodonInputOptions"
···
    input_factories["bluesky-jetstream"] = LazyFactory(
        "bluesky.input", "BlueskyJetstreamInputService", "BlueskyJetstreamInputOptions"
    )
+    output_factories['stderr'] = LazyFactory(
+        "util.dummy", "StderrOutputService", "DummyOptions"
+    )
+29
util/dummy.py
···
+from typing import override
+from cross.post import Post
+from cross.service import OutputService
+from database.connection import DatabasePool
+
+class DummyOptions:
+    @classmethod
+    def from_dict(cls, obj) -> 'DummyOptions':
+        return DummyOptions()
+
+class StderrOutputService(OutputService):
+    def __init__(self, db: DatabasePool, options: DummyOptions) -> None:
+        super().__init__("http://localhost", db)
+
+    @override
+    def accept_post(self, post: Post):
+        self.log.info("%s", post)
+
+    @override
+    def accept_repost(self, repost_id: str, reposted_id: str):
+        self.log.info("%s, %s", repost_id, reposted_id)
+
+    @override
+    def delete_post(self, post_id: str):
+        self.log.info("%s", post_id)
+
+    @override
+    def delete_repost(self, repost_id: str):
+        self.log.info("%s", repost_id)