Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

extract filterable properties & render json early

Do as much per-event work as possible in the consumer, so that subscribers can filter as cheaply as possible and have a pre-rendered message ready to send to connected clients.
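The shape of the change, as a rough sketch (simplified; the real FilterableProperties and ClientMessage types live in spacedust/src/lib.rs, and the names below are stand-ins): the consumer serializes each link to JSON once, pairs it with a few cheap-to-compare properties, and shares the result behind an Arc, so subscribers only do string comparisons and forward a pre-built frame.

// Simplified sketch, not the committed code: render once, share via Arc,
// let each subscriber filter on plain string fields.
use std::sync::Arc;
use tokio::sync::broadcast;

struct Properties {
    subject: String,             // DID, at-uri, or URL, unmodified
    subject_did: Option<String>, // extracted DID, if the subject has one
    source: String,              // "collection.nsid:record.path"
}

struct PreRendered {
    json: String, // rendered once in the consumer, sent verbatim to every client
    properties: Properties,
}

fn publish(tx: &broadcast::Sender<Arc<PreRendered>>, msg: PreRendered) {
    // Cloning an Arc is cheap; the JSON string itself is never copied per subscriber.
    let _ = tx.send(Arc::new(msg));
}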

Changed files (+129 -98)
links/src/lib.rs (+15)
···
None
}
}
}
#[derive(Debug, PartialEq)]
···
None
}
}
+pub fn did(&self) -> Option<String> {
+let did = match self {
+Link::AtUri(s) => {
+let rest = s.strip_prefix("at://")?; // todo: this might be safe to unwrap?
+if let Some((did, _)) = rest.split_once("/") {
+did
+} else {
+rest
+}
+}
+Link::Uri(_) => return None,
+Link::Did(did) => did,
+};
+Some(did.to_string())
+}
}
#[derive(Debug, PartialEq)]
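A hypothetical check of the new did() helper, assuming the Link variants wrap plain strings (the DIDs and record keys below are made up):

// Hypothetical test for Link::did(); example identifiers are invented.
#[test]
fn did_is_extracted_from_at_uris_and_bare_dids() {
    let at = Link::AtUri("at://did:plc:abc123/app.bsky.feed.post/3kxyz".into());
    assert_eq!(at.did(), Some("did:plc:abc123".to_string()));

    // an at-uri with no collection/rkey still yields its DID
    let bare_at = Link::AtUri("at://did:plc:abc123".into());
    assert_eq!(bare_at.did(), Some("did:plc:abc123".to_string()));

    let did = Link::Did("did:plc:abc123".into());
    assert_eq!(did.did(), Some("did:plc:abc123".to_string()));

    // plain URLs never carry a DID
    assert_eq!(Link::Uri("https://example.com/".into()).did(), None);
}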
spacedust/src/consumer.rs (+29 -23)
···
use tokio_util::sync::CancellationToken;
-use crate::LinkEvent;
use crate::error::ConsumerError;
use crate::removable_delay_queue;
use jetstream::{
···
const MAX_LINKS_PER_EVENT: usize = 100;
pub async fn consume(
-b: broadcast::Sender<LinkEvent>,
-d: removable_delay_queue::Input<(String, usize), LinkEvent>,
jetstream_endpoint: String,
cursor: Option<Cursor>,
no_zstd: bool,
···
) -> Result<(), ConsumerError> {
let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(&jetstream_endpoint);
if endpoint == jetstream_endpoint {
-log::info!("connecting to jetstream at {endpoint}");
} else {
-log::info!("connecting to jetstream at {jetstream_endpoint} => {endpoint}");
}
let config: JetstreamConfig = JetstreamConfig {
endpoint,
···
.connect_cursor(cursor)
.await?;
-log::info!("receiving jetstream messages..");
loop {
if shutdown.is_cancelled() {
-log::info!("exiting consumer for shutdown");
return Ok(());
}
let Some(event) = receiver.recv().await else {
-log::error!("could not receive jetstream event, bailing");
break;
};
if event.kind != EventKind::Commit {
continue;
}
-let Some(commit) = event.commit else {
-log::warn!("jetstream commit event missing commit data, ignoring");
continue;
};
let at_uri = format!("at://{}/{}/{}", &*event.did, &*commit.collection, &*commit.rkey);
// TODO: keep a buffer and remove quick deletes to debounce notifs
···
d.remove_range((at_uri.clone(), 0)..=(at_uri.clone(), MAX_LINKS_PER_EVENT)).await;
continue;
}
-let Some(record) = commit.record else {
-log::warn!("jetstream commit update/delete missing record, ignoring");
continue;
};
let jv = match record.get().parse() {
Ok(v) => v,
Err(e) => {
-log::warn!("jetstream record failed to parse, ignoring: {e}");
continue;
}
};
-// todo: indicate if the link limit was reached (-> links omitted)
for (i, link) in collect_links(&jv).into_iter().enumerate() {
if i >= MAX_LINKS_PER_EVENT {
-log::warn!("jetstream event has too many links, ignoring the rest");
break;
}
-let link_ev = LinkEvent {
-collection: commit.collection.to_string(),
-path: link.path,
-origin: at_uri.clone(),
-rev: commit.rev.to_string(),
-target: link.target.into_string(),
};
-let _ = b.send(link_ev.clone()); // only errors if no subscribers are connected, which is just fine.
-d.enqueue((at_uri.clone(), i), link_ev)
.await
.map_err(|_| ConsumerError::DelayQueueOutputDropped)?;
}
···
+use std::sync::Arc;
use tokio_util::sync::CancellationToken;
+use crate::ClientMessage;
use crate::error::ConsumerError;
use crate::removable_delay_queue;
use jetstream::{
···
const MAX_LINKS_PER_EVENT: usize = 100;
pub async fn consume(
+b: broadcast::Sender<Arc<ClientMessage>>,
+d: removable_delay_queue::Input<(String, usize), Arc<ClientMessage>>,
jetstream_endpoint: String,
cursor: Option<Cursor>,
no_zstd: bool,
···
) -> Result<(), ConsumerError> {
let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(&jetstream_endpoint);
if endpoint == jetstream_endpoint {
+log::info!("consumer: connecting jetstream at {endpoint}");
} else {
+log::info!("consumer: connecting jetstream at {jetstream_endpoint} => {endpoint}");
}
let config: JetstreamConfig = JetstreamConfig {
endpoint,
···
.connect_cursor(cursor)
.await?;
+log::info!("consumer: receiving messages..");
loop {
if shutdown.is_cancelled() {
+log::info!("consumer: exiting for shutdown");
return Ok(());
}
let Some(event) = receiver.recv().await else {
+log::error!("consumer: could not receive event, bailing");
break;
};
if event.kind != EventKind::Commit {
continue;
}
+let Some(ref commit) = event.commit else {
+log::warn!("consumer: commit event missing commit data, ignoring");
continue;
};
+// TODO: something a bit more robust
let at_uri = format!("at://{}/{}/{}", &*event.did, &*commit.collection, &*commit.rkey);
// TODO: keep a buffer and remove quick deletes to debounce notifs
···
d.remove_range((at_uri.clone(), 0)..=(at_uri.clone(), MAX_LINKS_PER_EVENT)).await;
continue;
}
+let Some(ref record) = commit.record else {
+log::warn!("consumer: commit update/delete missing record, ignoring");
continue;
};
let jv = match record.get().parse() {
Ok(v) => v,
Err(e) => {
+log::warn!("consumer: record failed to parse, ignoring: {e}");
continue;
}
};
for (i, link) in collect_links(&jv).into_iter().enumerate() {
if i >= MAX_LINKS_PER_EVENT {
+// todo: indicate if the link limit was reached (-> links omitted)
+log::warn!("consumer: event has too many links, ignoring the rest");
+metrics::counter!("consumer_dropped_links", "reason" => "too_many_links").increment(1);
break;
}
+let client_message = match ClientMessage::new_link(link, &at_uri, commit) {
+Ok(m) => m,
+Err(e) => {
+// TODO indicate to clients that a link has been dropped
+log::warn!("consumer: failed to serialize link to json: {e:?}");
+metrics::counter!("consumer_dropped_links", "reason" => "failed_to_serialize").increment(1);
+continue;
+}
};
+let message = Arc::new(client_message);
+let _ = b.send(message.clone()); // only errors if no subscribers are connected, which is just fine.
+d.enqueue((at_uri.clone(), i), message)
.await
.map_err(|_| ConsumerError::DelayQueueOutputDropped)?;
}
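The delay queue keys every pending message by (at-uri, link index), which is what lets a delete drop everything the matching create enqueued with a single range removal. A toy model of that keying, with a BTreeMap standing in for removable_delay_queue (which also delays delivery, omitted here; example values are invented):

// Toy model of (at_uri, link index) keying.
use std::collections::BTreeMap;

fn main() {
    let mut pending: BTreeMap<(String, usize), &'static str> = BTreeMap::new();
    let at_uri = "at://did:plc:alice/app.bsky.feed.post/3kpost".to_string();

    // a create with three extracted links enqueues three keyed entries
    for i in 0..3 {
        pending.insert((at_uri.clone(), i), "pre-rendered message");
    }

    // a later delete of the same record clears the whole key range at once
    let keys: Vec<_> = pending
        .range((at_uri.clone(), 0)..=(at_uri.clone(), 100))
        .map(|(k, _)| k.clone())
        .collect();
    for k in keys {
        pending.remove(&k);
    }
    assert!(pending.is_empty());
}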
spacedust/src/delay.rs (+3 -4)
···
use crate::removable_delay_queue;
-use crate::LinkEvent;
use tokio_util::sync::CancellationToken;
use tokio::sync::broadcast;
use crate::error::DelayError;
-pub async fn to_broadcast(
-source: removable_delay_queue::Output<(String, usize), LinkEvent>,
-dest: broadcast::Sender<LinkEvent>,
shutdown: CancellationToken,
) -> Result<(), DelayError> {
loop {
···
use crate::removable_delay_queue;
use tokio_util::sync::CancellationToken;
use tokio::sync::broadcast;
use crate::error::DelayError;
+pub async fn to_broadcast<T>(
+source: removable_delay_queue::Output<(String, usize), T>,
+dest: broadcast::Sender<T>,
shutdown: CancellationToken,
) -> Result<(), DelayError> {
loop {
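Making to_broadcast generic is what lets the same relay loop carry Arc<ClientMessage> instead of LinkEvent. A stand-in sketch of such a loop, with an mpsc::Receiver in place of removable_delay_queue::Output (whose receive API isn't shown in this diff):

// Stand-in relay sketch; the Output type and its API are assumptions here.
use tokio::sync::{broadcast, mpsc};
use tokio_util::sync::CancellationToken;

async fn relay<T: Clone>(
    mut source: mpsc::Receiver<T>, // stand-in for removable_delay_queue::Output
    dest: broadcast::Sender<T>,
    shutdown: CancellationToken,
) {
    loop {
        tokio::select! {
            _ = shutdown.cancelled() => break,
            item = source.recv() => match item {
                // send() only errors when no subscriber is connected, which is fine
                Some(v) => { let _ = dest.send(v); }
                None => break, // input side dropped
            },
        }
    }
}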
spacedust/src/lib.rs (+61 -26)
···
pub mod subscriber;
pub mod removable_delay_queue;
use serde::Serialize;
-#[derive(Debug, Clone)]
-pub struct LinkEvent {
-collection: String,
-path: String,
-origin: String,
-target: String,
-rev: String,
}
#[derive(Debug, Serialize)]
#[serde(rename_all="snake_case")]
pub struct ClientEvent {
-kind: String, // "link"
-origin: String, // "live", "replay", "backfill"
link: ClientLinkEvent,
}
#[derive(Debug, Serialize)]
struct ClientLinkEvent {
-operation: String, // "create", "delete" (prob no update, though maybe for rev?)
source: String,
source_record: String,
source_rev: String,
subject: String,
// TODO: include the record too? would save clients a level of hydration
-
}
-
-impl From<LinkEvent> for ClientLinkEvent {
-fn from(link: LinkEvent) -> Self {
-let undotted = link.path.strip_prefix('.').unwrap_or_else(|| {
-eprintln!("link path did not have expected '.' prefix: {}", link.path);
-""
-});
-Self {
-operation: "create".to_string(),
-source: format!("{}:{undotted}", link.collection),
-source_record: link.origin,
-source_rev: link.rev,
-subject: link.target,
-}
-}
}
···
pub mod subscriber;
pub mod removable_delay_queue;
+use links::CollectedLink;
+use jetstream::events::CommitEvent;
+use tokio_tungstenite::tungstenite::Message;
use serde::Serialize;
+#[derive(Debug)]
+pub struct FilterableProperties {
+/// Full unmodified DID, at-uri, or url
+pub subject: String,
+/// User/identity DID.
+///
+/// Will match both bare-DIDs and DIDs extracted from at-uris.
+/// `None` for any URL.
+pub subject_did: Option<String>,
+/// Link source -- collection NSID joined with `:` to the record property path.
+pub source: String,
+}
+
+/// A serialized message with filterable properties attached
+#[derive(Debug)]
+pub struct ClientMessage {
+pub message: Message, // always Message::Text
+pub properties: FilterableProperties,
+}
+
+impl ClientMessage {
+pub fn new_link(link: CollectedLink, at_uri: &str, commit: &CommitEvent) -> Result<Self, serde_json::Error> {
+let subject_did = link.target.did();
+
+let subject = link.target.into_string();
+
+let undotted = link.path.strip_prefix('.').unwrap_or_else(|| {
+eprintln!("link path did not have expected '.' prefix: {}", link.path);
+""
+});
+let source = format!("{}:{undotted}", &*commit.collection);
+
+let client_link_event = ClientLinkEvent {
+operation: "create",
+source: source.clone(),
+source_record: at_uri.to_string(),
+source_rev: commit.rev.to_string(),
+subject: subject.clone(),
+};
+
+let client_event = ClientEvent {
+kind: "link",
+origin: "live", // TODO: indicate when we're locally replaying jetstream on reconnect?? maybe not.
+link: client_link_event,
+};
+
+let client_event_json = serde_json::to_string(&client_event)?;
+
+let message = Message::Text(client_event_json.into());
+
+let properties = FilterableProperties { subject, subject_did, source };
+
+Ok(ClientMessage { message, properties })
+}
}
#[derive(Debug, Serialize)]
#[serde(rename_all="snake_case")]
pub struct ClientEvent {
+kind: &'static str, // "link"
+origin: &'static str, // "live", "replay", "backfill"
link: ClientLinkEvent,
}
#[derive(Debug, Serialize)]
struct ClientLinkEvent {
+operation: &'static str, // "create", "delete" (prob no update, though maybe for rev?)
source: String,
source_record: String,
source_rev: String,
subject: String,
// TODO: include the record too? would save clients a level of hydration
+// ^^ no, not for now. until we backfill + support broader deletes at *least*.
}
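For reference, a message pre-rendered by new_link serializes to JSON shaped like the output below (mirrored structs for a standalone example; every value is made up):

// Standalone mirror of ClientEvent/ClientLinkEvent showing the wire shape.
use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "snake_case")]
struct ClientEvent {
    kind: &'static str,
    origin: &'static str,
    link: ClientLinkEvent,
}

#[derive(Serialize)]
struct ClientLinkEvent {
    operation: &'static str,
    source: String,
    source_record: String,
    source_rev: String,
    subject: String,
}

fn main() {
    let ev = ClientEvent {
        kind: "link",
        origin: "live",
        link: ClientLinkEvent {
            operation: "create",
            source: "app.bsky.feed.like:subject.uri".to_string(),
            source_record: "at://did:plc:alice/app.bsky.feed.like/3kexample".to_string(),
            source_rev: "3kexamplerev22".to_string(),
            subject: "at://did:plc:bob/app.bsky.feed.post/3kpost".to_string(),
        },
    };
    // prints: {"kind":"link","origin":"live","link":{"operation":"create",...}}
    println!("{}", serde_json::to_string(&ev).unwrap());
}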
spacedust/src/server.rs (+5 -5)
···
use crate::subscriber::Subscriber;
use metrics::{histogram, counter};
use std::sync::Arc;
-use crate::LinkEvent;
use http::{
header::{ORIGIN, USER_AGENT},
Response, StatusCode,
···
const FAVICON: &[u8] = include_bytes!("../static/favicon.ico");
pub async fn serve(
-b: broadcast::Sender<LinkEvent>,
-d: broadcast::Sender<LinkEvent>,
shutdown: CancellationToken
) -> Result<(), ServerError> {
let config_logging = ConfigLogging::StderrTerminal {
···
#[derive(Debug, Clone)]
struct Context {
pub spec: Arc<serde_json::Value>,
-pub b: broadcast::Sender<LinkEvent>,
-pub d: broadcast::Sender<LinkEvent>,
pub shutdown: CancellationToken,
}
···
use crate::subscriber::Subscriber;
use metrics::{histogram, counter};
use std::sync::Arc;
+use crate::ClientMessage;
use http::{
header::{ORIGIN, USER_AGENT},
Response, StatusCode,
···
const FAVICON: &[u8] = include_bytes!("../static/favicon.ico");
pub async fn serve(
+b: broadcast::Sender<Arc<ClientMessage>>,
+d: broadcast::Sender<Arc<ClientMessage>>,
shutdown: CancellationToken
) -> Result<(), ServerError> {
let config_logging = ConfigLogging::StderrTerminal {
···
#[derive(Debug, Clone)]
struct Context {
pub spec: Arc<serde_json::Value>,
+pub b: broadcast::Sender<Arc<ClientMessage>>,
+pub d: broadcast::Sender<Arc<ClientMessage>>,
pub shutdown: CancellationToken,
}
spacedust/src/subscriber.rs (+16 -40)
···
use tokio::time::interval;
use std::time::Duration;
use futures::StreamExt;
-use crate::ClientEvent;
-use crate::LinkEvent;
use crate::server::MultiSubscribeQuery;
use futures::SinkExt;
use std::error::Error;
···
pub async fn start(
self,
ws: WebSocketStream<WebsocketConnectionRaw>,
-mut receiver: broadcast::Receiver<LinkEvent>
) -> Result<(), Box<dyn Error>> {
let mut ping_state = None;
let (mut ws_sender, mut ws_receiver) = ws.split();
···
loop {
tokio::select! {
l = receiver.recv() => match l {
-Ok(link) => if let Some(message) = self.filter(link) {
-if let Err(e) = ws_sender.send(message).await {
log::warn!("failed to send link, dropping subscriber: {e:?}");
break;
}
···
fn filter(
&self,
-link: LinkEvent,
-// mut sender: impl Sink<Message> + Unpin
-) -> Option<Message> {
let query = &self.query;
// subject + subject DIDs are logical OR
-let target_did = if link.target.starts_with("did:") {
-link.target.clone()
-} else {
-let rest = link.target.strip_prefix("at://")?;
-if let Some((did, _)) = rest.split_once("/") {
-did
-} else {
-rest
-}.to_string()
-};
-if !(query.wanted_subjects.contains(&link.target) || query.wanted_subject_dids.contains(&target_did) || query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty()) {
-// wowwww ^^ fix that
-return None
}
// subjects together with sources are logical AND
-
-if !query.wanted_sources.is_empty() {
-let undotted = link.path.strip_prefix('.').unwrap_or_else(|| {
-eprintln!("link path did not have expected '.' prefix: {}", link.path);
-""
-});
-let source = format!("{}:{undotted}", link.collection);
-if !query.wanted_sources.contains(&source) {
-return None
-}
}
-let ev = ClientEvent {
-kind: "link".to_string(),
-origin: "live".to_string(),
-link: link.into(),
-};
-
-let json = serde_json::to_string(&ev).unwrap();
-
-Some(Message::Text(json.into()))
}
}
···
+use std::sync::Arc;
use tokio::time::interval;
use std::time::Duration;
use futures::StreamExt;
+use crate::{ClientMessage, FilterableProperties};
use crate::server::MultiSubscribeQuery;
use futures::SinkExt;
use std::error::Error;
···
pub async fn start(
self,
ws: WebSocketStream<WebsocketConnectionRaw>,
+mut receiver: broadcast::Receiver<Arc<ClientMessage>>
) -> Result<(), Box<dyn Error>> {
let mut ping_state = None;
let (mut ws_sender, mut ws_receiver) = ws.split();
···
loop {
tokio::select! {
l = receiver.recv() => match l {
+Ok(link) => if self.filter(&link.properties) {
+if let Err(e) = ws_sender.send(link.message.clone()).await {
log::warn!("failed to send link, dropping subscriber: {e:?}");
break;
}
···
fn filter(
&self,
+properties: &FilterableProperties,
+) -> bool {
let query = &self.query;
// subject + subject DIDs are logical OR
+if !(
+query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty() ||
+query.wanted_subjects.contains(&properties.subject) ||
+properties.subject_did.as_ref().map(|did| query.wanted_subject_dids.contains(did)).unwrap_or(false)
+) { // wowwww ^^ fix that
+return false
}
// subjects together with sources are logical AND
+if !(query.wanted_sources.is_empty() || query.wanted_sources.contains(&properties.source)) {
+return false
}
+true
}
}
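The filter above boils down to: an empty subject + subject-DID filter matches everything, subjects and subject DIDs are OR'd together, and sources are AND'd on top. A stand-in version of the same logic (Query mirrors the assumed shape of MultiSubscribeQuery from server.rs; FilterableProperties is re-declared so the sketch is self-contained):

// Stand-in for the subscriber filter; types here are assumptions, not the real ones.
struct FilterableProperties {
    subject: String,
    subject_did: Option<String>,
    source: String,
}

struct Query {
    wanted_subjects: Vec<String>,
    wanted_subject_dids: Vec<String>,
    wanted_sources: Vec<String>,
}

fn matches(q: &Query, p: &FilterableProperties) -> bool {
    // subjects + subject DIDs are a logical OR (or pass-through when both are empty)
    let subject_ok = (q.wanted_subjects.is_empty() && q.wanted_subject_dids.is_empty())
        || q.wanted_subjects.contains(&p.subject)
        || p.subject_did
            .as_ref()
            .map(|did| q.wanted_subject_dids.contains(did))
            .unwrap_or(false);
    // sources are AND'd with the subject check
    let source_ok = q.wanted_sources.is_empty() || q.wanted_sources.contains(&p.source);
    subject_ok && source_ok
}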