Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

handle subscriber transitions, probably

+10 -4
Cargo.lock
···
[[package]]
name = "ctrlc"
-
version = "3.4.6"
+
version = "3.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "697b5419f348fd5ae2478e8018cb016c00a5881c7f46c717de98ffd135a5651c"
+
checksum = "46f93780a459b7d656ef7f071fe699c4d3d2cb201c4b24d085b6ddc505276e73"
dependencies = [
"nix",
"windows-sys 0.59.0",
···
[[package]]
name = "nix"
-
version = "0.29.0"
+
version = "0.30.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
+
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
dependencies = [
"bitflags",
"cfg-if",
···
dependencies = [
"async-trait",
"clap",
+
"ctrlc",
"dropshot",
+
"env_logger",
"futures",
"http",
"jetstream",
"links",
+
"log",
"metrics",
+
"rand 0.9.1",
"schemars",
"semver",
"serde",
"serde_json",
"serde_qs",
+
"thiserror 2.0.12",
"tinyjson",
"tokio",
"tokio-tungstenite 0.27.0",
+
"tokio-util",
[[package]]
+6
spacedust/Cargo.toml
···
[dependencies]
async-trait = "0.1.88"
clap = { version = "4.5.40", features = ["derive"] }
+
ctrlc = "3.4.7"
dropshot = "0.16.2"
+
env_logger = "0.11.8"
futures = "0.3.31"
http = "1.3.1"
jetstream = { path = "../jetstream", features = ["metrics"] }
links = { path = "../links" }
+
log = "0.4.27"
metrics = "0.24.2"
+
rand = "0.9.1"
schemars = "0.8.22"
semver = "1.0.26"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
serde_qs = "1.0.0-rc.3"
+
thiserror = "2.0.12"
tinyjson = "2.5.1"
tokio = { version = "1.45.1", features = ["full"] }
tokio-tungstenite = "0.27.0"
+
tokio-util = "0.7.15"
+24 -11
spacedust/src/consumer.rs
···
+
use tokio_util::sync::CancellationToken;
use crate::LinkEvent;
+
use crate::error::ConsumerError;
use jetstream::{
DefaultJetstreamEndpoints, JetstreamCompression, JetstreamConfig, JetstreamConnector,
events::{CommitOp, Cursor, EventKind},
};
use links::collect_links;
-
use std::error::Error;
use tokio::sync::broadcast;
const MAX_LINKS_PER_EVENT: usize = 100;
pub async fn consume(
b: broadcast::Sender<LinkEvent>,
-
jetstream_endpoint: &str,
+
jetstream_endpoint: String,
cursor: Option<Cursor>,
no_zstd: bool,
-
) -> Result<(), Box<dyn Error>> {
-
let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(jetstream_endpoint);
+
shutdown: CancellationToken,
+
) -> Result<(), ConsumerError> {
+
let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(&jetstream_endpoint);
if endpoint == jetstream_endpoint {
-
std::println!("connecting to jetstream at {endpoint}");
+
log::info!("connecting to jetstream at {endpoint}");
} else {
-
std::println!("connecting to jetstream at {jetstream_endpoint} => {endpoint}");
+
log::info!("connecting to jetstream at {jetstream_endpoint} => {endpoint}");
}
let config: JetstreamConfig = JetstreamConfig {
endpoint,
···
.connect_cursor(cursor)
.await?;
-
while let Some(event) = receiver.recv().await {
+
log::info!("receiving jetstream messages..");
+
loop {
+
if shutdown.is_cancelled() {
+
log::info!("exiting consumer for shutdown");
+
break;
+
}
+
let Some(event) = receiver.recv().await else {
+
log::error!("could not receive jetstream event, shutting down...");
+
shutdown.cancel();
+
break;
+
};
+
if event.kind != EventKind::Commit {
continue;
}
let Some(commit) = event.commit else {
-
eprintln!("jetstream commit event missing commit data, ignoring");
+
log::warn!("jetstream commit event missing commit data, ignoring");
continue;
};
···
continue;
}
let Some(record) = commit.record else {
-
eprintln!("jetstream commit update/delete missing record, ignoring");
+
log::warn!("jetstream commit update/delete missing record, ignoring");
continue;
};
···
// todo: indicate if the link limit was reached (-> links omitted)
for (i, link) in collect_links(&jv).into_iter().enumerate() {
if i >= MAX_LINKS_PER_EVENT {
-
eprintln!("jetstream event has too many links, ignoring the rest");
+
log::warn!("jetstream event has too many links, ignoring the rest");
break;
}
let link_ev = LinkEvent {
···
}
}
-
Err("jetstream consumer ended".into())
+
Err(ConsumerError::JetstreamEnded)
}
+13
spacedust/src/error.rs
···
+
use thiserror::Error;
+
+
#[derive(Debug, Error)]
+
pub enum ConsumerError {
+
#[error(transparent)]
+
JetstreamConnectionError(#[from] jetstream::error::ConnectionError),
+
#[error(transparent)]
+
JetstreamConfigValidationError(#[from] jetstream::error::ConfigValidationError),
+
#[error(transparent)]
+
JsonParseError(#[from] tinyjson::JsonParseError),
+
#[error("jetstream ended")]
+
JetstreamEnded
+
}
+1
spacedust/src/lib.rs
···
pub mod consumer;
+
pub mod error;
pub mod server;
pub mod subscriber;
+23 -7
spacedust/src/main.rs
···
use clap::Parser;
use tokio::sync::broadcast;
+
use tokio_util::sync::CancellationToken;
/// Aggregate links in the at-mosphere
#[derive(Parser, Debug, Clone)]
···
#[tokio::main]
async fn main() -> Result<(), String> {
-
let args = Args::parse();
+
env_logger::init();
// tokio broadcast keeps a single main output queue for all subscribers.
// each subscriber clones off a copy of an individual value for each recv.
···
// TODO: determine if a pathological case could blow this up (eg 1MB link
// paths + slow subscriber -> 16GiB queue)
let (b, _) = broadcast::channel(16_384);
+
let consumer_sender = b.clone();
-
let consuming = consumer::consume(b.clone(), &args.jetstream, None, args.jetstream_no_zstd);
+
let shutdown = CancellationToken::new();
+
+
let ctrlc_shutdown = shutdown.clone();
+
ctrlc::set_handler(move || ctrlc_shutdown.cancel()).expect("failed to set ctrl-c handler");
-
let serving = server::serve(b);
+
let args = Args::parse();
-
tokio::select! {
-
e = serving => eprintln!("serving failed: {e:?}"),
-
e = consuming => eprintln!("consuming failed: {e:?}"),
-
};
+
let server_shutdown = shutdown.clone();
+
let serving = tokio::spawn(async move {
+
server::serve(b, server_shutdown).await
+
});
+
+
let consumer_shutdown = shutdown.clone();
+
let consuming = tokio::spawn(async move {
+
consumer::consume(consumer_sender, args.jetstream, None, args.jetstream_no_zstd, consumer_shutdown).await
+
});
+
+
let (served, consumed) = tokio::join!(serving, consuming);
+
log::info!("serving ended: {served:?}");
+
log::info!("consuming ended: {consumed:?}");
+
+
log::info!("bye!");
Ok(())
}
+23 -12
spacedust/src/server.rs
···
-
use crate::subscriber;
+
use crate::subscriber::Subscriber;
use metrics::{histogram, counter};
use std::sync::Arc;
use crate::LinkEvent;
···
use tokio::sync::broadcast;
use tokio::time::Instant;
use tokio_tungstenite::tungstenite::protocol::Role;
+
use tokio_util::sync::CancellationToken;
use async_trait::async_trait;
use std::collections::HashSet;
const INDEX_HTML: &str = include_str!("../static/index.html");
const FAVICON: &[u8] = include_bytes!("../static/favicon.ico");
-
pub async fn serve(b: broadcast::Sender<LinkEvent>) -> Result<(), String> {
+
pub async fn serve(b: broadcast::Sender<LinkEvent>, shutdown: CancellationToken) -> Result<(), String> {
let config_logging = ConfigLogging::StderrTerminal {
level: ConfigLoggingLevel::Info,
};
···
.map_err(|e| e.to_string())?,
);
-
let ctx = Context { spec, b };
+
let sub_shutdown = shutdown.clone();
+
let ctx = Context { spec, b, shutdown: sub_shutdown };
let server = ServerBuilder::new(api, ctx, log)
.config(ConfigDropshot {
···
.start()
.map_err(|error| format!("failed to create server: {}", error))?;
-
server.await
+
tokio::select! {
+
s = server.wait_for_shutdown() => {
+
log::error!("dropshot server ended: {s:?}");
+
s
+
},
+
_ = shutdown.cancelled() => {
+
log::info!("shutting down server");
+
server.close().await?;
+
Err("shutdown requested".to_string())
+
}
+
}
}
#[derive(Debug, Clone)]
struct Context {
pub spec: Arc<serde_json::Value>,
pub b: broadcast::Sender<LinkEvent>,
+
pub shutdown: CancellationToken,
}
async fn instrument_handler<T, H, R>(ctx: &RequestContext<T>, handler: H) -> Result<R, HttpError>
···
}
}
-
#[derive(Deserialize, JsonSchema)]
-
struct QueryParams {
-
_hello: Option<String>,
-
}
-
#[channel {
protocol = WEBSOCKETS,
path = "/subscribe",
}]
async fn subscribe(
-
ctx: RequestContext<Context>,
+
reqctx: RequestContext<Context>,
query: MultiSubscribeQuery,
upgraded: WebsocketConnection,
) -> dropshot::WebsocketChannelResult {
···
)
.await;
-
let b = ctx.context().b.subscribe();
+
let Context { b, shutdown, .. } = reqctx.context();
+
let sub_token = shutdown.child_token();
+
let subscription = b.subscribe();
-
subscriber::subscribe(b, ws, query)
+
Subscriber::new(query, sub_token)
+
.start(ws, subscription)
.await
.map_err(|e| format!("boo: {e:?}"))?;
+142 -55
spacedust/src/subscriber.rs
···
+
use tokio::time::interval;
+
use std::time::Duration;
+
use futures::StreamExt;
use crate::ClientEvent;
use crate::LinkEvent;
use crate::server::MultiSubscribeQuery;
use futures::SinkExt;
use std::error::Error;
-
use tokio::sync::broadcast;
+
use tokio::sync::broadcast::{self, error::RecvError};
use tokio_tungstenite::{WebSocketStream, tungstenite::Message};
+
use tokio_util::sync::CancellationToken;
use dropshot::WebsocketConnectionRaw;
-
pub async fn subscribe(
-
mut sub: broadcast::Receiver<LinkEvent>,
-
mut ws: WebSocketStream<WebsocketConnectionRaw>,
+
const PING_PERIOD: Duration = Duration::from_secs(30);
+
+
pub struct Subscriber {
query: MultiSubscribeQuery,
-
) -> Result<(), Box<dyn Error>> {
-
// TODO: pingpong
+
shutdown: CancellationToken,
+
}
-
loop {
-
match sub.recv().await {
-
Ok(link) => {
+
impl Subscriber {
+
pub fn new(
+
query: MultiSubscribeQuery,
+
shutdown: CancellationToken,
+
) -> Self {
+
log::warn!("new sub...");
+
Self { query, shutdown }
+
}
-
// subject + subject DIDs are logical OR
-
let target_did = if link.target.starts_with("did:") {
-
link.target.clone()
-
} else {
-
let Some(rest) = link.target.strip_prefix("at://") else {
-
continue;
-
};
-
if let Some((did, _)) = rest.split_once("/") {
-
did
-
} else {
-
rest
-
}.to_string()
-
};
-
if !(query.wanted_subjects.contains(&link.target) || query.wanted_subject_dids.contains(&target_did) || query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty()) {
-
// wowwww ^^ fix that
-
continue;
-
}
+
pub async fn start(
+
self,
+
ws: WebSocketStream<WebsocketConnectionRaw>,
+
mut receiver: broadcast::Receiver<LinkEvent>
+
) -> Result<(), Box<dyn Error>> {
+
log::warn!("starting new sub...");
+
let mut ping_state = None;
+
let (mut ws_sender, mut ws_receiver) = ws.split();
+
let mut ping_interval = interval(PING_PERIOD);
+
let _guard = self.shutdown.clone().drop_guard();
-
// subjects together with sources are logical AND
+
// TODO: do we need to timeout ws sends??
-
if !query.wanted_sources.is_empty() {
-
let undotted = link.path.strip_prefix('.').unwrap_or_else(|| {
-
eprintln!("link path did not have expected '.' prefix: {}", link.path);
-
""
-
});
-
let source = format!("{}:{undotted}", link.collection);
-
if !query.wanted_sources.contains(&source) {
-
continue;
+
loop {
+
tokio::select! {
+
l = receiver.recv() => match l {
+
Ok(link) => if let Some(message) = self.filter(link) {
+
if let Err(e) = ws_sender.send(message).await {
+
log::warn!("failed to send link, dropping subscriber: {e:?}");
+
break;
+
}
+
},
+
Err(RecvError::Closed) => self.shutdown.cancel(),
+
Err(RecvError::Lagged(n)) => {
+
log::warn!("dropping lagging subscriber (missed {n} messages already)");
+
self.shutdown.cancel();
+
}
+
},
+
cm = ws_receiver.next() => match cm {
+
Some(Ok(Message::Ping(state))) => {
+
if let Err(e) = ws_sender.send(Message::Pong(state)).await {
+
log::error!("failed to reply pong to subscriber: {e:?}");
+
break;
+
}
+
}
+
Some(Ok(Message::Pong(state))) => {
+
if let Some(expected_state) = ping_state {
+
if *state == expected_state {
+
ping_state = None; // good
+
} else {
+
log::error!("subscriber returned a pong with the wrong state, dropping");
+
self.shutdown.cancel();
+
}
+
} else {
+
log::error!("subscriber sent a pong when none was expected");
+
self.shutdown.cancel();
+
}
+
}
+
Some(Ok(m)) => log::trace!("subscriber sent an unexpected message: {m:?}"),
+
Some(Err(e)) => {
+
log::error!("failed to receive subscriber message: {e:?}");
+
break;
+
}
+
None => {
+
log::trace!("end of subscriber messages. bye!");
+
break;
+
}
+
},
+
_ = ping_interval.tick() => {
+
if ping_state.is_some() {
+
log::warn!("did not recieve pong within {PING_PERIOD:?}, dropping subscriber");
+
self.shutdown.cancel();
+
} else {
+
let new_state: [u8; 8] = rand::random();
+
let ping = new_state.to_vec().into();
+
ping_state = Some(new_state);
+
if let Err(e) = ws_sender.send(Message::Ping(ping)).await {
+
log::error!("failed to send ping to subscriber, dropping: {e:?}");
+
self.shutdown.cancel();
+
}
}
}
-
-
let ev = ClientEvent {
-
kind: "link".to_string(),
-
origin: "live".to_string(),
-
link: link.into(),
-
};
-
let json = serde_json::to_string(&ev)?;
-
if let Err(e) = ws.send(Message::Text(json.into())).await {
-
eprintln!("client: failed to send event: {e:?}");
-
ws.close(None).await?; // TODO: do we need this one??
+
_ = self.shutdown.cancelled() => {
+
log::info!("subscriber shutdown requested, bye!");
+
if let Err(e) = ws_sender.close().await {
+
log::warn!("failed to close subscriber: {e:?}");
+
}
break;
-
}
-
}
-
Err(broadcast::error::RecvError::Closed) => {
-
ws.close(None).await?; // TODO: send reason
-
break;
+
},
}
-
Err(broadcast::error::RecvError::Lagged(_n_missed)) => {
-
eprintln!("client lagged, closing");
-
ws.close(None).await?; // TODO: send reason
-
break;
+
}
+
log::trace!("end of subscriber. bye!");
+
Ok(())
+
}
+
+
fn filter(
+
&self,
+
link: LinkEvent,
+
// mut sender: impl Sink<Message> + Unpin
+
) -> Option<Message> {
+
let query = &self.query;
+
+
// subject + subject DIDs are logical OR
+
let target_did = if link.target.starts_with("did:") {
+
link.target.clone()
+
} else {
+
let Some(rest) = link.target.strip_prefix("at://") else {
+
return None
+
};
+
if let Some((did, _)) = rest.split_once("/") {
+
did
+
} else {
+
rest
+
}.to_string()
+
};
+
if !(query.wanted_subjects.contains(&link.target) || query.wanted_subject_dids.contains(&target_did) || query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty()) {
+
// wowwww ^^ fix that
+
return None
+
}
+
+
// subjects together with sources are logical AND
+
+
if !query.wanted_sources.is_empty() {
+
let undotted = link.path.strip_prefix('.').unwrap_or_else(|| {
+
eprintln!("link path did not have expected '.' prefix: {}", link.path);
+
""
+
});
+
let source = format!("{}:{undotted}", link.collection);
+
if !query.wanted_sources.contains(&source) {
+
return None
}
}
+
+
let ev = ClientEvent {
+
kind: "link".to_string(),
+
origin: "live".to_string(),
+
link: link.into(),
+
};
+
+
let json = serde_json::to_string(&ev).unwrap();
+
+
Some(Message::Text(json.into()))
}
-
Ok(())
}