Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

hello almost v0

+81 -27
Cargo.lock
···
[[package]]
name = "clap"
-
version = "4.5.35"
+
version = "4.5.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944"
+
checksum = "40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f"
dependencies = [
"clap_builder",
"clap_derive",
···
[[package]]
name = "clap_builder"
-
version = "4.5.35"
+
version = "4.5.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9"
+
checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e"
dependencies = [
"anstream",
"anstyle",
···
[[package]]
name = "clap_derive"
-
version = "4.5.32"
+
version = "4.5.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7"
+
checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce"
dependencies = [
"heck",
"proc-macro2",
···
"tokio",
"tokio-util",
"tower-http",
-
"tungstenite",
+
"tungstenite 0.26.2",
"zstd",
]
···
[[package]]
name = "dropshot"
-
version = "0.16.0"
+
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "a37c505dad56e0c1fa5ed47e29fab1a1ab2d1a9d93e952024bb47168969705f6"
+
checksum = "50e8fed669e35e757646ad10f97c4d26dd22cce3da689b307954f7000d2719d0"
dependencies = [
"async-stream",
"async-trait",
···
[[package]]
name = "dropshot_endpoint"
-
version = "0.16.0"
+
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "8b1a6db3728f0195e3ad62807649913aaba06d45421e883416e555e51464ef67"
+
checksum = "acebb687581abdeaa2c89fa448818a5f803b0e68e5d7e7a1cf585a8f3c5c57ac"
dependencies = [
"heck",
"proc-macro2",
···
"serde_json",
"thiserror 2.0.12",
"tokio",
-
"tokio-tungstenite",
+
"tokio-tungstenite 0.26.2",
"url",
"zstd",
···
[[package]]
name = "serde_spanned"
-
version = "0.6.8"
+
version = "0.6.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
+
checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
dependencies = [
"serde",
···
[[package]]
+
name = "spacedust"
+
version = "0.1.0"
+
dependencies = [
+
"clap",
+
"dropshot",
+
"futures",
+
"jetstream",
+
"links",
+
"metrics",
+
"schemars",
+
"serde",
+
"serde_json",
+
"tinyjson",
+
"tokio",
+
"tokio-tungstenite 0.27.0",
+
]
+
+
[[package]]
name = "spin"
version = "0.9.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
[[package]]
name = "syn"
-
version = "2.0.100"
+
version = "2.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
+
checksum = "e4307e30089d6fd6aff212f2da3a1f9e32f3223b1f010fb09b7c95f90f3ca1e8"
dependencies = [
"proc-macro2",
"quote",
···
[[package]]
name = "tokio"
-
version = "1.44.2"
+
version = "1.45.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
+
checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779"
dependencies = [
"backtrace",
"bytes",
···
"native-tls",
"tokio",
"tokio-native-tls",
-
"tungstenite",
+
"tungstenite 0.26.2",
+
]
+
+
[[package]]
+
name = "tokio-tungstenite"
+
version = "0.27.0"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "489a59b6730eda1b0171fcfda8b121f4bee2b35cba8645ca35c5f7ba3eb736c1"
+
dependencies = [
+
"futures-util",
+
"log",
+
"tokio",
+
"tungstenite 0.27.0",
[[package]]
···
[[package]]
name = "toml"
-
version = "0.8.20"
+
version = "0.8.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148"
+
checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
dependencies = [
"serde",
"serde_spanned",
···
[[package]]
name = "toml_datetime"
-
version = "0.6.8"
+
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
+
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
dependencies = [
"serde",
[[package]]
name = "toml_edit"
-
version = "0.22.24"
+
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
+
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap 2.9.0",
"serde",
"serde_spanned",
"toml_datetime",
+
"toml_write",
"winnow",
+
+
[[package]]
+
name = "toml_write"
+
version = "0.1.2"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
[[package]]
name = "tower"
···
"sha1",
"thiserror 2.0.12",
"url",
+
"utf-8",
+
]
+
+
[[package]]
+
name = "tungstenite"
+
version = "0.27.0"
+
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "eadc29d668c91fcc564941132e17b28a7ceb2f3ebf0b9dae3e03fd7a6748eb0d"
+
dependencies = [
+
"bytes",
+
"data-encoding",
+
"http",
+
"httparse",
+
"log",
+
"rand 0.9.1",
+
"sha1",
+
"thiserror 2.0.12",
"utf-8",
···
[[package]]
name = "winnow"
-
version = "0.7.6"
+
version = "0.7.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "63d3fcd9bba44b03821e7d699eeee959f3126dcc4aa8e4ae18ec617c2a5cea10"
+
checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd"
dependencies = [
"memchr",
+1
Cargo.toml
···
"jetstream",
"ufos",
"ufos/fuzz",
+
"spacedust",
]
+18
spacedust/Cargo.toml
···
+
[package]
+
name = "spacedust"
+
version = "0.1.0"
+
edition = "2024"
+
+
[dependencies]
+
clap = { version = "4.5.40", features = ["derive"] }
+
dropshot = "0.16.2"
+
futures = "0.3.31"
+
jetstream = { path = "../jetstream", features = ["metrics"] }
+
links = { path = "../links" }
+
metrics = "0.24.2"
+
schemars = "0.8.22"
+
serde = { version = "1.0.219", features = ["derive"] }
+
serde_json = "1.0.140"
+
tinyjson = "2.5.1"
+
tokio = { version = "1.45.1", features = ["full"] }
+
tokio-tungstenite = "0.27.0"
+82
spacedust/src/consumer.rs
···
+
use crate::LinkEvent;
+
use jetstream::{
+
DefaultJetstreamEndpoints, JetstreamCompression, JetstreamConfig, JetstreamConnector,
+
events::{CommitOp, Cursor, EventKind},
+
};
+
use links::collect_links;
+
use std::error::Error;
+
use tokio::sync::broadcast;
+
+
const MAX_LINKS_PER_EVENT: usize = 100;
+
+
pub async fn consume(
+
b: broadcast::Sender<LinkEvent>,
+
jetstream_endpoint: &str,
+
cursor: Option<Cursor>,
+
no_zstd: bool,
+
) -> Result<(), Box<dyn Error>> {
+
let endpoint = DefaultJetstreamEndpoints::endpoint_or_shortcut(jetstream_endpoint);
+
if endpoint == jetstream_endpoint {
+
std::println!("connecting to jetstream at {endpoint}");
+
} else {
+
std::println!("connecting to jetstream at {jetstream_endpoint} => {endpoint}");
+
}
+
let config: JetstreamConfig = JetstreamConfig {
+
endpoint,
+
compression: if no_zstd {
+
JetstreamCompression::None
+
} else {
+
JetstreamCompression::Zstd
+
},
+
replay_on_reconnect: true,
+
channel_size: 1024, // buffer up to ~1s of jetstream events
+
..Default::default()
+
};
+
let mut receiver = JetstreamConnector::new(config)?
+
.connect_cursor(cursor)
+
.await?;
+
+
while let Some(event) = receiver.recv().await {
+
if event.kind != EventKind::Commit {
+
continue;
+
}
+
let Some(commit) = event.commit else {
+
eprintln!("jetstream commit event missing commit data, ignoring");
+
continue;
+
};
+
+
// TODO: keep a buffer and remove quick deletes to debounce notifs
+
// for now we just drop all deletes eek
+
if commit.operation == CommitOp::Delete {
+
continue;
+
}
+
let Some(record) = commit.record else {
+
eprintln!("jetstream commit update/delete missing record, ignoring");
+
continue;
+
};
+
+
let jv = record.get().parse()?;
+
+
// todo: indicate if the link limit was reached (-> links omitted)
+
for (i, link) in collect_links(&jv).into_iter().enumerate() {
+
if i >= MAX_LINKS_PER_EVENT {
+
eprintln!("jetstream event has too many links, ignoring the rest");
+
break;
+
}
+
let link_ev = LinkEvent {
+
collection: commit.collection.to_string(),
+
path: link.path,
+
origin: format!(
+
"at://{}/{}/{}",
+
&*event.did,
+
&*commit.collection,
+
&*commit.rkey,
+
),
+
target: link.target.into_string(),
+
};
+
let _ = b.send(link_ev); // only errors if no subscribers are connected, which is just fine.
+
}
+
}
+
+
Err("jetstream consumer ended".into())
+
}
+31
spacedust/src/lib.rs
···
+
pub mod consumer;
+
pub mod server;
+
+
use serde::Serialize;
+
+
/// One link discovered in a record, as carried on the internal broadcast
/// channel between the consumer and the websocket subscribers.
#[derive(Clone, Debug)]
pub struct LinkEvent {
    /// Collection of the record the link was found in.
    collection: String,
    /// Path of the link inside the record.
    path: String,
    /// `at://` URI of the record the link was found in.
    origin: String,
    /// What the link points at.
    target: String,
}
+
+
#[derive(Debug, Serialize)]
+
struct ClientEvent {
+
source: String,
+
origin: String,
+
target: String,
+
// TODO: include the record too? would save clients a level of hydration
+
}
+
+
impl From<LinkEvent> for ClientEvent {
    /// Convert an internal link event into its client-facing form.
    ///
    /// `source` becomes `"{collection}:{path}"` with one leading `.` removed
    /// from the path; `origin` and `target` are moved over unchanged.
    fn from(link: LinkEvent) -> Self {
        // Link paths presumably start with a `.` (verify against
        // `links::collect_links`). Strip only that dot: slicing off the first
        // byte blindly would eat a real character when the dot is absent, and
        // would discard the whole path when the first character is multi-byte.
        let undotted = link.path.strip_prefix('.').unwrap_or(&link.path);
        Self {
            source: format!("{}:{undotted}", link.collection),
            origin: link.origin,
            target: link.target,
        }
    }
}
+54
spacedust/src/main.rs
···
+
use spacedust::consumer;
+
use spacedust::server;
+
+
use clap::Parser;
+
use tokio::sync::broadcast;
+
+
/// Aggregate links in the at-mosphere
+
#[derive(Parser, Debug, Clone)]
+
#[command(version, about, long_about = None)]
+
struct Args {
+
/// Jetstream server to connect to (exclusive with --fixture). Provide either a wss:// URL, or a shorhand value:
+
/// 'us-east-1', 'us-east-2', 'us-west-1', or 'us-west-2'
+
#[arg(long)]
+
jetstream: String,
+
/// don't request zstd-compressed jetstream events
+
///
+
/// reduces CPU at the expense of more ingress bandwidth
+
#[arg(long, action)]
+
jetstream_no_zstd: bool,
+
}
+
+
#[tokio::main]
+
async fn main() -> Result<(), String> {
+
let args = Args::parse();
+
+
// tokio broadcast keeps a single main output queue for all subscribers.
+
// each subscriber clones off a copy of an individual value for each recv.
+
// since there's no large per-client buffer, we can make this one kind of
+
// big and accommodate more slow/bursty clients.
+
//
+
// in fact, we *could* even keep lagging clients alive, inserting lag-
+
// indicating messages to their output.... but for now we'll drop them to
+
// avoid accumulating zombies.
+
//
+
// events on the channel are individual links as they are discovered. a link
+
// contains a source and a target. the target is an at-uri, so it's up to
+
// ~1KB max; source is a collection + link path, which can be more but in
+
// practice the whole link rarely approaches 1KB total.
+
//
+
// TODO: determine if a pathological case could blow this up (eg 1MB link
+
// paths + slow subscriber -> 16GiB queue)
+
let (b, _) = broadcast::channel(16_384);
+
+
let consuming = consumer::consume(b.clone(), &args.jetstream, None, args.jetstream_no_zstd);
+
+
let serving = server::serve(b);
+
+
tokio::select! {
+
e = serving => eprintln!("serving failed: {e:?}"),
+
e = consuming => eprintln!("consuming failed: {e:?}"),
+
};
+
+
Ok(())
+
}
+83
spacedust/src/server.rs
···
+
use crate::{ClientEvent, LinkEvent};
+
use dropshot::{
+
ApiDescription, ConfigDropshot, ConfigLogging, ConfigLoggingLevel, Query, RequestContext,
+
ServerBuilder, WebsocketConnection, channel,
+
};
+
use futures::SinkExt;
+
use schemars::JsonSchema;
+
use serde::Deserialize;
+
use tokio::sync::broadcast;
+
use tokio_tungstenite::tungstenite::Message;
+
use tokio_tungstenite::tungstenite::protocol::Role;
+
+
pub async fn serve(b: broadcast::Sender<LinkEvent>) -> Result<(), String> {
+
let config_logging = ConfigLogging::StderrTerminal {
+
level: ConfigLoggingLevel::Info,
+
};
+
+
let log = config_logging
+
.to_logger("example-basic")
+
.map_err(|error| format!("failed to create logger: {}", error))?;
+
+
let mut api = ApiDescription::new();
+
api.register(subscribe).unwrap();
+
+
let server = ServerBuilder::new(api, b, log)
+
.config(ConfigDropshot {
+
bind_address: "0.0.0.0:9998".parse().unwrap(),
+
..Default::default()
+
})
+
.start()
+
.map_err(|error| format!("failed to create server: {}", error))?;
+
+
server.await
+
}
+
+
#[derive(Deserialize, JsonSchema)]
+
struct QueryParams {
+
_hello: Option<String>,
+
}
+
+
#[channel {
+
protocol = WEBSOCKETS,
+
path = "/subscribe",
+
}]
+
async fn subscribe(
+
ctx: RequestContext<broadcast::Sender<LinkEvent>>,
+
_qp: Query<QueryParams>,
+
upgraded: WebsocketConnection,
+
) -> dropshot::WebsocketChannelResult {
+
let mut ws = tokio_tungstenite::WebSocketStream::from_raw_socket(
+
upgraded.into_inner(),
+
Role::Server,
+
None,
+
)
+
.await;
+
let mut sub = ctx.context().subscribe();
+
+
// TODO: pingpong
+
// TODO: filtering subscription
+
+
loop {
+
match sub.recv().await {
+
Ok(link) => {
+
let json = serde_json::to_string::<ClientEvent>(&link.into())?;
+
if let Err(e) = ws.send(Message::Text(json.into())).await {
+
eprintln!("client: failed to send event: {e:?}");
+
ws.close(None).await?; // TODO: do we need this one??
+
break;
+
}
+
}
+
Err(broadcast::error::RecvError::Closed) => {
+
ws.close(None).await?; // TODO: send reason
+
break;
+
}
+
Err(broadcast::error::RecvError::Lagged(_n_missed)) => {
+
eprintln!("client lagged, closing");
+
ws.close(None).await?; // TODO: send reason
+
break;
+
}
+
}
+
}
+
Ok(())
+
}