Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

event shape and link filtering and stuff

+4
Cargo.lock
···
name = "spacedust"
version = "0.1.0"
dependencies = [
+
"async-trait",
"clap",
"dropshot",
"futures",
+
"http",
"jetstream",
"links",
"metrics",
"schemars",
+
"semver",
"serde",
"serde_json",
+
"serde_qs",
"tinyjson",
"tokio",
"tokio-tungstenite 0.27.0",
+4
spacedust/Cargo.toml
···
edition = "2024"
[dependencies]
+
async-trait = "0.1.88"
clap = { version = "4.5.40", features = ["derive"] }
dropshot = "0.16.2"
futures = "0.3.31"
+
http = "1.3.1"
jetstream = { path = "../jetstream", features = ["metrics"] }
links = { path = "../links" }
metrics = "0.24.2"
schemars = "0.8.22"
+
semver = "1.0.26"
serde = { version = "1.0.219", features = ["derive"] }
serde_json = "1.0.140"
+
serde_qs = "1.0.0-rc.3"
tinyjson = "2.5.1"
tokio = { version = "1.45.1", features = ["full"] }
tokio-tungstenite = "0.27.0"
+1
spacedust/src/consumer.rs
···
&*commit.collection,
&*commit.rkey,
),
+
rev: commit.rev.to_string(),
target: link.target.into_string(),
};
let _ = b.send(link_ev); // only errors if no subscribers are connected, which is just fine.
+11
spacedust/src/lib.rs
···
pub mod consumer;
pub mod server;
+
pub mod subscriber;
use serde::Serialize;
···
path: String,
origin: String,
target: String,
+
rev: String,
+
}
+
+
#[derive(Debug, Serialize)]
+
#[serde(rename_all="snake_case")]
+
pub struct ClientEvent {
+
kind: String,
+
link: ClientLinkEvent,
}
#[derive(Debug, Serialize)]
···
operation: String,
source: String,
source_record: String,
+
source_rev: String,
subject: String,
// TODO: include the record too? would save clients a level of hydration
}
···
operation: "create".to_string(),
source: format!("{}:{undotted}", link.collection),
source_record: link.origin,
+
source_rev: link.rev,
subject: link.target,
}
}
+244 -41
spacedust/src/server.rs
···
-
use crate::{ClientLinkEvent, LinkEvent};
+
use crate::subscriber;
+
use metrics::{histogram, counter};
+
use std::sync::Arc;
+
use crate::LinkEvent;
+
use http::{
+
header::{ORIGIN, USER_AGENT},
+
Response, StatusCode,
+
};
use dropshot::{
+
Body,
ApiDescription, ConfigDropshot, ConfigLogging, ConfigLoggingLevel, Query, RequestContext,
-
ServerBuilder, WebsocketConnection, channel,
+
ServerBuilder, WebsocketConnection, channel, endpoint, HttpResponse,
+
ApiEndpointBodyContentType, ExtractorMetadata, HttpError, ServerContext,
+
SharedExtractor,
};
-
use futures::SinkExt;
+
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use tokio::sync::broadcast;
-
use tokio_tungstenite::tungstenite::Message;
+
use tokio::time::Instant;
use tokio_tungstenite::tungstenite::protocol::Role;
+
use async_trait::async_trait;
+
use std::collections::HashSet;
+
+
const INDEX_HTML: &str = include_str!("../static/index.html");
+
const FAVICON: &[u8] = include_bytes!("../static/favicon.ico");
pub async fn serve(b: broadcast::Sender<LinkEvent>) -> Result<(), String> {
let config_logging = ConfigLogging::StderrTerminal {
···
.map_err(|error| format!("failed to create logger: {}", error))?;
let mut api = ApiDescription::new();
+
api.register(index).unwrap();
+
api.register(favicon).unwrap();
+
api.register(openapi).unwrap();
api.register(subscribe).unwrap();
-
let server = ServerBuilder::new(api, b, log)
+
// TODO: put spec in a once cell / lazy lock thing?
+
let spec = Arc::new(
+
api.openapi(
+
"Spacedust",
+
env!("CARGO_PKG_VERSION")
+
.parse()
+
.inspect_err(|e| {
+
eprintln!("failed to parse cargo package version for openapi: {e:?}")
+
})
+
.unwrap_or(semver::Version::new(0, 0, 1)),
+
)
+
.description("A configurable ATProto notifications firehose.")
+
.contact_name("part of @microcosm.blue")
+
.contact_url("https://microcosm.blue")
+
.json()
+
.map_err(|e| e.to_string())?,
+
);
+
+
let ctx = Context { spec, b };
+
+
let server = ServerBuilder::new(api, ctx, log)
.config(ConfigDropshot {
bind_address: "0.0.0.0:9998".parse().unwrap(),
..Default::default()
···
server.await
}
-
#[derive(Debug, Serialize)]
-
#[serde(rename_all="snake_case")]
-
struct ClientEvent {
-
r#type: String,
-
link: ClientLinkEvent,
+
#[derive(Debug, Clone)]
+
struct Context {
+
pub spec: Arc<serde_json::Value>,
+
pub b: broadcast::Sender<LinkEvent>,
+
}
+
+
async fn instrument_handler<T, H, R>(ctx: &RequestContext<T>, handler: H) -> Result<R, HttpError>
+
where
+
R: HttpResponse,
+
H: Future<Output = Result<R, HttpError>>,
+
T: ServerContext,
+
{
+
let start = Instant::now();
+
let result = handler.await;
+
let latency = start.elapsed();
+
let status_code = match &result {
+
Ok(response) => response.status_code(),
+
Err(e) => e.status_code.as_status(),
+
}
+
.as_str() // just the number (.to_string()'s Display does eg `200 OK`)
+
.to_string();
+
let endpoint = ctx.endpoint.operation_id.clone();
+
let headers = ctx.request.headers();
+
let origin = headers
+
.get(ORIGIN)
+
.and_then(|v| v.to_str().ok())
+
.unwrap_or("")
+
.to_string();
+
let ua = headers
+
.get(USER_AGENT)
+
.and_then(|v| v.to_str().ok())
+
.map(|ua| {
+
if ua.starts_with("Mozilla/5.0 ") {
+
"browser"
+
} else {
+
ua
+
}
+
})
+
.unwrap_or("")
+
.to_string();
+
counter!("server_requests_total",
+
"endpoint" => endpoint.clone(),
+
"origin" => origin,
+
"ua" => ua,
+
"status_code" => status_code,
+
)
+
.increment(1);
+
histogram!("server_handler_latency", "endpoint" => endpoint).record(latency.as_micros() as f64);
+
result
+
}
+
+
use dropshot::{HttpResponseHeaders, HttpResponseOk};
+
+
pub type OkCorsResponse<T> = Result<HttpResponseHeaders<HttpResponseOk<T>>, HttpError>;
+
+
/// Helper for constructing Ok responses: return OkCors(T).into()
+
/// (not happy with this yet)
+
pub struct OkCors<T: Serialize + JsonSchema + Send + Sync>(pub T);
+
+
impl<T> From<OkCors<T>> for OkCorsResponse<T>
+
where
+
T: Serialize + JsonSchema + Send + Sync,
+
{
+
fn from(ok: OkCors<T>) -> OkCorsResponse<T> {
+
let mut res = HttpResponseHeaders::new_unnamed(HttpResponseOk(ok.0));
+
res.headers_mut()
+
.insert("access-control-allow-origin", "*".parse().unwrap());
+
Ok(res)
+
}
+
}
+
+
// TODO: cors for HttpError
+
+
+
/// Serve index page as html
+
#[endpoint {
+
method = GET,
+
path = "/",
+
/*
+
* not useful to have this in openapi
+
*/
+
unpublished = true,
+
}]
+
async fn index(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> {
+
instrument_handler(&ctx, async {
+
Ok(Response::builder()
+
.status(StatusCode::OK)
+
.header(http::header::CONTENT_TYPE, "text/html")
+
.body(INDEX_HTML.into())?)
+
})
+
.await
+
}
+
+
/// Serve index page as html
+
#[endpoint {
+
method = GET,
+
path = "/favicon.ico",
+
/*
+
* not useful to have this in openapi
+
*/
+
unpublished = true,
+
}]
+
async fn favicon(ctx: RequestContext<Context>) -> Result<Response<Body>, HttpError> {
+
instrument_handler(&ctx, async {
+
Ok(Response::builder()
+
.status(StatusCode::OK)
+
.header(http::header::CONTENT_TYPE, "image/x-icon")
+
.body(FAVICON.to_vec().into())?)
+
})
+
.await
+
}
+
+
/// Meta: get the openapi spec for this api
+
#[endpoint {
+
method = GET,
+
path = "/openapi",
+
/*
+
* not useful to have this in openapi
+
*/
+
unpublished = true,
+
}]
+
async fn openapi(ctx: RequestContext<Context>) -> OkCorsResponse<serde_json::Value> {
+
instrument_handler(&ctx, async {
+
let spec = (*ctx.context().spec).clone();
+
OkCors(spec).into()
+
})
+
.await
+
}
+
+
/// The real type that gets deserialized
+
#[derive(Debug, Deserialize, JsonSchema)]
+
#[serde(rename_all = "camelCase")]
+
pub struct MultiSubscribeQuery {
+
#[serde(default)]
+
pub wanted_subjects: HashSet<String>,
+
#[serde(default)]
+
pub wanted_subject_dids: HashSet<String>,
+
#[serde(default)]
+
pub wanted_sources: HashSet<String>,
+
}
+
/// The fake corresponding type for docs that dropshot won't freak out about a
+
/// vec for
+
#[derive(Deserialize, JsonSchema)]
+
#[allow(dead_code)]
+
#[serde(rename_all = "camelCase")]
+
struct MultiSubscribeQueryForDocs {
+
/// One or more at-uris to receive links about
+
///
+
/// The at-uri must be url-encoded
+
///
+
/// Pass this parameter multiple times to specify multiple collections, like
+
/// `wantedSubjects=[...]&wantedSubjects=[...]`
+
pub wanted_subjects: String,
+
/// One or more DIDs to receive links about
+
///
+
/// Pass this parameter multiple times to specify multiple collections
+
pub wanted_subject_dids: String,
+
/// One or more link sources to receive links about
+
///
+
/// TODO: docs about link sources
+
///
+
/// eg, a bluesky like's link source: `app.bsky.feed.like:subject.uri`
+
///
+
/// Pass this parameter multiple times to specify multiple sources
+
pub wanted_sources: String,
+
}
+
+
// The `SharedExtractor` implementation for Query<QueryType> describes how to
+
// construct an instance of `Query<QueryType>` from an HTTP request: namely, by
+
// parsing the query string to an instance of `QueryType`.
+
#[async_trait]
+
impl SharedExtractor for MultiSubscribeQuery {
+
async fn from_request<Context: ServerContext>(
+
ctx: &RequestContext<Context>,
+
) -> Result<MultiSubscribeQuery, HttpError> {
+
let raw_query = ctx.request.uri().query().unwrap_or("");
+
let q = serde_qs::from_str(raw_query).map_err(|e| {
+
HttpError::for_bad_request(None, format!("unable to parse query string: {}", e))
+
})?;
+
Ok(q)
+
}
+
+
fn metadata(body_content_type: ApiEndpointBodyContentType) -> ExtractorMetadata {
+
// HACK: query type switcheroo: passing MultiSubscribeQuery to
+
// `metadata` would "helpfully" panic because dropshot believes we can
+
// only have scalar types in a query.
+
//
+
// so instead we have a fake second type whose only job is to look the
+
// same as MultiSubscribeQuery exept that it has `String` instead of
+
// `Vec<String>`, which dropshot will accept, and generate ~close-enough
+
// docs for.
+
<Query<MultiSubscribeQueryForDocs> as SharedExtractor>::metadata(body_content_type)
+
}
}
#[derive(Deserialize, JsonSchema)]
···
path = "/subscribe",
}]
async fn subscribe(
-
ctx: RequestContext<broadcast::Sender<LinkEvent>>,
-
_qp: Query<QueryParams>,
+
ctx: RequestContext<Context>,
+
query: MultiSubscribeQuery,
upgraded: WebsocketConnection,
) -> dropshot::WebsocketChannelResult {
-
let mut ws = tokio_tungstenite::WebSocketStream::from_raw_socket(
+
let ws = tokio_tungstenite::WebSocketStream::from_raw_socket(
upgraded.into_inner(),
Role::Server,
None,
)
.await;
-
let mut sub = ctx.context().subscribe();
-
// TODO: pingpong
-
// TODO: filtering subscription
+
let b = ctx.context().b.subscribe();
-
loop {
-
match sub.recv().await {
-
Ok(link) => {
-
let ev = ClientEvent {
-
r#type: "link".to_string(),
-
link: link.into(),
-
};
-
let json = serde_json::to_string(&ev)?;
-
if let Err(e) = ws.send(Message::Text(json.into())).await {
-
eprintln!("client: failed to send event: {e:?}");
-
ws.close(None).await?; // TODO: do we need this one??
-
break;
-
}
-
}
-
Err(broadcast::error::RecvError::Closed) => {
-
ws.close(None).await?; // TODO: send reason
-
break;
-
}
-
Err(broadcast::error::RecvError::Lagged(_n_missed)) => {
-
eprintln!("client lagged, closing");
-
ws.close(None).await?; // TODO: send reason
-
break;
-
}
-
}
-
}
+
subscriber::subscribe(b, ws, query)
+
.await
+
.map_err(|e| format!("boo: {e:?}"))?;
+
Ok(())
}
+75
spacedust/src/subscriber.rs
···
+
use crate::ClientEvent;
+
use crate::LinkEvent;
+
use crate::server::MultiSubscribeQuery;
+
use futures::SinkExt;
+
use std::error::Error;
+
use tokio::sync::broadcast;
+
use tokio_tungstenite::{WebSocketStream, tungstenite::Message};
+
use dropshot::WebsocketConnectionRaw;
+
+
pub async fn subscribe(
+
mut sub: broadcast::Receiver<LinkEvent>,
+
mut ws: WebSocketStream<WebsocketConnectionRaw>,
+
query: MultiSubscribeQuery,
+
) -> Result<(), Box<dyn Error>> {
+
// TODO: pingpong
+
+
loop {
+
match sub.recv().await {
+
Ok(link) => {
+
+
// subject + subject DIDs are logical OR
+
let target_did = if link.target.starts_with("did:") {
+
link.target.clone()
+
} else {
+
let Some(rest) = link.target.strip_prefix("at://") else {
+
continue;
+
};
+
if let Some((did, _)) = rest.split_once("/") {
+
did
+
} else {
+
rest
+
}.to_string()
+
};
+
if !(query.wanted_subjects.contains(&link.target) || query.wanted_subject_dids.contains(&target_did) || query.wanted_subjects.is_empty() && query.wanted_subject_dids.is_empty()) {
+
// wowwww ^^ fix that
+
continue;
+
}
+
+
// subjects together with sources are logical AND
+
+
if !query.wanted_sources.is_empty() {
+
let undotted = link.path.strip_prefix('.').unwrap_or_else(|| {
+
eprintln!("link path did not have expected '.' prefix: {}", link.path);
+
""
+
});
+
let source = format!("{}:{undotted}", link.collection);
+
if !query.wanted_sources.contains(&source) {
+
continue;
+
}
+
}
+
+
let ev = ClientEvent {
+
kind: "link".to_string(),
+
link: link.into(),
+
};
+
let json = serde_json::to_string(&ev)?;
+
if let Err(e) = ws.send(Message::Text(json.into())).await {
+
eprintln!("client: failed to send event: {e:?}");
+
ws.close(None).await?; // TODO: do we need this one??
+
break;
+
}
+
}
+
Err(broadcast::error::RecvError::Closed) => {
+
ws.close(None).await?; // TODO: send reason
+
break;
+
}
+
Err(broadcast::error::RecvError::Lagged(_n_missed)) => {
+
eprintln!("client lagged, closing");
+
ws.close(None).await?; // TODO: send reason
+
break;
+
}
+
}
+
}
+
Ok(())
+
}
spacedust/static/favicon.ico

This is a binary file and will not be displayed.

+54
spacedust/static/index.html
···
+
<!doctype html>
+
<html lang="en">
+
<head>
+
<meta charset="utf-8" />
+
<title>Spacedust documentation</title>
+
<meta name="viewport" content="width=device-width, initial-scale=1" />
+
<meta name="description" content="API Documentation for Spacedust, a configurable ATProto notifications firehose" />
+
<style>
+
.custom-header {
+
height: 42px;
+
background-color: #221828;
+
box-shadow: inset 0 -1px 0 var(--scalar-border-color);
+
color: var(--scalar-color-1);
+
font-size: var(--scalar-font-size-3);
+
font-family: 'Iowan Old Style', 'Palatino Linotype', 'URW Palladio L', P052, serif;
+
padding: 0 18px;
+
justify-content: space-between;
+
}
+
.custom-header,
+
.custom-header nav {
+
display: flex;
+
align-items: center;
+
gap: 18px;
+
}
+
.custom-header a:hover {
+
color: var(--scalar-color-2);
+
}
+
</style>
+
</head>
+
<body>
+
<header class="custom-header scalar-app">
+
<p>
+
TODO: pdsls jetstream link
+
<a href="https://ufos.microcosm.blue">Launch 🛸 UFOs app</a>: Explore lexicons
+
</p>
+
<nav>
+
<b>a <a href="https://microcosm.blue">microcosm</a> project</b>
+
<a href="https://bsky.app/profile/microcosm.blue">@microcosm.blue</a>
+
<a href="https://github.com/at-microcosm">github</a>
+
</nav>
+
</header>
+
+
<script id="api-reference" type="application/json" data-url="/openapi"></script>
+
+
<script>
+
var configuration = {
+
theme: 'purple',
+
}
+
document.getElementById('api-reference').dataset.configuration = JSON.stringify(configuration)
+
</script>
+
+
<script src="https://cdn.jsdelivr.net/npm/@scalar/api-reference"></script>
+
</body>
+
</html>
+1 -1
ufos/src/index_html.rs
···
<html lang="en">
<head>
<meta charset="utf-8" />
-
<title>UFOs API Documentation</title>
+
<title>UFOs API documentation</title>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="description" content="API Documentation for UFOs: Samples and stats for all atproto lexicons." />
<style>