Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

crash debug: more logs and checks

there's a timeout firing on send, which is weird — channel writes shouldn't ever block for long.

Changed files
+121 -106
jetstream
src
ufos
+3 -2
Cargo.lock
···
[[package]]
name = "tokio-util"
-
version = "0.7.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
-
checksum = "6b9590b93e6fcc1739458317cccd391ad3955e2bde8913edf6f95f9e65a8f034"
dependencies = [
"bytes",
"futures-core",
···
"thiserror 2.0.12",
"tikv-jemallocator",
"tokio",
]
[[package]]
···
[[package]]
name = "tokio-util"
+
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
+
checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df"
dependencies = [
"bytes",
"futures-core",
···
"thiserror 2.0.12",
"tikv-jemallocator",
"tokio",
+
"tokio-util",
]
[[package]]
+74 -81
jetstream/src/lib.rs
···
retry_attempt += 1;
if let Ok((ws_stream, _)) = connect_async(req).await {
let t_connected = Instant::now();
-
log::trace!("jetstream connected. starting websocket task...");
if let Err(e) =
websocket_task(dict, ws_stream, send_channel.clone(), &mut last_cursor)
.await
···
}
log::error!("Jetstream closed after encountering error: {e:?}");
} else {
-
log::error!("Jetstream connection closed cleanly");
}
if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
retry_attempt = 0;
}
}
if retry_attempt >= max_retries {
-
log::error!("hit max retries, bye");
break;
}
···
let mut closing_connection = false;
loop {
match socket_read.next().await {
-
Some(Ok(message)) => {
-
match message {
-
Message::Text(json) => {
-
let event: JetstreamEvent = match serde_json::from_str(&json) {
-
Ok(ev) => ev,
-
Err(e) => {
-
log::warn!(
-
"failed to parse json: {e:?} (from {})",
-
json.get(..24).unwrap_or(&json)
-
);
-
continue;
-
}
-
};
-
let event_cursor = event.cursor;
-
if let Some(last) = last_cursor {
-
if event_cursor <= *last {
-
log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
-
continue;
-
}
}
-
if send_channel.send(event).await.is_err() {
-
// We can assume that all receivers have been dropped, so we can close
-
// the connection and exit the task.
-
log::info!(
"All receivers for the Jetstream connection have been dropped, closing connection."
);
-
socket_write.close().await?;
-
return Err(JetstreamEventError::ReceiverClosedError);
-
} else if let Some(last) = last_cursor.as_mut() {
-
*last = event_cursor;
-
}
}
-
Message::Binary(zstd_json) => {
-
let mut cursor = IoCursor::new(zstd_json);
-
let decoder = zstd::stream::Decoder::with_prepared_dictionary(
-
&mut cursor,
-
&dictionary,
-
)
-
.map_err(JetstreamEventError::CompressionDictionaryError)?;
-
let event: JetstreamEvent = match serde_json::from_reader(decoder) {
-
Ok(ev) => ev,
-
Err(e) => {
-
log::warn!("failed to parse json: {e:?}");
-
continue;
-
}
-
};
-
let event_cursor = event.cursor;
-
if let Some(last) = last_cursor {
-
if event_cursor <= *last {
-
log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
-
continue;
-
}
}
-
if send_channel.send(event).await.is_err() {
-
// We can assume that all receivers have been dropped, so we can close
-
// the connection and exit the task.
-
log::info!(
"All receivers for the Jetstream connection have been dropped, closing connection."
);
-
socket_write.close().await?;
-
return Err(JetstreamEventError::ReceiverClosedError);
-
} else if let Some(last) = last_cursor.as_mut() {
-
*last = event_cursor;
-
}
}
-
Message::Ping(vec) => {
-
log::trace!("Ping recieved, responding");
-
socket_write
-
.send(Message::Pong(vec))
-
.await
-
.map_err(JetstreamEventError::PingPongError)?;
-
}
-
Message::Close(close_frame) => {
-
log::trace!("Close recieved. I guess we just log here?");
-
if let Some(close_frame) = close_frame {
-
let reason = close_frame.reason;
-
let code = close_frame.code;
-
log::trace!("Connection closed. Reason: {reason}, Code: {code}");
-
}
-
}
-
Message::Pong(pong) => {
-
let pong_payload = String::from_utf8(pong.to_vec())
-
.unwrap_or("Invalid payload".to_string());
-
log::trace!("Pong recieved. Payload: {pong_payload}");
}
-
Message::Frame(_) => (),
}
-
}
Some(Err(error)) => {
log::error!("Web socket error: {error}");
closing_connection = true;
···
retry_attempt += 1;
if let Ok((ws_stream, _)) = connect_async(req).await {
let t_connected = Instant::now();
+
log::info!("jetstream connected. starting websocket task...");
if let Err(e) =
websocket_task(dict, ws_stream, send_channel.clone(), &mut last_cursor)
.await
···
}
log::error!("Jetstream closed after encountering error: {e:?}");
} else {
+
log::warn!("Jetstream connection closed cleanly");
}
if t_connected.elapsed() > Duration::from_secs(success_threshold_s) {
+
log::warn!("Jetstream: more than {success_threshold_s}s since last reconnect, reconnecting immediately.");
retry_attempt = 0;
}
}
if retry_attempt >= max_retries {
+
log::error!("jetstream: hit max retries, bye");
break;
}
···
let mut closing_connection = false;
loop {
match socket_read.next().await {
+
Some(Ok(message)) => match message {
+
Message::Text(json) => {
+
let event: JetstreamEvent = match serde_json::from_str(&json) {
+
Ok(ev) => ev,
+
Err(e) => {
+
log::warn!(
+
"failed to parse json: {e:?} (from {})",
+
json.get(..24).unwrap_or(&json)
+
);
+
continue;
+
}
+
};
+
let event_cursor = event.cursor;
+
if let Some(last) = last_cursor {
+
if event_cursor <= *last {
+
log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
+
continue;
}
+
}
+
if send_channel.send(event).await.is_err() {
+
log::warn!(
"All receivers for the Jetstream connection have been dropped, closing connection."
);
+
socket_write.close().await?;
+
return Err(JetstreamEventError::ReceiverClosedError);
+
} else if let Some(last) = last_cursor.as_mut() {
+
*last = event_cursor;
}
+
}
+
Message::Binary(zstd_json) => {
+
let mut cursor = IoCursor::new(zstd_json);
+
let decoder =
+
zstd::stream::Decoder::with_prepared_dictionary(&mut cursor, &dictionary)
+
.map_err(JetstreamEventError::CompressionDictionaryError)?;
+
let event: JetstreamEvent = match serde_json::from_reader(decoder) {
+
Ok(ev) => ev,
+
Err(e) => {
+
log::warn!("failed to parse json: {e:?}");
+
continue;
+
}
+
};
+
let event_cursor = event.cursor;
+
if let Some(last) = last_cursor {
+
if event_cursor <= *last {
+
log::warn!("event cursor {event_cursor:?} was not newer than the last one: {last:?}. dropping event.");
+
continue;
}
+
}
+
if send_channel.send(event).await.is_err() {
+
log::warn!(
"All receivers for the Jetstream connection have been dropped, closing connection."
);
+
socket_write.close().await?;
+
return Err(JetstreamEventError::ReceiverClosedError);
+
} else if let Some(last) = last_cursor.as_mut() {
+
*last = event_cursor;
}
+
}
+
Message::Ping(vec) => {
+
log::trace!("Ping recieved, responding");
+
socket_write
+
.send(Message::Pong(vec))
+
.await
+
.map_err(JetstreamEventError::PingPongError)?;
+
}
+
Message::Close(close_frame) => {
+
log::trace!("Close recieved. I guess we just log here?");
+
if let Some(close_frame) = close_frame {
+
let reason = close_frame.reason;
+
let code = close_frame.code;
+
log::trace!("Connection closed. Reason: {reason}, Code: {code}");
}
+
}
+
Message::Pong(pong) => {
+
let pong_payload =
+
String::from_utf8(pong.to_vec()).unwrap_or("Invalid payload".to_string());
+
log::trace!("Pong recieved. Payload: {pong_payload}");
}
+
Message::Frame(_) => (),
+
},
Some(Err(error)) => {
log::error!("Web socket error: {error}");
closing_connection = true;
+1
ufos/Cargo.toml
···
sha2 = "0.10.9"
thiserror = "2.0.12"
tokio = { version = "1.44.2", features = ["full", "sync", "time"] }
[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.6.0"
···
sha2 = "0.10.9"
thiserror = "2.0.12"
tokio = { version = "1.44.2", features = ["full", "sync", "time"] }
+
tokio-util = "0.7.15"
[target.'cfg(not(target_env = "msvc"))'.dependencies]
tikv-jemallocator = "0.6.0"
+3 -8
ufos/src/consumer.rs
···
let mut batcher = Batcher::new(jetstream_receiver, batch_sender, sketch_secret);
tokio::task::spawn(async move {
let r = batcher.run().await;
-
log::info!("batcher ended: {r:?}");
});
Ok(batch_reciever)
}
···
pub async fn run(&mut self) -> anyhow::Result<()> {
// TODO: report errors *from here* probably, since this gets shipped off into a spawned task that might just vanish
loop {
-
match timeout(
-
Duration::from_millis(30_000),
-
self.jetstream_receiver.recv(),
-
)
-
.await
-
{
Err(_elapsed) => self.no_events_step().await?,
Ok(Some(event)) => self.handle_event(event).await?,
Ok(None) => anyhow::bail!("channel closed"),
···
Some(Ok(t)) => format!("{:?}", t),
Some(Err(e)) => format!("+{:?}", e.duration()),
};
-
log::trace!(
"sending batch now from {beginning}, {}, queue capacity: {}, referrer: {referrer}",
if small { "small" } else { "full" },
self.batch_sender.capacity(),
···
let mut batcher = Batcher::new(jetstream_receiver, batch_sender, sketch_secret);
tokio::task::spawn(async move {
let r = batcher.run().await;
+
log::warn!("batcher ended: {r:?}");
});
Ok(batch_reciever)
}
···
pub async fn run(&mut self) -> anyhow::Result<()> {
// TODO: report errors *from here* probably, since this gets shipped off into a spawned task that might just vanish
loop {
+
match timeout(Duration::from_secs_f64(30.), self.jetstream_receiver.recv()).await {
Err(_elapsed) => self.no_events_step().await?,
Ok(Some(event)) => self.handle_event(event).await?,
Ok(None) => anyhow::bail!("channel closed"),
···
Some(Ok(t)) => format!("{:?}", t),
Some(Err(e)) => format!("+{:?}", e.duration()),
};
+
log::info!(
"sending batch now from {beginning}, {}, queue capacity: {}, referrer: {referrer}",
if small { "small" } else { "full" },
self.batch_sender.capacity(),
+2
ufos/src/error.rs
···
JoinError(#[from] tokio::task::JoinError),
#[error("Background task already started")]
BackgroundAlreadyStarted,
}
···
JoinError(#[from] tokio::task::JoinError),
#[error("Background task already started")]
BackgroundAlreadyStarted,
+
#[error("Batch sender exited")]
+
BatchSenderExited,
}
+2 -2
ufos/src/main.rs
···
let rolling = write_store
.background_tasks(args.reroll)?
.run(args.backfill);
-
let storing = write_store.receive_batches(batches);
let stating = do_update_stuff(read_store);
tokio::select! {
z = serving => log::warn!("serve task ended: {z:?}"),
z = rolling => log::warn!("rollup task ended: {z:?}"),
-
z = storing => log::warn!("storage task ended: {z:?}"),
z = stating => log::warn!("status task ended: {z:?}"),
};
···
let rolling = write_store
.background_tasks(args.reroll)?
.run(args.backfill);
+
let consuming = write_store.receive_batches(batches);
let stating = do_update_stuff(read_store);
tokio::select! {
z = serving => log::warn!("serve task ended: {z:?}"),
z = rolling => log::warn!("rollup task ended: {z:?}"),
+
z = consuming => log::warn!("consuming task ended: {z:?}"),
z = stating => log::warn!("status task ended: {z:?}"),
};
+36 -13
ufos/src/storage.rs
···
use jetstream::exports::{Did, Nsid};
use std::collections::{HashMap, HashSet};
use std::path::Path;
use tokio::sync::mpsc::Receiver;
pub type StorageResult<T> = Result<T, StorageError>;
···
Self: Sized;
}
-
pub trait StoreWriter<B: StoreBackground>: Send + Sync
where
Self: 'static,
{
fn background_tasks(&mut self, reroll: bool) -> StorageResult<B>;
-
fn receive_batches<const LIMIT: usize>(
-
mut self,
mut batches: Receiver<EventBatch<LIMIT>>,
-
) -> impl std::future::Future<Output = StorageResult<()>> + Send
-
where
-
Self: Sized,
-
{
-
async {
-
tokio::task::spawn_blocking(move || {
-
while let Some(event_batch) = batches.blocking_recv() {
-
self.insert_batch(event_batch)?;
}
-
Ok::<(), StorageError>(())
})
-
.await?
}
}
fn insert_batch<const LIMIT: usize>(
···
use jetstream::exports::{Did, Nsid};
use std::collections::{HashMap, HashSet};
use std::path::Path;
+
use std::time::{Duration, SystemTime};
use tokio::sync::mpsc::Receiver;
+
use tokio_util::sync::CancellationToken;
pub type StorageResult<T> = Result<T, StorageError>;
···
Self: Sized;
}
+
#[async_trait]
+
pub trait StoreWriter<B: StoreBackground>: Clone + Send + Sync
where
Self: 'static,
{
fn background_tasks(&mut self, reroll: bool) -> StorageResult<B>;
+
async fn receive_batches<const LIMIT: usize>(
+
self,
mut batches: Receiver<EventBatch<LIMIT>>,
+
) -> StorageResult<()> {
+
while let Some(event_batch) = batches.recv().await {
+
let token = CancellationToken::new();
+
let cancelled = token.clone();
+
tokio::spawn(async move {
+
let started = SystemTime::now();
+
let mut concerned = false;
+
loop {
+
tokio::select! {
+
_ = tokio::time::sleep(Duration::from_secs_f64(1.)) => {
+
log::warn!("taking a long time to insert an event batch ({:?})...", started.elapsed());
+
concerned = true;
+
}
+
_ = cancelled.cancelled() => {
+
if concerned {
+
log::warn!("finally inserted slow event batch after {:?}", started.elapsed());
+
}
+
break
+
}
+
}
}
+
});
+
tokio::task::spawn_blocking({
+
let mut me = self.clone();
+
move || {
+
let _guard = token.drop_guard();
+
me.insert_batch(event_batch)
+
}
})
+
.await??;
}
+
+
Err(StorageError::BatchSenderExited)
}
fn insert_batch<const LIMIT: usize>(