Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

handle shutdown in all tasks

Changed files (+47 -12):

slingshot/src/error.rs (+2)
···
    ServerTaskError(#[from] ServerError),
    #[error(transparent)]
    IdentityTaskError(#[from] IdentityError),
+   #[error("firehose cache failed to close: {0}")]
+   FirehoseCacheCloseError(foyer::Error),
}

#[derive(Debug, Error)]

slingshot/src/firehose_cache.rs (+5 -1)
···
        .memory(64 * 2_usize.pow(20))
        .with_weighter(|k: &String, v| k.len() + std::mem::size_of_val(v))
        .storage(Engine::large())
-       .with_device_options(DirectFsDeviceOptions::new(cache_dir))
+       .with_device_options(
+           DirectFsDeviceOptions::new(cache_dir)
+               .with_capacity(2_usize.pow(30)) // TODO: configurable (1GB to have something)
+               .with_file_size(16 * 2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records
+       )
        .build()
        .await
        .map_err(|e| format!("foyer setup error: {e:?}"))?;

slingshot/src/identity.rs (+17 -3)
···
/// 3. DID -> handle resolution: for bidirectional handle validation and in case we want to offer this
use std::time::Duration;
use tokio::sync::Mutex;
+use tokio_util::sync::CancellationToken;
use crate::error::IdentityError;
use atrium_api::{
···
.with_name("identity")
.memory(16 * 2_usize.pow(20))
.with_weighter(|k, v| std::mem::size_of_val(k) + std::mem::size_of_val(v))
-
.storage(Engine::large())
-
.with_device_options(DirectFsDeviceOptions::new(cache_dir))
+
.storage(Engine::small())
+
.with_device_options(
+
DirectFsDeviceOptions::new(cache_dir)
+
.with_capacity(2_usize.pow(30)) // TODO: configurable (1GB to have something)
+
.with_file_size(2_usize.pow(20)), // note: this does limit the max cached item size, warning jumbo records
+
)
.build()
.await?;
···
    }

    /// run the refresh queue consumer
-   pub async fn run_refresher(&self) -> Result<(), IdentityError> {
+   pub async fn run_refresher(&self, shutdown: CancellationToken) -> Result<(), IdentityError> {
        let _guard = self
            .refresher
            .try_lock()
            .expect("there to only be one refresher running");
        loop {
+           if shutdown.is_cancelled() {
+               log::info!("identity refresher: exiting for shutdown: closing cache...");
+               if let Err(e) = self.cache.close().await {
+                   log::error!("cache close errored: {e}");
+               } else {
+                   log::info!("identity cache closed.")
+               }
+               return Ok(());
+           }
            let Some(task_key) = self.peek_refresh().await else {
                tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
                continue;
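
The refresher now checks the token at the top of each pass, so in the worst case it exits after one 100ms idle sleep. Purely for reference, a hedged sketch of the other common shape for this kind of loop: racing the idle sleep against the token with `tokio::select!`, so cancellation wakes the task immediately. This is an illustration of the `CancellationToken` API, not the code above; the `idle_or_shutdown` name is hypothetical.

```rust
use tokio_util::sync::CancellationToken;

/// Returns true if shutdown was requested, false after a normal 100ms idle nap.
async fn idle_or_shutdown(shutdown: &CancellationToken) -> bool {
    tokio::select! {
        // resolves as soon as the token is cancelled anywhere in the app
        _ = shutdown.cancelled() => true,
        _ = tokio::time::sleep(std::time::Duration::from_millis(100)) => false,
    }
}
```

Polling `is_cancelled()` as the diff does keeps the loop simpler and only adds up to one sleep interval of extra shutdown latency.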

slingshot/src/main.rs (+12 -3)
···
.map_err(|e| format!("identity setup failed: {e:?}"))?;
log::info!("identity service ready.");
let identity_refresher = identity.clone();
+
let identity_shutdown = shutdown.clone();
tasks.spawn(async move {
-
identity_refresher.run_refresher().await?;
+
identity_refresher.run_refresher(identity_shutdown).await?;
Ok(())
});
···
    });
    let consumer_shutdown = shutdown.clone();
+   let consumer_cache = cache.clone();
    tasks.spawn(async move {
        consume(
            args.jetstream,
            None,
            args.jetstream_no_zstd,
            consumer_shutdown,
-           cache,
+           consumer_cache,
        )
        .await?;
        Ok(())
···
        }
    }
+   tasks.spawn(async move {
+       cache
+           .close()
+           .await
+           .map_err(MainTaskError::FirehoseCacheCloseError)
+   });
    tokio::select! {
        _ = async {
            while let Some(completed) = tasks.join_next().await {
                log::info!("shutdown: task completed: {completed:?}");
            }
        } => {},
-       _ = tokio::time::sleep(std::time::Duration::from_secs(3)) => {
+       _ = tokio::time::sleep(std::time::Duration::from_secs(30)) => {
            log::info!("shutdown: not all tasks completed on time. aborting...");
            tasks.shutdown().await;
        },
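
main.rs now hands each task its own clone of the shutdown token, spawns a final task to close the firehose cache (converting the error via the new `FirehoseCacheCloseError` variant), and gives the whole JoinSet 30 seconds to drain before aborting. The diff doesn't show where `shutdown` itself gets cancelled; purely as a hedged illustration (not taken from slingshot's main.rs), one common wiring cancels the token from a ctrl-c handler so the teardown above kicks in:

```rust
use tokio_util::sync::CancellationToken;

/// Hypothetical sketch: cancel the shared token when ctrl-c arrives, letting
/// every task that holds a clone of it begin its own shutdown path.
fn spawn_ctrl_c_handler(shutdown: CancellationToken) {
    tokio::spawn(async move {
        if let Err(e) = tokio::signal::ctrl_c().await {
            log::error!("failed to listen for ctrl-c: {e}");
        }
        log::info!("ctrl-c received: requesting shutdown");
        shutdown.cancel();
    });
}
```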

slingshot/src/server.rs (+11 -5)
···
    host: Option<String>,
    acme_contact: Option<String>,
    certs: Option<PathBuf>,
-   _shutdown: CancellationToken,
+   shutdown: CancellationToken,
) -> Result<(), ServerError> {
    let repo = Arc::new(repo);
    let api_service = OpenApiService::new(
···
        }
        let auto_cert = auto_cert.build().map_err(ServerError::AcmeBuildError)?;
-       run(TcpListener::bind("0.0.0.0:443").acme(auto_cert), app).await
+       run(
+           TcpListener::bind("0.0.0.0:443").acme(auto_cert),
+           app,
+           shutdown,
+       )
+       .await
    } else {
-       run(TcpListener::bind("127.0.0.1:3000"), app).await
+       run(TcpListener::bind("127.0.0.1:3000"), app, shutdown).await
    }
}

-async fn run<L>(listener: L, app: Route) -> Result<(), ServerError>
+async fn run<L>(listener: L, app: Route, shutdown: CancellationToken) -> Result<(), ServerError>
where
    L: Listener + 'static,
{
···
        .with(Tracing);
    Server::new(listener)
        .name("slingshot")
-       .run(app)
+       .run_with_graceful_shutdown(app, shutdown.cancelled(), None)
        .await
        .map_err(ServerError::ServerExited)
+       .inspect(|()| log::info!("server ended. goodbye."))
}
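
poem's `run_with_graceful_shutdown` takes the shutdown signal as a future plus an optional grace period for in-flight requests, which the diff leaves as `None`. A small sketch of the same call with an explicit timeout, reusing the plain-HTTP listener shown above; the `run_with_deadline` name and the 10-second value are illustrative only, not what slingshot does:

```rust
use std::time::Duration;

use poem::{listener::TcpListener, Route, Server};
use tokio_util::sync::CancellationToken;

/// Sketch: same graceful-shutdown call, but force-close lingering connections
/// 10 seconds after the token fires instead of waiting indefinitely.
async fn run_with_deadline(app: Route, shutdown: CancellationToken) -> Result<(), std::io::Error> {
    Server::new(TcpListener::bind("127.0.0.1:3000"))
        .name("slingshot")
        .run_with_graceful_shutdown(app, shutdown.cancelled(), Some(Duration::from_secs(10)))
        .await
}
```

With `None`, as in the diff, poem waits for in-flight requests without a deadline; the 30-second abort in main.rs still bounds the total shutdown time.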