Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

upstream getrecord

+19 -57
Cargo.lock
···
[[package]]
name = "atrium-api"
-
version = "0.25.3"
-
source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
-
dependencies = [
-
"atrium-common 0.1.2 (git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits)",
-
"atrium-xrpc 0.12.3 (git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits)",
-
"chrono",
-
"http",
-
"ipld-core",
-
"langtag",
-
"regex",
-
"serde",
-
"serde_bytes",
-
"serde_json",
-
"thiserror 1.0.69",
-
"trait-variant",
-
]
-
-
[[package]]
-
name = "atrium-api"
version = "0.25.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46355d3245edc7b3160b2a45fe55d09a6963ebd3eee0252feb6b72fb0eb71463"
dependencies = [
-
"atrium-common 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
-
"atrium-xrpc 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
+
"atrium-common",
+
"atrium-xrpc",
"chrono",
"http",
"ipld-core",
···
]
[[package]]
-
name = "atrium-common"
-
version = "0.1.2"
-
source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
-
dependencies = [
-
"dashmap",
-
"lru",
-
"moka",
-
"thiserror 1.0.69",
-
"tokio",
-
"trait-variant",
-
"web-time",
-
]
-
-
[[package]]
name = "atrium-identity"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c9e2d42bb4dbea038f4f5f45e3af2a89d61a9894a75f06aa550b74a60d2be380"
dependencies = [
-
"atrium-api 0.25.4",
-
"atrium-common 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
-
"atrium-xrpc 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
+
"atrium-api",
+
"atrium-common",
+
"atrium-xrpc",
"serde",
"serde_html_form",
"serde_json",
···
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca22dc4eaf77fd9bf050b21192ac58cd654a437d28e000ec114ebd93a51d36f5"
dependencies = [
-
"atrium-api 0.25.4",
-
"atrium-common 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+
"atrium-api",
+
"atrium-common",
"atrium-identity",
-
"atrium-xrpc 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
+
"atrium-xrpc",
"base64 0.22.1",
"chrono",
"dashmap",
···
]
[[package]]
-
name = "atrium-xrpc"
-
version = "0.12.3"
-
source = "git+https://github.com/uniphil/atrium?branch=fix%2Fnsid-allow-nonleading-name-digits#c4364f318d337bbc3e3e3aaf97c9f971e95f5f7e"
-
dependencies = [
-
"http",
-
"serde",
-
"serde_html_form",
-
"serde_json",
-
"thiserror 1.0.69",
-
"trait-variant",
-
]
-
-
[[package]]
name = "auto_enums"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
···
dependencies = [
"anyhow",
"async-trait",
-
"atrium-api 0.25.3",
+
"atrium-api",
"chrono",
"clap",
"futures-util",
···
name = "slingshot"
version = "0.1.0"
dependencies = [
+
"atrium-api",
+
"atrium-common",
+
"atrium-identity",
+
"atrium-oauth",
"clap",
"ctrlc",
"env_logger",
"foyer",
+
"hickory-resolver",
"jetstream",
"log",
"metrics",
"metrics-exporter-prometheus 0.17.2",
"poem",
"poem-openapi",
+
"reqwest",
"serde",
"serde_json",
"thiserror 2.0.12",
+
"time",
"tokio",
"tokio-util",
+
"url",
[[package]]
···
name = "who-am-i"
version = "0.1.0"
dependencies = [
-
"atrium-api 0.25.4",
-
"atrium-common 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
+
"atrium-api",
+
"atrium-common",
"atrium-identity",
"atrium-oauth",
"axum",
+1 -1
jetstream/Cargo.toml
···
[dependencies]
async-trait = "0.1.83"
-
atrium-api = { git = "https://github.com/uniphil/atrium", branch = "fix/nsid-allow-nonleading-name-digits", default-features = false, features = [
+
atrium-api = { version = "0.25.4", default-features = false, features = [
"namespace-appbsky",
] }
tokio = { version = "1.44.2", features = ["full", "sync", "time"] }
+8
slingshot/Cargo.toml
···
edition = "2024"
[dependencies]
+
atrium-api = { version = "0.25.4", default-features = false }
+
atrium-common = "0.1.2"
+
atrium-identity = "0.1.5"
+
atrium-oauth = "0.1.3"
clap = { version = "4.5.41", features = ["derive"] }
ctrlc = "3.4.7"
env_logger = "0.11.8"
foyer = { version = "0.18.0", features = ["serde"] }
+
hickory-resolver = "0.25.2"
jetstream = { path = "../jetstream", features = ["metrics"] }
log = "0.4.27"
metrics = "0.24.2"
metrics-exporter-prometheus = { version = "0.17.1", features = ["http-listener"] }
poem = "3.1.12"
poem-openapi = { version = "5.1.16", features = ["scalar"] }
+
reqwest = { version = "0.12.22", features = ["json"] }
serde = { version = "1.0.219", features = ["derive"] }
serde_json = { version = "1.0.141", features = ["raw_value"] }
thiserror = "2.0.12"
+
time = { version = "0.3.41", features = ["serde"] }
tokio = { version = "1.47.0", features = ["full"] }
tokio-util = "0.7.15"
+
url = "2.5.4"
+7
slingshot/readme.md
···
+
# slingshot: atproto record edge cache
+
+
running locally for dev:
+
+
```bash
+
ulimit -n 4096 && RUST_LOG=info,slingshot=trace cargo run -- --jetstream us-east-1 --cache-dir ./foyer
+
```
+49
slingshot/src/error.rs
···
}
#[derive(Debug, Error)]
+
pub enum IdentityError {
+
#[error("whatever: {0}")]
+
WhateverError(String),
+
#[error("bad DID: {0}")]
+
BadDid(&'static str),
+
#[error("identity types got mixed up: {0}")]
+
IdentityValTypeMixup(String),
+
#[error("foyer error: {0}")]
+
FoyerError(#[from] foyer::Error),
+
+
#[error("failed to resolve: {0}")]
+
ResolutionFailed(#[from] atrium_identity::Error),
+
// #[error("identity resolved but no handle found for user")]
+
// NoHandle,
+
#[error("found handle {0:?} but it appears invalid: {1}")]
+
InvalidHandle(String, &'static str),
+
+
#[error("could not convert atrium did doc to partial mini doc: {0}")]
+
BadDidDoc(String),
+
+
#[error("wrong key for clearing refresh queue: {0}")]
+
RefreshQueueKeyError(&'static str),
+
}
+
+
#[derive(Debug, Error)]
pub enum MainTaskError {
#[error(transparent)]
ConsumerTaskError(#[from] ConsumerError),
#[error(transparent)]
ServerTaskError(#[from] ServerError),
+
#[error(transparent)]
+
IdentityTaskError(#[from] IdentityError),
+
}
+
+
#[derive(Debug, Error)]
+
pub enum RecordError {
+
#[error("identity error: {0}")]
+
IdentityError(#[from] IdentityError),
+
#[error("repo could not be validated as either a DID or an atproto handle")]
+
BadRepo,
+
#[error("could not get record: {0}")]
+
NotFound(&'static str),
+
#[error("could nto parse pds url: {0}")]
+
UrlParseError(#[from] url::ParseError),
+
#[error("reqwest send failed: {0}")]
+
SendError(reqwest::Error),
+
#[error("reqwest raised for status: {0}")]
+
StatusError(reqwest::Error),
+
#[error("reqwest failed to parse json: {0}")]
+
ParseJsonError(reqwest::Error),
+
#[error("upstream getRecord did not include a CID")]
+
MissingUpstreamCid,
+
#[error("upstream CID was not valid: {0}")]
+
BadUpstreamCid(String),
}
+2 -2
slingshot/src/firehose_cache.rs
···
use std::path::Path;
pub async fn firehose_cache(
-
dir: impl AsRef<Path>,
+
cache_dir: impl AsRef<Path>,
) -> Result<HybridCache<String, CachedRecord>, String> {
let cache = HybridCacheBuilder::new()
.with_name("firehose")
.memory(64 * 2_usize.pow(20))
.with_weighter(|k: &String, v| k.len() + std::mem::size_of_val(v))
.storage(Engine::large())
-
.with_device_options(DirectFsDeviceOptions::new(dir))
+
.with_device_options(DirectFsDeviceOptions::new(cache_dir))
.build()
.await
.map_err(|e| format!("foyer setup error: {e:?}"))?;
+510
slingshot/src/identity.rs
···
+
use hickory_resolver::{ResolveError, TokioResolver};
+
use std::collections::{HashSet, VecDeque};
+
use std::path::Path;
+
use std::sync::Arc;
+
/// for now we're gonna just keep doing more cache
+
///
+
/// plc.directory x foyer, ttl kept with data, refresh deferred to background on fetch
+
///
+
/// things we need:
+
///
+
/// 1. handle -> DID resolution: getRecord must accept a handle for `repo` param
+
/// 2. DID -> PDS resolution: so we know where to getRecord
+
/// 3. DID -> handle resolution: for bidirectional handle validation and in case we want to offer this
+
use std::time::Duration;
+
use tokio::sync::Mutex;
+
+
use crate::error::IdentityError;
+
use atrium_api::{
+
did_doc::DidDocument,
+
types::string::{Did, Handle},
+
};
+
use atrium_common::resolver::Resolver;
+
use atrium_identity::{
+
did::{CommonDidResolver, CommonDidResolverConfig, DEFAULT_PLC_DIRECTORY_URL},
+
handle::{AtprotoHandleResolver, AtprotoHandleResolverConfig, DnsTxtResolver},
+
};
+
use atrium_oauth::DefaultHttpClient; // it's probably not worth bringing all of atrium_oauth for this but
+
use foyer::{DirectFsDeviceOptions, Engine, HybridCache, HybridCacheBuilder};
+
use serde::{Deserialize, Serialize};
+
use time::UtcDateTime;
+
+
/// once we have something resolved, don't re-resolve until after this period
+
const MIN_TTL: Duration = Duration::from_secs(4 * 3600); // probably should have a max ttl
+
const MIN_NOT_FOUND_TTL: Duration = Duration::from_secs(60);
+
+
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
+
enum IdentityKey {
+
Handle(Handle),
+
Did(Did),
+
}
+
+
#[derive(Debug, Serialize, Deserialize)]
+
struct IdentityVal(UtcDateTime, IdentityData);
+
+
#[derive(Debug, Serialize, Deserialize)]
+
enum IdentityData {
+
NotFound,
+
Did(Did),
+
Doc(PartialMiniDoc),
+
}
+
+
/// partial representation of a com.bad-example.identity mini atproto doc
+
///
+
/// partial because the handle is not verified
+
#[derive(Debug, Clone, Serialize, Deserialize)]
+
struct PartialMiniDoc {
+
/// an atproto handle (**unverified**)
+
///
+
/// the first valid atproto handle from the did doc's aka
+
unverified_handle: Handle,
+
/// the did's atproto pds url (TODO: type this?)
+
///
+
/// note: atrium *does* actually parse it into a URI, it just doesn't return
+
/// that for some reason
+
pds: String,
+
/// for now we're just pulling this straight from the did doc
+
///
+
/// would be nice to type and validate it
+
///
+
/// this is the publicKeyMultibase from the did doc.
+
/// legacy key encoding not supported.
+
/// `id`, `type`, and `controller` must be checked, but aren't stored.
+
signing_key: String,
+
}
+
+
impl TryFrom<DidDocument> for PartialMiniDoc {
+
type Error = String;
+
fn try_from(did_doc: DidDocument) -> Result<Self, Self::Error> {
+
// must use the first valid handle
+
let mut unverified_handle = None;
+
let Some(ref doc_akas) = did_doc.also_known_as else {
+
return Err("did doc missing `also_known_as`".to_string());
+
};
+
for aka in doc_akas {
+
let Some(maybe_handle) = aka.strip_prefix("at://") else {
+
continue;
+
};
+
let Ok(valid_handle) = Handle::new(maybe_handle.to_string()) else {
+
continue;
+
};
+
unverified_handle = Some(valid_handle);
+
break;
+
}
+
let Some(unverified_handle) = unverified_handle else {
+
return Err("no valid atproto handles in `also_known_as`".to_string());
+
};
+
+
// atrium provides getters for service endpoints
+
let Some(pds) = did_doc.get_pds_endpoint() else {
+
return Err("no valid pds service found".to_string());
+
};
+
+
// TODO can't use atrium's get_signing_key() because it fails to check type and controller
+
// so if we check those and reject it, we might miss a later valid key in the array
+
// (todo is to fix atrium)
+
// actually: atrium might be flexible for legacy reps. for now we're rejecting legacy rep.
+
+
// must use the first valid signing key
+
let mut signing_key = None;
+
let Some(verification_methods) = did_doc.verification_method else {
+
return Err("no verification methods found".to_string());
+
};
+
for method in verification_methods {
+
if method.id != format!("{}#atproto", did_doc.id) {
+
continue;
+
}
+
if method.r#type != "Multikey" {
+
continue;
+
}
+
if method.controller != did_doc.id {
+
continue;
+
}
+
let Some(key) = method.public_key_multibase else {
+
continue;
+
};
+
signing_key = Some(key);
+
break;
+
}
+
let Some(signing_key) = signing_key else {
+
return Err("no valid atproto signing key found in verification methods".to_string());
+
};
+
+
Ok(PartialMiniDoc {
+
unverified_handle,
+
pds,
+
signing_key,
+
})
+
}
+
}
+
+
/// multi-producer *single-consumer* queue structures (wrap in arc-mutex plz)
+
///
+
/// the hashset allows testing for presence of items in the queue.
+
/// this has absolutely no support for multiple queue consumers.
+
#[derive(Debug, Default)]
+
struct RefreshQueue {
+
queue: VecDeque<IdentityKey>,
+
items: HashSet<IdentityKey>,
+
}
+
+
#[derive(Clone)]
+
pub struct Identity {
+
handle_resolver: Arc<AtprotoHandleResolver<HickoryDnsTxtResolver, DefaultHttpClient>>,
+
did_resolver: Arc<CommonDidResolver<DefaultHttpClient>>,
+
cache: HybridCache<IdentityKey, IdentityVal>,
+
/// multi-producer *single consumer* queue
+
refresh_queue: Arc<Mutex<RefreshQueue>>,
+
/// just a lock to ensure only one refresher (queue consumer) is running (to be improved with a better refresher)
+
refresher: Arc<Mutex<()>>,
+
}
+
+
impl Identity {
+
pub async fn new(cache_dir: impl AsRef<Path>) -> Result<Self, IdentityError> {
+
let http_client = Arc::new(DefaultHttpClient::default());
+
let handle_resolver = AtprotoHandleResolver::new(AtprotoHandleResolverConfig {
+
dns_txt_resolver: HickoryDnsTxtResolver::new().unwrap(),
+
http_client: http_client.clone(),
+
});
+
let did_resolver = CommonDidResolver::new(CommonDidResolverConfig {
+
plc_directory_url: DEFAULT_PLC_DIRECTORY_URL.to_string(),
+
http_client: http_client.clone(),
+
});
+
+
let cache = HybridCacheBuilder::new()
+
.with_name("identity")
+
.memory(16 * 2_usize.pow(20))
+
.with_weighter(|k, v| std::mem::size_of_val(k) + std::mem::size_of_val(v))
+
.storage(Engine::large())
+
.with_device_options(DirectFsDeviceOptions::new(cache_dir))
+
.build()
+
.await?;
+
+
Ok(Self {
+
handle_resolver: Arc::new(handle_resolver),
+
did_resolver: Arc::new(did_resolver),
+
cache,
+
refresh_queue: Default::default(),
+
refresher: Default::default(),
+
})
+
}
+
+
/// Resolve (and verify!) an atproto handle to a DID
+
///
+
/// The result can be stale
+
///
+
/// `None` if the handle can't be found or verification fails
+
pub async fn handle_to_did(&self, handle: Handle) -> Result<Option<Did>, IdentityError> {
+
let Some(did) = self.handle_to_unverified_did(&handle).await? else {
+
return Ok(None);
+
};
+
let Some(doc) = self.did_to_partial_mini_doc(&did).await? else {
+
return Ok(None);
+
};
+
if doc.unverified_handle != handle {
+
return Ok(None);
+
}
+
Ok(Some(did))
+
}
+
+
/// Resolve (and verify!) a DID to a pds url
+
///
+
/// This *also* incidentally resolves and verifies the handle, which might
+
/// make it slower than expected
+
pub async fn did_to_pds(&self, did: Did) -> Result<Option<String>, IdentityError> {
+
let Some(mini_doc) = self.did_to_partial_mini_doc(&did).await? else {
+
return Ok(None);
+
};
+
Ok(Some(mini_doc.pds))
+
}
+
+
/// Resolve (and cache but **not verify**) a handle to a DID
+
async fn handle_to_unverified_did(
+
&self,
+
handle: &Handle,
+
) -> Result<Option<Did>, IdentityError> {
+
let key = IdentityKey::Handle(handle.clone());
+
let entry = self
+
.cache
+
.fetch(key.clone(), {
+
let handle = handle.clone();
+
let resolver = self.handle_resolver.clone();
+
|| async move {
+
match resolver.resolve(&handle).await {
+
Ok(did) => Ok(IdentityVal(UtcDateTime::now(), IdentityData::Did(did))),
+
Err(atrium_identity::Error::NotFound) => {
+
Ok(IdentityVal(UtcDateTime::now(), IdentityData::NotFound))
+
}
+
Err(other) => Err(foyer::Error::Other(Box::new(
+
IdentityError::ResolutionFailed(other),
+
))),
+
}
+
}
+
})
+
.await?;
+
+
let now = UtcDateTime::now();
+
let IdentityVal(last_fetch, data) = entry.value();
+
match data {
+
IdentityData::Doc(_) => {
+
log::error!("identity value mixup: got a doc from a handle key (should be a did)");
+
Err(IdentityError::IdentityValTypeMixup(handle.to_string()))
+
}
+
IdentityData::NotFound => {
+
if (now - *last_fetch) >= MIN_NOT_FOUND_TTL {
+
self.queue_refresh(key).await;
+
}
+
Ok(None)
+
}
+
IdentityData::Did(did) => {
+
if (now - *last_fetch) >= MIN_TTL {
+
self.queue_refresh(key).await;
+
}
+
Ok(Some(did.clone()))
+
}
+
}
+
}
+
+
/// Fetch (and cache) a partial mini doc from a did
+
async fn did_to_partial_mini_doc(
+
&self,
+
did: &Did,
+
) -> Result<Option<PartialMiniDoc>, IdentityError> {
+
let key = IdentityKey::Did(did.clone());
+
let entry = self
+
.cache
+
.fetch(key.clone(), {
+
let did = did.clone();
+
let resolver = self.did_resolver.clone();
+
|| async move {
+
match resolver.resolve(&did).await {
+
Ok(did_doc) => {
+
// TODO: fix in atrium: should verify id is did
+
if did_doc.id != did.to_string() {
+
return Err(foyer::Error::other(Box::new(
+
IdentityError::BadDidDoc(
+
"did doc's id did not match did".to_string(),
+
),
+
)));
+
}
+
let mini_doc = did_doc.try_into().map_err(|e| {
+
foyer::Error::Other(Box::new(IdentityError::BadDidDoc(e)))
+
})?;
+
Ok(IdentityVal(UtcDateTime::now(), IdentityData::Doc(mini_doc)))
+
}
+
Err(atrium_identity::Error::NotFound) => {
+
Ok(IdentityVal(UtcDateTime::now(), IdentityData::NotFound))
+
}
+
Err(other) => Err(foyer::Error::Other(Box::new(
+
IdentityError::ResolutionFailed(other),
+
))),
+
}
+
}
+
})
+
.await?;
+
+
let now = UtcDateTime::now();
+
let IdentityVal(last_fetch, data) = entry.value();
+
match data {
+
IdentityData::Did(_) => {
+
log::error!("identity value mixup: got a did from a did key (should be a doc)");
+
Err(IdentityError::IdentityValTypeMixup(did.to_string()))
+
}
+
IdentityData::NotFound => {
+
if (now - *last_fetch) >= MIN_NOT_FOUND_TTL {
+
self.queue_refresh(key).await;
+
}
+
Ok(None)
+
}
+
IdentityData::Doc(mini_did) => {
+
if (now - *last_fetch) >= MIN_TTL {
+
self.queue_refresh(key).await;
+
}
+
Ok(Some(mini_did.clone()))
+
}
+
}
+
}
+
+
/// put a refresh task on the queue
+
///
+
/// this can be safely called from multiple concurrent tasks
+
async fn queue_refresh(&self, key: IdentityKey) {
+
// todo: max queue size
+
let mut q = self.refresh_queue.lock().await;
+
if !q.items.contains(&key) {
+
q.items.insert(key.clone());
+
q.queue.push_back(key);
+
}
+
}
+
+
/// find out what's next in the queue. concurrent consumers are not allowed.
+
///
+
/// intent is to leave the item in the queue while refreshing, so that a
+
/// producer will not re-add it if it's in progress. there's definitely
+
/// better ways to do this, but this is ~simple for as far as a single
+
/// consumer can take us.
+
///
+
/// we could take it from the queue but leave it in the set and remove from
+
/// set later, but splitting them apart feels more bug-prone.
+
async fn peek_refresh(&self) -> Option<IdentityKey> {
+
let q = self.refresh_queue.lock().await;
+
q.queue.front().cloned()
+
}
+
+
/// call to clear the latest key from the refresh queue. concurrent consumers not allowed.
+
///
+
/// must provide the last peeked refresh queue item as a small safety check
+
async fn complete_refresh(&self, key: &IdentityKey) -> Result<(), IdentityError> {
+
let mut q = self.refresh_queue.lock().await;
+
+
let Some(queue_key) = q.queue.pop_front() else {
+
// gone from queue + since we're in an error condition, make sure it's not stuck in items
+
// (not toctou because we have the lock)
+
// we're bolder here than below and remove from items, because if the queue is *empty*, then we
+
// know it hasn't been re-added since losing sync.
+
if q.items.remove(key) {
+
log::error!("identity refresh: queue de-sync: not in ");
+
} else {
+
log::warn!(
+
"identity refresh: tried to complete with wrong key. are multiple queue consumers running?"
+
);
+
}
+
return Err(IdentityError::RefreshQueueKeyError("no key in queue"));
+
};
+
+
if queue_key != *key {
+
// extra weird case here, what's the most defensive behaviour?
+
// we have two keys: ours should have been first but isn't. this shouldn't happen, so let's
+
// just leave items alone for it. risks unbounded growth but we're in a bad place already.
+
// the other key is the one we just popped. we didn't want it, so maybe we should put it
+
// back, BUT if we somehow ended up with concurrent consumers, we have bigger problems. take
+
// responsibility for taking it instead: remove it from items as well, and just drop it.
+
//
+
// hope that whoever calls us takes this error seriously.
+
if q.items.remove(&queue_key) {
+
log::warn!(
+
"identity refresh: queue de-sync + dropping a bystander key without refreshing it!"
+
);
+
} else {
+
// you thought things couldn't get weirder? (i mean hopefully they can't)
+
log::error!("identity refresh: queue de-sync + bystander key also de-sync!?");
+
}
+
return Err(IdentityError::RefreshQueueKeyError(
+
"wrong key at front of queue",
+
));
+
}
+
+
if q.items.remove(key) {
+
Ok(())
+
} else {
+
log::error!("identity refresh: queue de-sync: key not in items");
+
Err(IdentityError::RefreshQueueKeyError("key not in items"))
+
}
+
}
+
+
/// run the refresh queue consumer
+
pub async fn run_refresher(&self) -> Result<(), IdentityError> {
+
let _guard = self
+
.refresher
+
.try_lock()
+
.expect("there to only be one refresher running");
+
loop {
+
let Some(task_key) = self.peek_refresh().await else {
+
tokio::time::sleep(tokio::time::Duration::from_millis(100)).await;
+
continue;
+
};
+
match task_key {
+
IdentityKey::Handle(ref handle) => {
+
log::trace!("refreshing handle {handle:?}");
+
match self.handle_resolver.resolve(handle).await {
+
Ok(did) => {
+
self.cache.insert(
+
task_key.clone(),
+
IdentityVal(UtcDateTime::now(), IdentityData::Did(did)),
+
);
+
}
+
Err(atrium_identity::Error::NotFound) => {
+
self.cache.insert(
+
task_key.clone(),
+
IdentityVal(UtcDateTime::now(), IdentityData::NotFound),
+
);
+
}
+
Err(err) => {
+
log::warn!(
+
"failed to refresh handle: {err:?}. leaving stale (should we eventually do something?)"
+
);
+
}
+
}
+
self.complete_refresh(&task_key).await?; // failures are bugs, so break loop
+
}
+
IdentityKey::Did(ref did) => {
+
log::trace!("refreshing did doc: {did:?}");
+
+
match self.did_resolver.resolve(did).await {
+
Ok(did_doc) => {
+
// TODO: fix in atrium: should verify id is did
+
if did_doc.id != did.to_string() {
+
log::warn!(
+
"refreshed did doc failed: wrong did doc id. dropping refresh."
+
);
+
self.complete_refresh(&task_key).await?; // actually drop it so the refresher doesn't retry it forever
+
continue;
+
}
+
let mini_doc = match did_doc.try_into() {
+
Ok(md) => md,
+
Err(e) => {
+
log::warn!(
+
"converting mini doc failed: {e:?}. dropping refresh."
+
);
+
self.complete_refresh(&task_key).await?; // actually drop it so the refresher doesn't retry it forever
+
continue;
+
}
+
};
+
self.cache.insert(
+
task_key.clone(),
+
IdentityVal(UtcDateTime::now(), IdentityData::Doc(mini_doc)),
+
);
+
}
+
Err(atrium_identity::Error::NotFound) => {
+
self.cache.insert(
+
task_key.clone(),
+
IdentityVal(UtcDateTime::now(), IdentityData::NotFound),
+
);
+
}
+
Err(err) => {
+
log::warn!(
+
"failed to refresh did doc: {err:?}. leaving stale (should we eventually do something?)"
+
);
+
}
+
}
+
+
self.complete_refresh(&task_key).await?; // failures are bugs, so break loop
+
}
+
}
+
}
+
}
+
}
+
+
pub struct HickoryDnsTxtResolver(TokioResolver);
+
+
impl HickoryDnsTxtResolver {
+
fn new() -> Result<Self, ResolveError> {
+
Ok(Self(TokioResolver::builder_tokio()?.build()))
+
}
+
}
+
+
impl DnsTxtResolver for HickoryDnsTxtResolver {
+
async fn resolve(
+
&self,
+
query: &str,
+
) -> core::result::Result<Vec<String>, Box<dyn std::error::Error + Send + Sync>> {
+
match self.0.txt_lookup(query).await {
+
Ok(r) => {
+
metrics::counter!("whoami_resolve_dns_txt", "success" => "true").increment(1);
+
Ok(r.iter().map(|r| r.to_string()).collect())
+
}
+
Err(e) => {
+
metrics::counter!("whoami_resolve_dns_txt", "success" => "false").increment(1);
+
Err(e.into())
+
}
+
}
+
}
+
}
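
For orientation, a minimal usage sketch of the Identity service added above (not part of this diff): it only leans on the pieces visible here, `Identity::new` / `handle_to_did` / `did_to_pds` plus the `pub use` added to lib.rs below, with error handling collapsed into `Option` for brevity.

```rust
use atrium_api::types::string::Handle;
use slingshot::Identity;

// sketch: resolve a raw handle string all the way to its PDS base URL,
// going through the foyer-backed identity cache both times
async fn resolve_pds_for(identity: &Identity, raw_handle: &str) -> Option<String> {
    // validate the handle syntactically first
    let handle = Handle::new(raw_handle.to_string()).ok()?;
    // handle -> DID, verified against the did doc's alsoKnownAs (cached, maybe stale)
    let did = identity.handle_to_did(handle).await.ok()??;
    // DID -> PDS endpoint, so getRecord knows which host to hit
    identity.did_to_pds(did).await.ok()?
}
```

A stale entry still resolves immediately; `queue_refresh` just schedules a background re-resolution that `run_refresher` works through.
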
+3 -1
slingshot/src/lib.rs
···
mod consumer;
pub mod error;
mod firehose_cache;
+
mod identity;
mod record;
mod server;
pub use consumer::consume;
pub use firehose_cache::firehose_cache;
-
pub use record::CachedRecord;
+
pub use identity::Identity;
+
pub use record::{CachedRecord, Repo};
pub use server::serve;
+34 -3
slingshot/src/main.rs
···
// use foyer::HybridCache;
// use foyer::{Engine, DirectFsDeviceOptions, HybridCacheBuilder};
use metrics_exporter_prometheus::PrometheusBuilder;
-
use slingshot::{consume, error::MainTaskError, firehose_cache, serve};
+
use slingshot::{Identity, Repo, consume, error::MainTaskError, firehose_cache, serve};
+
use std::path::PathBuf;
use clap::Parser;
use tokio_util::sync::CancellationToken;
···
/// reduces CPU at the expense of more ingress bandwidth
#[arg(long, action)]
jetstream_no_zstd: bool,
+
/// where to keep disk caches
+
#[arg(long)]
+
cache_dir: PathBuf,
}
#[tokio::main]
···
log::info!("metrics listening at http://0.0.0.0:8765");
}
+
std::fs::create_dir_all(&args.cache_dir).map_err(|e| {
+
format!(
+
"failed to ensure cache parent dir: {e:?} (dir: {:?})",
+
args.cache_dir
+
)
+
})?;
+
let cache_dir = args.cache_dir.canonicalize().map_err(|e| {
+
format!(
+
"failed to canonicalize cache_dir: {e:?} (dir: {:?})",
+
args.cache_dir
+
)
+
})?;
+
log::info!("cache dir ready at at {cache_dir:?}.");
+
log::info!("setting up firehose cache...");
-
let cache = firehose_cache("./foyer").await?;
+
let cache = firehose_cache(cache_dir.join("./firehose")).await?;
log::info!("firehose cache ready.");
let mut tasks: tokio::task::JoinSet<Result<(), MainTaskError>> = tokio::task::JoinSet::new();
+
log::info!("starting identity service...");
+
let identity = Identity::new(cache_dir.join("./identity"))
+
.await
+
.map_err(|e| format!("identity setup failed: {e:?}"))?;
+
log::info!("identity service ready.");
+
let identity_refresher = identity.clone();
+
tasks.spawn(async move {
+
identity_refresher.run_refresher().await?;
+
Ok(())
+
});
+
+
let repo = Repo::new(identity);
+
let server_shutdown = shutdown.clone();
let server_cache_handle = cache.clone();
tasks.spawn(async move {
-
serve(server_cache_handle, server_shutdown).await?;
+
serve(server_cache_handle, repo, server_shutdown).await?;
Ok(())
});
+100 -1
slingshot/src/record.rs
···
-
use jetstream::exports::Cid;
+
//! cached record storage
+
+
use crate::{Identity, error::RecordError};
+
use atrium_api::types::string::{Cid, Did, Handle};
+
use reqwest::Client;
use serde::{Deserialize, Serialize};
use serde_json::value::RawValue;
+
use std::str::FromStr;
+
use std::time::Duration;
+
use url::Url;
#[derive(Debug, Serialize, Deserialize)]
pub struct RawRecord {
···
Found(RawRecord),
Deleted,
}
+
+
//////// upstream record fetching
+
+
#[derive(Deserialize)]
+
struct RecordResponseObject {
+
#[allow(dead_code)] // expect it to be there but we ignore it
+
uri: String,
+
/// CID for this exact version of the record
+
///
+
/// this is optional in the spec and that's potentially TODO for slingshot
+
cid: Option<String>,
+
/// the record itself as JSON
+
value: Box<RawValue>,
+
}
+
+
#[derive(Clone)]
+
pub struct Repo {
+
identity: Identity,
+
client: Client,
+
}
+
+
impl Repo {
+
pub fn new(identity: Identity) -> Self {
+
let client = Client::builder()
+
.user_agent(format!(
+
"microcosm slingshot v{} (dev: @bad-example.com)",
+
env!("CARGO_PKG_VERSION")
+
))
+
.no_proxy()
+
.timeout(Duration::from_secs(10))
+
.build()
+
.unwrap();
+
Repo { identity, client }
+
}
+
+
pub async fn get_record(
+
&self,
+
did_or_handle: String,
+
collection: String,
+
rkey: String,
+
cid: Option<String>,
+
) -> Result<CachedRecord, RecordError> {
+
let did = match Did::new(did_or_handle.clone()) {
+
Ok(did) => did,
+
Err(_) => {
+
let handle = Handle::new(did_or_handle).map_err(|_| RecordError::BadRepo)?;
+
let Some(did) = self.identity.handle_to_did(handle).await? else {
+
return Err(RecordError::NotFound("could not resolve and verify handle"));
+
};
+
did
+
}
+
};
+
let Some(pds) = self.identity.did_to_pds(did.clone()).await? else {
+
return Err(RecordError::NotFound("could not get pds for DID"));
+
};
+
+
// TODO: throttle by host probably, generally guard against outgoing requests
+
+
let mut params = vec![
+
("repo", did.to_string()),
+
("collection", collection),
+
("rkey", rkey),
+
];
+
if let Some(cid) = cid {
+
params.push(("cid", cid));
+
}
+
let mut url = Url::parse_with_params(&pds, &params)?;
+
url.set_path("/xrpc/com.atproto.repo.getRecord");
+
+
let res = self
+
.client
+
.get(url)
+
.send()
+
.await
+
.map_err(RecordError::SendError)?
+
.error_for_status()
+
.map_err(RecordError::StatusError)? // TODO atproto error handling (think about handling not found)
+
.json::<RecordResponseObject>()
+
.await
+
.map_err(RecordError::ParseJsonError)?; // todo...
+
+
let Some(cid) = res.cid else {
+
return Err(RecordError::MissingUpstreamCid);
+
};
+
let cid = Cid::from_str(&cid).map_err(|e| RecordError::BadUpstreamCid(e.to_string()))?;
+
+
Ok(CachedRecord::Found(RawRecord {
+
cid,
+
record: res.value.to_string(),
+
}))
+
}
+
}
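
And a hedged sketch of the record path that `Repo` enables (again not part of the diff; the handle and rkey below are made-up examples): a handle or DID goes in, an upstream `com.atproto.repo.getRecord` call comes out, ready to be stored as a `CachedRecord`.

```rust
use slingshot::{CachedRecord, Identity, Repo};

// sketch: fetch a record straight from its PDS through the new Repo helper
async fn fetch_example(identity: Identity) -> Result<CachedRecord, Box<dyn std::error::Error>> {
    let repo = Repo::new(identity);
    let record = repo
        .get_record(
            "bad-example.com".to_string(),    // repo: handle or DID (resolved + verified)
            "app.bsky.feed.post".to_string(), // collection NSID
            "3jwdwj2ctlk26".to_string(),      // rkey (hypothetical)
            None,                             // optional CID pin
        )
        .await?;
    Ok(record)
}
```

In the server changes below, this is what the foyer `fetch` closure falls back to on a cache miss.
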
+38 -11
slingshot/src/server.rs
···
-
use crate::{CachedRecord, error::ServerError};
+
use crate::{CachedRecord, Repo, error::ServerError};
use foyer::HybridCache;
+
use std::sync::Arc;
use tokio_util::sync::CancellationToken;
use poem::{Route, Server, listener::TcpListener};
···
struct Xrpc {
cache: HybridCache<String, CachedRecord>,
+
repo: Arc<Repo>,
}
#[OpenApi]
···
///
/// NOTE: handles should be accepted here but this is still TODO in slingshot
#[oai(example = "example_did")]
-
repo: Query<String>,
+
Query(repo): Query<String>,
/// The NSID of the record collection
#[oai(example = "example_collection")]
-
collection: Query<String>,
+
Query(collection): Query<String>,
/// The Record key
#[oai(example = "example_rkey")]
-
rkey: Query<String>,
+
Query(rkey): Query<String>,
/// Optional: the CID of the version of the record.
///
/// If not specified, then return the most recent version.
···
/// If specified and a newer version of the record exists, returns 404 not
/// found. That is: slingshot only retains the most recent version of a
/// record.
-
cid: Query<Option<String>>,
+
Query(cid): Query<Option<String>>,
) -> GetRecordResponse {
// TODO: yeah yeah
-
let at_uri = format!("at://{}/{}/{}", &*repo, &*collection, &*rkey);
+
let at_uri = format!("at://{repo}/{collection}/{rkey}");
let entry = self
.cache
-
.fetch(at_uri.clone(), || async move { todo!() })
+
.fetch(at_uri.clone(), {
+
let cid = cid.clone();
+
let repo_api = self.repo.clone();
+
|| async move {
+
repo_api
+
.get_record(repo, collection, rkey, cid)
+
.await
+
.map_err(|e| foyer::Error::Other(Box::new(e)))
+
}
+
})
.await
-
.unwrap();
+
.unwrap(); // todo
// TODO: actual 404
···
})),
}
}
+
+
// TODO
+
// #[oai(path = "/com.atproto.identity.resolveHandle", method = "get")]
+
// #[oai(path = "/com.atproto.identity.resolveDid", method = "get")]
+
// but these are both not specified to do bidirectional validation, which is what we want to offer
+
// com.atproto.identity.resolveIdentity seems right, but requires returning the full did-doc
+
// would be nice if there were two queries:
+
// did -> verified handle + pds url
+
// handle -> verified did + pds url
+
//
+
// we could do horrible things and implement resolveIdentity with only a stripped-down fake did doc
+
// but this will *definitely* cause problems because eg. we're not currently storing pubkeys and
+
// those are a little bit important
}
pub async fn serve(
cache: HybridCache<String, CachedRecord>,
+
repo: Repo,
_shutdown: CancellationToken,
) -> Result<(), ServerError> {
-
let api_service = OpenApiService::new(Xrpc { cache }, "Slingshot", env!("CARGO_PKG_VERSION"))
-
.server("http://localhost:3000")
-
.url_prefix("/xrpc");
+
let repo = Arc::new(repo);
+
let api_service =
+
OpenApiService::new(Xrpc { cache, repo }, "Slingshot", env!("CARGO_PKG_VERSION"))
+
.server("http://localhost:3000")
+
.url_prefix("/xrpc");
let app = Route::new()
.nest("/", api_service.scalar())