Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

wip: many-to-many counts (memorystore works!)

Changed files
+458 -3
constellation
+149 -1
constellation/src/server/mod.rs
···
const INDEX_BEGAN_AT_TS: u64 = 1738083600; // TODO: not this
fn to500(e: tokio::task::JoinError) -> http::StatusCode {
-
eprintln!("handler join error: {e}");
+
eprintln!("handler error: {e}");
http::StatusCode::INTERNAL_SERVER_ERROR
}
···
.map_err(to500)?
}
}),
+
)
+
.route(
+
"/xrpc/blue.microcosm.links.getManyToManyCounts",
+
get({
+
let store = store.clone();
+
move |accept, query| async {
+
spawn_blocking(|| get_many_to_many_counts(accept, query, store))
+
.await
+
.map_err(to500)?
+
}
+
})
)
.route(
"/links/count",
···
stats,
}))
}
+
+
#[derive(Clone, Deserialize)]
+
#[serde(rename_all = "camelCase")]
+
struct GetManyToManyCountsQuery {
+
subject: String,
+
source: String,
+
/// path to the secondary link in the linking record
+
path_to_other: String,
+
/// filter to linking records (join of the m2m) by these DIDs
+
#[serde(default)]
+
did: Vec<String>,
+
/// filter to specific secondary records
+
#[serde(default)]
+
other_subject: Vec<String>,
+
cursor: Option<OpaqueApiCursor>,
+
/// Set the max number of links to return per page of results
+
#[serde(default = "get_default_cursor_limit")]
+
limit: u64,
+
}
+
#[derive(Serialize)]
+
struct OtherSubjectCount {
+
subject: String,
+
total: u64,
+
distinct: u64,
+
}
+
#[derive(Template, Serialize)]
+
#[template(path = "get-many-to-many-counts.html.j2")]
+
struct GetManyToManyCountsResponse {
+
counts_by_other_subject: Vec<OtherSubjectCount>,
+
total_other_subjects: u64,
+
cursor: Option<OpaqueApiCursor>,
+
#[serde(skip_serializing)]
+
query: GetManyToManyCountsQuery,
+
}
+
fn get_many_to_many_counts(
+
accept: ExtractAccept,
+
query: axum_extra::extract::Query<GetManyToManyCountsQuery>,
+
store: impl LinkReader,
+
) -> Result<impl IntoResponse, http::StatusCode> {
+
let cursor_key = query
+
.cursor
+
.clone()
+
.map(|oc| ApiKeyedCursor::try_from(oc).map_err(|_| http::StatusCode::BAD_REQUEST))
+
.transpose()?
+
.map(|c| c.next);
+
+
let limit = query.limit;
+
if limit > DEFAULT_CURSOR_LIMIT_MAX {
+
return Err(http::StatusCode::BAD_REQUEST);
+
}
+
+
let filter_dids: HashSet<Did> = HashSet::from_iter(
+
query
+
.did
+
.iter()
+
.map(|d| d.trim())
+
.filter(|d| !d.is_empty())
+
.map(|d| Did(d.to_string())),
+
);
+
+
let filter_other_subjects: HashSet<String> = HashSet::from_iter(
+
query
+
.other_subject
+
.iter()
+
.map(|s| s.trim().to_string())
+
.filter(|s| !s.is_empty()),
+
);
+
+
let Some((collection, path)) = query.source.split_once(':') else {
+
return Err(http::StatusCode::BAD_REQUEST);
+
};
+
let path = format!(".{path}");
+
+
let paged = store
+
.get_many_to_many_counts(
+
&query.subject,
+
collection,
+
&path,
+
&query.path_to_other,
+
limit,
+
cursor_key,
+
&filter_dids,
+
&filter_other_subjects,
+
)
+
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?;
+
+
let cursor = paged.next.map(|next| {
+
ApiKeyedCursor {
+
version: paged.total,
+
next,
+
}
+
.into()
+
});
+
+
let items = paged
+
.items
+
.into_iter()
+
.map(|(subject, total, distinct)|
+
OtherSubjectCount {
+
subject,
+
total,
+
distinct,
+
})
+
.collect();
+
+
Ok(acceptable(
+
accept,
+
GetManyToManyCountsResponse {
+
counts_by_other_subject: items,
+
total_other_subjects: paged.total,
+
cursor,
+
query: (*query).clone(),
+
},
+
))
+
}
+
+
#[derive(Clone, Deserialize)]
struct GetLinksCountQuery {
···
OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap())
}
}
+
+
#[derive(Serialize, Deserialize)] // for bincode
+
struct ApiKeyedCursor {
+
version: u64, // total length (dirty check)
+
next: String, // the key
+
}
+
+
impl TryFrom<OpaqueApiCursor> for ApiKeyedCursor {
+
type Error = bincode::Error;
+
+
fn try_from(item: OpaqueApiCursor) -> Result<Self, Self::Error> {
+
bincode::DefaultOptions::new().deserialize(&item.0)
+
}
+
}
+
+
impl From<ApiKeyedCursor> for OpaqueApiCursor {
+
fn from(item: ApiKeyedCursor) -> Self {
+
OpaqueApiCursor(bincode::DefaultOptions::new().serialize(&item).unwrap())
+
}
+
}
+78 -1
constellation/src/storage/mem_store.rs
···
-
use super::{LinkReader, LinkStorage, PagedAppendingCollection, StorageStats};
+
use super::{LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats};
use crate::{ActionableEvent, CountsByCount, Did, RecordId};
use anyhow::Result;
use links::CollectedLink;
···
}
impl LinkReader for MemStorage {
+
fn get_many_to_many_counts(
+
&self,
+
target: &str,
+
collection: &str,
+
path: &str,
+
path_to_other: &str,
+
limit: u64,
+
after: Option<String>,
+
filter_dids: &HashSet<Did>,
+
filter_to_targets: &HashSet<String>,
+
) -> Result<PagedOrderedCollection<(String, u64, u64), String>> {
+
let empty = || {
+
PagedOrderedCollection {
+
items: vec![],
+
next: None,
+
total: 0,
+
}
+
};
+
let data = self.0.lock().unwrap();
+
let Some(paths) = data.targets.get(&Target::new(target)) else {
+
return Ok(empty());
+
};
+
let Some(linkers) = paths.get(&Source::new(collection, path)) else {
+
return Ok(empty());
+
};
+
+
let path_to_other = RecordPath::new(path_to_other);
+
let filter_to_targets: HashSet::<Target> = HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s)));
+
+
let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>)> = HashMap::new();
+
for (did, rkey) in linkers.into_iter().cloned().filter_map(|l| l) {
+
if !filter_dids.is_empty() && !filter_dids.contains(&did) {
+
continue;
+
}
+
if let Some(fwd_target) = data
+
.links
+
.get(&did)
+
.unwrap_or(&HashMap::new())
+
.get(&RepoId { collection: collection.to_string(), rkey })
+
.unwrap_or(&Vec::new())
+
.into_iter()
+
.filter_map(|(path, target)| {
+
if *path == path_to_other
+
&& (filter_to_targets.is_empty() || filter_to_targets.contains(target))
+
{ Some(target) } else { None }
+
})
+
.take(1)
+
.next()
+
{
+
let e = grouped_counts.entry(fwd_target.clone()).or_default();
+
e.0 += 1;
+
e.1.insert(did.clone());
+
}
+
}
+
let total = grouped_counts.len() as u64;
+
let mut items: Vec<(String, u64, u64)> = grouped_counts
+
.iter()
+
.map(|(k, (n, u))| (k.0.clone(), *n, u.len() as u64))
+
.collect();
+
items.sort();
+
items = items
+
.into_iter()
+
.skip_while(|(t, _, _)| after.as_ref().map(|a| t <= a).unwrap_or(false))
+
.take(limit as usize)
+
.collect();
+
let next = if items.len() as u64 >= limit {
+
items.last().map(|(t, _, _)| t.clone())
+
} else {
+
None
+
};
+
Ok(PagedOrderedCollection {
+
items,
+
next,
+
total,
+
})
+
}
+
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> {
let data = self.0.lock().unwrap();
let Some(paths) = data.targets.get(&Target::new(target)) else {
+216
constellation/src/storage/mod.rs
···
pub total: u64,
}
+
/// A paged collection whose keys are sorted instead of indexed
+
///
+
/// this has weaker guarantees than PagedAppendingCollection: it might
+
/// return a totally consistent snapshot. but it should avoid duplicates
+
/// and each page should at least be internally consistent.
+
#[derive(Debug, PartialEq)]
+
pub struct PagedOrderedCollection<T, K: Ord> {
+
pub items: Vec<T>,
+
pub next: Option<K>,
+
pub total: u64,
+
}
+
#[derive(Debug, Deserialize, Serialize, PartialEq)]
pub struct StorageStats {
/// estimate of how many accounts we've seen create links. the _subjects_ of any links are not represented here.
···
}
pub trait LinkReader: Clone + Send + Sync + 'static {
+
fn get_many_to_many_counts(
+
&self,
+
target: &str,
+
collection: &str,
+
path: &str,
+
path_to_other: &str,
+
limit: u64,
+
after: Option<String>,
+
filter_dids: &HashSet<Did>,
+
filter_to_targets: &HashSet<String>,
+
) -> Result<PagedOrderedCollection<(String, u64, u64), String>>;
+
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>;
fn get_distinct_did_count(&self, target: &str, collection: &str, path: &str) -> Result<u64>;
···
counts
});
assert_stats(storage.get_stats()?, 1..=1, 2..=2, 1..=1);
+
});
+
+
//////// many-to-many /////////
+
+
test_each_storage!(get_m2m_counts_empty, |storage| {
+
assert_eq!(storage.get_many_to_many_counts(
+
"a.com",
+
"a.b.c",
+
".d.e",
+
".f.g",
+
10,
+
None,
+
&HashSet::new(),
+
&HashSet::new(),
+
)?, PagedOrderedCollection {
+
items: vec![],
+
next: None,
+
total: 0,
+
});
+
});
+
+
test_each_storage!(get_m2m_counts_single, |storage| {
+
storage.push(
+
&ActionableEvent::CreateLinks {
+
record_id: RecordId {
+
did: "did:plc:asdf".into(),
+
collection: "app.t.c".into(),
+
rkey: "asdf".into(),
+
},
+
links: vec![
+
CollectedLink {
+
target: Link::Uri("a.com".into()),
+
path: ".abc.uri".into(),
+
},
+
CollectedLink {
+
target: Link::Uri("b.com".into()),
+
path: ".def.uri".into(),
+
},
+
CollectedLink {
+
target: Link::Uri("b.com".into()),
+
path: ".ghi.uri".into(),
+
},
+
],
+
},
+
0,
+
)?;
+
assert_eq!(storage.get_many_to_many_counts(
+
"a.com",
+
"app.t.c",
+
".abc.uri",
+
".def.uri",
+
10,
+
None,
+
&HashSet::new(),
+
&HashSet::new(),
+
)?, PagedOrderedCollection {
+
items: vec![("b.com".to_string(), 1, 1)],
+
next: None,
+
total: 1,
+
});
+
});
+
+
test_each_storage!(get_m2m_counts_filters, |storage| {
+
storage.push(
+
&ActionableEvent::CreateLinks {
+
record_id: RecordId {
+
did: "did:plc:asdf".into(),
+
collection: "app.t.c".into(),
+
rkey: "asdf".into(),
+
},
+
links: vec![
+
CollectedLink {
+
target: Link::Uri("a.com".into()),
+
path: ".abc.uri".into(),
+
},
+
CollectedLink {
+
target: Link::Uri("b.com".into()),
+
path: ".def.uri".into(),
+
},
+
],
+
},
+
0,
+
)?;
+
storage.push(
+
&ActionableEvent::CreateLinks {
+
record_id: RecordId {
+
did: "did:plc:asdfasdf".into(),
+
collection: "app.t.c".into(),
+
rkey: "asdf".into(),
+
},
+
links: vec![
+
CollectedLink {
+
target: Link::Uri("a.com".into()),
+
path: ".abc.uri".into(),
+
},
+
CollectedLink {
+
target: Link::Uri("b.com".into()),
+
path: ".def.uri".into(),
+
},
+
],
+
},
+
1,
+
)?;
+
storage.push(
+
&ActionableEvent::CreateLinks {
+
record_id: RecordId {
+
did: "did:plc:fdsa".into(),
+
collection: "app.t.c".into(),
+
rkey: "asdf".into(),
+
},
+
links: vec![
+
CollectedLink {
+
target: Link::Uri("a.com".into()),
+
path: ".abc.uri".into(),
+
},
+
CollectedLink {
+
target: Link::Uri("c.com".into()),
+
path: ".def.uri".into(),
+
},
+
],
+
},
+
2,
+
)?;
+
storage.push(
+
&ActionableEvent::CreateLinks {
+
record_id: RecordId {
+
did: "did:plc:fdsa".into(),
+
collection: "app.t.c".into(),
+
rkey: "asdf2".into(),
+
},
+
links: vec![
+
CollectedLink {
+
target: Link::Uri("a.com".into()),
+
path: ".abc.uri".into(),
+
},
+
CollectedLink {
+
target: Link::Uri("c.com".into()),
+
path: ".def.uri".into(),
+
},
+
],
+
},
+
3,
+
)?;
+
assert_eq!(storage.get_many_to_many_counts(
+
"a.com",
+
"app.t.c",
+
".abc.uri",
+
".def.uri",
+
10,
+
None,
+
&HashSet::new(),
+
&HashSet::new(),
+
)?, PagedOrderedCollection {
+
items: vec![
+
("b.com".to_string(), 2, 2),
+
("c.com".to_string(), 2, 1),
+
],
+
next: None,
+
total: 2,
+
});
+
assert_eq!(storage.get_many_to_many_counts(
+
"a.com",
+
"app.t.c",
+
".abc.uri",
+
".def.uri",
+
10,
+
None,
+
&HashSet::from_iter([Did("did:plc:fdsa".to_string())]),
+
&HashSet::new(),
+
)?, PagedOrderedCollection {
+
items: vec![
+
("c.com".to_string(), 2, 1),
+
],
+
next: None,
+
total: 1,
+
});
+
assert_eq!(storage.get_many_to_many_counts(
+
"a.com",
+
"app.t.c",
+
".abc.uri",
+
".def.uri",
+
10,
+
None,
+
&HashSet::new(),
+
&HashSet::from_iter(["b.com".to_string()]),
+
)?, PagedOrderedCollection {
+
items: vec![
+
("b.com".to_string(), 2, 2),
+
],
+
next: None,
+
total: 1,
+
});
});
+15 -1
constellation/src/storage/rocks_store.rs
···
-
use super::{ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, StorageStats};
+
use super::{ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats};
use crate::{CountsByCount, Did, RecordId};
use anyhow::{bail, Result};
use bincode::Options as BincodeOptions;
···
}
impl LinkReader for RocksStorage {
+
fn get_many_to_many_counts(
+
&self,
+
_target: &str,
+
_collection: &str,
+
_path: &str,
+
_path_to_other: &str,
+
_limit: u64,
+
_after: Option<String>,
+
_filter_dids: &HashSet<Did>,
+
_filter_to_targets: &HashSet<String>,
+
) -> Result<PagedOrderedCollection<(String, u64, u64), String>> {
+
todo!();
+
}
+
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> {
let target_key = TargetKey(
Target(target.to_string()),
constellation/templates/get-many-to-many-counts.html.j2

This is a binary file and will not be displayed.