Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

oh nooooooo we don't have id->target in rocks aaaa
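(context for the title: the rocks store interns target strings behind numeric TargetIds, and the many-to-many counts query below groups and pages by TargetId — so turning results back into strings needs the id -> target direction, resolved in the diff via target_id_table.get_val_from_id. a minimal in-memory sketch of a bidirectional intern table, hypothetical names only, just to show why both directions have to be written — the real store persists both as RocksDB entries:)

    use std::collections::HashMap;

    // hypothetical sketch; the real store keeps both directions in RocksDB
    #[derive(Default)]
    struct InternTable {
        by_val: HashMap<String, u64>, // target -> id
        by_id: HashMap<u64, String>,  // id -> target: the direction this commit needs
    }

    impl InternTable {
        fn get_or_create_id(&mut self, val: &str) -> u64 {
            if let Some(&id) = self.by_val.get(val) {
                return id;
            }
            let id = self.by_id.len() as u64; // next fresh id
            self.by_val.insert(val.to_string(), id);
            self.by_id.insert(id, val.to_string()); // without this write, id -> target is unrecoverable
            id
        }

        fn get_val_from_id(&self, id: u64) -> Option<&str> {
            self.by_id.get(&id).map(|s| s.as_str())
        }
    }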

Changed files
+260 -130
constellation
+7 -19
constellation/src/server/mod.rs
···
.await
.map_err(to500)?
}
- })
+ }),
)
.route(
"/links/count",
···
#[template(path = "get-many-to-many-counts.html.j2")]
struct GetManyToManyCountsResponse {
counts_by_other_subject: Vec<OtherSubjectCount>,
- total_other_subjects: u64,
cursor: Option<OpaqueApiCursor>,
#[serde(skip_serializing)]
query: GetManyToManyCountsQuery,
···
)
.map_err(|_| http::StatusCode::INTERNAL_SERVER_ERROR)?;
- let cursor = paged.next.map(|next| {
-     ApiKeyedCursor {
-         version: paged.total,
-         next,
-     }
-     .into()
- });
+ let cursor = paged.next.map(|next| ApiKeyedCursor { next }.into());
let items = paged
.items
.into_iter()
- .map(|(subject, total, distinct)|
-     OtherSubjectCount {
-         subject,
-         total,
-         distinct,
-     })
+ .map(|(subject, total, distinct)| OtherSubjectCount {
+     subject,
+     total,
+     distinct,
+ })
.collect();
Ok(acceptable(
accept,
GetManyToManyCountsResponse {
counts_by_other_subject: items,
- total_other_subjects: paged.total,
cursor,
query: (*query).clone(),
},
))
}
-
-
#[derive(Clone, Deserialize)]
struct GetLinksCountQuery {
···
#[derive(Serialize, Deserialize)] // for bincode
struct ApiKeyedCursor {
- version: u64, // total length (dirty check)
next: String, // the key
}
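a note on the cursor shape: dropping total also drops the version dirty-check, so the opaque cursor now carries only the resume key. the "// for bincode" comment hints at the encoding; here's a minimal round-trip sketch assuming a url-safe base64 wrapper (the actual OpaqueApiCursor encoding may differ):

    use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine};
    use serde::{Deserialize, Serialize};

    #[derive(Serialize, Deserialize)]
    struct ApiKeyedCursor {
        next: String, // the key to resume after
    }

    // hypothetical encode/decode pair for an opaque API cursor
    fn encode_cursor(c: &ApiKeyedCursor) -> String {
        URL_SAFE_NO_PAD.encode(bincode::serialize(c).expect("cursor is serializable"))
    }

    fn decode_cursor(s: &str) -> Option<ApiKeyedCursor> {
        let bytes = URL_SAFE_NO_PAD.decode(s).ok()?;
        bincode::deserialize(&bytes).ok()
    }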
+17 -19
constellation/src/storage/mem_store.rs
···
- use super::{LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats};
+ use super::{
+     LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats,
+ };
use crate::{ActionableEvent, CountsByCount, Did, RecordId};
use anyhow::Result;
use links::CollectedLink;
···
filter_dids: &HashSet<Did>,
filter_to_targets: &HashSet<String>,
) -> Result<PagedOrderedCollection<(String, u64, u64), String>> {
- let empty = || {
-     PagedOrderedCollection {
-         items: vec![],
-         next: None,
-         total: 0,
-     }
- };
let data = self.0.lock().unwrap();
let Some(paths) = data.targets.get(&Target::new(target)) else {
- return Ok(empty());
+ return Ok(PagedOrderedCollection::default());
};
let Some(linkers) = paths.get(&Source::new(collection, path)) else {
- return Ok(empty());
+ return Ok(PagedOrderedCollection::default());
};
let path_to_other = RecordPath::new(path_to_other);
- let filter_to_targets: HashSet::<Target> = HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s)));
+ let filter_to_targets: HashSet<Target> =
+     HashSet::from_iter(filter_to_targets.iter().map(|s| Target::new(s)));
let mut grouped_counts: HashMap<Target, (u64, HashSet<Did>)> = HashMap::new();
for (did, rkey) in linkers.into_iter().cloned().filter_map(|l| l) {
···
.links
.get(&did)
.unwrap_or(&HashMap::new())
- .get(&RepoId { collection: collection.to_string(), rkey })
+ .get(&RepoId {
+     collection: collection.to_string(),
+     rkey,
+ })
.unwrap_or(&Vec::new())
.into_iter()
.filter_map(|(path, target)| {
if *path == path_to_other
&& (filter_to_targets.is_empty() || filter_to_targets.contains(target))
- { Some(target) } else { None }
+ {
+     Some(target)
+ } else {
+     None
+ }
})
.take(1)
.next()
···
e.1.insert(did.clone());
}
}
- let total = grouped_counts.len() as u64;
let mut items: Vec<(String, u64, u64)> = grouped_counts
.iter()
.map(|(k, (n, u))| (k.0.clone(), *n, u.len() as u64))
···
} else {
None
};
- Ok(PagedOrderedCollection {
-     items,
-     next,
-     total,
- })
+ Ok(PagedOrderedCollection { items, next })
}
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> {
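aside: the aggregation above folds each matching link into (total links, distinct linking DIDs) per forward target — the (N, M) pairs asserted in the storage/mod.rs tests below. a standalone sketch of just that fold, with illustrative types rather than the store's internals:

    use std::collections::{HashMap, HashSet};

    // (forward target, linking did) pairs in; one
    // (target, total links, distinct linkers) row out per target
    fn group_counts(links: &[(&str, &str)]) -> Vec<(String, u64, u64)> {
        let mut grouped: HashMap<String, (u64, HashSet<String>)> = HashMap::new();
        for (target, did) in links {
            let entry = grouped.entry(target.to_string()).or_default();
            entry.0 += 1; // every link counts toward the total
            entry.1.insert(did.to_string()); // the set deduplicates linkers
        }
        grouped
            .into_iter()
            .map(|(target, (total, dids))| (target, total, dids.len() as u64))
            .collect()
    }

    fn main() {
        // two links from one did: total 2, distinct 1
        let rows = group_counts(&[("c.com", "did:plc:asdf"), ("c.com", "did:plc:asdf")]);
        assert_eq!(rows, vec![("c.com".to_string(), 2, 1)]);
    }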
+81 -79
constellation/src/storage/mod.rs
···
/// this has weaker guarantees than PagedAppendingCollection: it might not
/// return a totally consistent snapshot. but it should avoid duplicates,
/// and each page should at least be internally consistent.
- #[derive(Debug, PartialEq)]
+ #[derive(Debug, PartialEq, Default)]
pub struct PagedOrderedCollection<T, K: Ord> {
pub items: Vec<T>,
pub next: Option<K>,
- pub total: u64,
}
#[derive(Debug, Deserialize, Serialize, PartialEq)]
···
//////// many-to-many /////////
test_each_storage!(get_m2m_counts_empty, |storage| {
- assert_eq!(storage.get_many_to_many_counts(
-     "a.com",
-     "a.b.c",
-     ".d.e",
-     ".f.g",
-     10,
-     None,
-     &HashSet::new(),
-     &HashSet::new(),
- )?, PagedOrderedCollection {
-     items: vec![],
-     next: None,
-     total: 0,
- });
+ assert_eq!(
+     storage.get_many_to_many_counts(
+         "a.com",
+         "a.b.c",
+         ".d.e",
+         ".f.g",
+         10,
+         None,
+         &HashSet::new(),
+         &HashSet::new(),
+     )?,
+     PagedOrderedCollection {
+         items: vec![],
+         next: None,
+     }
+ );
});
test_each_storage!(get_m2m_counts_single, |storage| {
···
},
0,
)?;
- assert_eq!(storage.get_many_to_many_counts(
-     "a.com",
-     "app.t.c",
-     ".abc.uri",
-     ".def.uri",
-     10,
-     None,
-     &HashSet::new(),
-     &HashSet::new(),
- )?, PagedOrderedCollection {
-     items: vec![("b.com".to_string(), 1, 1)],
-     next: None,
-     total: 1,
- });
+ assert_eq!(
+     storage.get_many_to_many_counts(
+         "a.com",
+         "app.t.c",
+         ".abc.uri",
+         ".def.uri",
+         10,
+         None,
+         &HashSet::new(),
+         &HashSet::new(),
+     )?,
+     PagedOrderedCollection {
+         items: vec![("b.com".to_string(), 1, 1)],
+         next: None,
+     }
+ );
});
test_each_storage!(get_m2m_counts_filters, |storage| {
···
},
3,
)?;
- assert_eq!(storage.get_many_to_many_counts(
-     "a.com",
-     "app.t.c",
-     ".abc.uri",
-     ".def.uri",
-     10,
-     None,
-     &HashSet::new(),
-     &HashSet::new(),
- )?, PagedOrderedCollection {
-     items: vec![
-         ("b.com".to_string(), 2, 2),
-         ("c.com".to_string(), 2, 1),
-     ],
-     next: None,
-     total: 2,
- });
- assert_eq!(storage.get_many_to_many_counts(
-     "a.com",
-     "app.t.c",
-     ".abc.uri",
-     ".def.uri",
-     10,
-     None,
-     &HashSet::from_iter([Did("did:plc:fdsa".to_string())]),
-     &HashSet::new(),
- )?, PagedOrderedCollection {
-     items: vec![
-         ("c.com".to_string(), 2, 1),
-     ],
-     next: None,
-     total: 1,
- });
- assert_eq!(storage.get_many_to_many_counts(
-     "a.com",
-     "app.t.c",
-     ".abc.uri",
-     ".def.uri",
-     10,
-     None,
-     &HashSet::new(),
-     &HashSet::from_iter(["b.com".to_string()]),
- )?, PagedOrderedCollection {
-     items: vec![
-         ("b.com".to_string(), 2, 2),
-     ],
-     next: None,
-     total: 1,
- });
+ assert_eq!(
+     storage.get_many_to_many_counts(
+         "a.com",
+         "app.t.c",
+         ".abc.uri",
+         ".def.uri",
+         10,
+         None,
+         &HashSet::new(),
+         &HashSet::new(),
+     )?,
+     PagedOrderedCollection {
+         items: vec![("b.com".to_string(), 2, 2), ("c.com".to_string(), 2, 1),],
+         next: None,
+     }
+ );
+ assert_eq!(
+     storage.get_many_to_many_counts(
+         "a.com",
+         "app.t.c",
+         ".abc.uri",
+         ".def.uri",
+         10,
+         None,
+         &HashSet::from_iter([Did("did:plc:fdsa".to_string())]),
+         &HashSet::new(),
+     )?,
+     PagedOrderedCollection {
+         items: vec![("c.com".to_string(), 2, 1),],
+         next: None,
+     }
+ );
+ assert_eq!(
+     storage.get_many_to_many_counts(
+         "a.com",
+         "app.t.c",
+         ".abc.uri",
+         ".def.uri",
+         10,
+         None,
+         &HashSet::new(),
+         &HashSet::from_iter(["b.com".to_string()]),
+     )?,
+     PagedOrderedCollection {
+         items: vec![("b.com".to_string(), 2, 2),],
+         next: None,
+     }
+ );
});
+155 -13
constellation/src/storage/rocks_store.rs
···
- use super::{ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection, StorageStats};
+ use super::{
+     ActionableEvent, LinkReader, LinkStorage, PagedAppendingCollection, PagedOrderedCollection,
+     StorageStats,
+ };
use crate::{CountsByCount, Did, RecordId};
use anyhow::{bail, Result};
use bincode::Options as BincodeOptions;
···
MultiThreaded, Options, PrefixRange, ReadOptions, WriteBatch,
};
use serde::{Deserialize, Serialize};
- use std::collections::{HashMap, HashSet};
+ use std::collections::{BTreeMap, HashMap, HashSet};
use std::io::Read;
use std::marker::PhantomData;
use std::path::{Path, PathBuf};
···
impl LinkReader for RocksStorage {
fn get_many_to_many_counts(
&self,
- _target: &str,
- _collection: &str,
- _path: &str,
- _path_to_other: &str,
- _limit: u64,
- _after: Option<String>,
- _filter_dids: &HashSet<Did>,
- _filter_to_targets: &HashSet<String>,
+ target: &str,
+ collection: &str,
+ path: &str,
+ path_to_other: &str,
+ limit: u64,
+ after: Option<String>,
+ filter_dids: &HashSet<Did>,
+ filter_to_targets: &HashSet<String>,
) -> Result<PagedOrderedCollection<(String, u64, u64), String>> {
- todo!();
+ let collection = Collection(collection.to_string());
+ let path = RPath(path.to_string());
+
+ let target_key = TargetKey(Target(target.to_string()), collection.clone(), path.clone());
+
+ // unfortunately the cursor is a, uh, stringified number.
+ // this was easier for the memstore (plain target, not target id), and
+ // making it generic is a bit awful.
+ // so... parse the number out of a string here :(
+ // TODO: this should bubble up to a BAD_REQUEST response
+ let after = after.map(|s| s.parse::<u64>().map(TargetId)).transpose()?;
+
+ let Some(target_id) = self.target_id_table.get_id_val(&self.db, &target_key)? else {
+     return Ok(Default::default());
+ };
+
+ let filter_did_ids: HashMap<DidId, bool> = filter_dids
+     .into_iter()
+     .filter_map(|did| self.did_id_table.get_id_val(&self.db, did).transpose())
+     .collect::<Result<Vec<DidIdValue>>>()?
+     .into_iter()
+     .map(|DidIdValue(id, active)| (id, active))
+     .collect();
+
+ let filter_to_target_ids = filter_to_targets
+     .into_iter()
+     .filter_map(|target| {
+         self.target_id_table
+             .get_id_val(
+                 &self.db,
+                 &TargetKey(Target(target.to_string()), collection.clone(), path.clone()),
+             )
+             .transpose()
+     })
+     .collect::<Result<HashSet<TargetId>>>()?;
+
+ let linkers = self.get_target_linkers(&target_id)?;
+
+ let mut grouped_counts: BTreeMap<TargetId, (u64, HashSet<DidId>)> = BTreeMap::new();
+
+ for (did_id, rkey) in linkers.0 {
+     if did_id.is_empty() {
+         continue;
+     }
+
+     if !filter_did_ids.is_empty() && filter_did_ids.get(&did_id) != Some(&true) {
+         continue;
+     }
+
+     let record_link_key = RecordLinkKey(did_id, collection.clone(), rkey);
+     let Some(targets) = self.get_record_link_targets(&record_link_key)? else {
+         continue;
+     };
+
+     let Some(fwd_target) = targets
+         .0
+         .into_iter()
+         .filter_map(|RecordLinkTarget(rpath, target_id)| {
+             if rpath.0 == path_to_other
+                 && (filter_to_target_ids.is_empty()
+                     || filter_to_target_ids.contains(&target_id))
+             {
+                 Some(target_id)
+             } else {
+                 None
+             }
+         })
+         .take(1)
+         .next()
+     else {
+         continue;
+     };
+
+     // small relief: we page over target ids, so we can already bail
+     // reprocessing previous pages here
+     if after.as_ref().map(|a| fwd_target <= *a).unwrap_or(false) {
+         continue;
+     }
+
+     // aand we can skip target ids that must be on future pages
+     // (this check continues after the did-lookup, which we have to do)
+     let page_is_full = grouped_counts.len() as u64 >= limit;
+     if page_is_full {
+         let current_max = grouped_counts.keys().rev().next().unwrap(); // limit should be non-zero bleh
+         if fwd_target > *current_max {
+             continue;
+         }
+     }
+
+     // bit painful: 2-step lookup to make sure this did is active
+     let Some(did) = self.did_id_table.get_val_from_id(&self.db, did_id.0)? else {
+         eprintln!("failed to look up did from did_id {did_id:?}");
+         continue;
+     };
+     let Some(DidIdValue(_, active)) = self.did_id_table.get_id_val(&self.db, &did)? else {
+         eprintln!("failed to look up did_value from did_id {did_id:?}: {did:?}: data consistency bug?");
+         continue;
+     };
+     if !active {
+         continue;
+     }
+
+     // page-management, continued
+     // if we have a full page, and we're inserting a *new* key less than
+     // the current max, then we can evict the current max
+     let mut should_evict = false;
+     let entry = grouped_counts.entry(fwd_target.clone()).or_insert_with(|| {
+         // this is a *new* key, so kick the max if we're full
+         should_evict = page_is_full;
+         Default::default()
+     });
+     entry.0 += 1;
+     entry.1.insert(did_id.clone());
+
+     if should_evict {
+         grouped_counts.pop_last();
+     }
+ }
+
+ // grab the cursor first: the loop below consumes grouped_counts
+ let next = if grouped_counts.len() as u64 >= limit {
+     // yeah.... it's a number saved as a string......sorry
+     grouped_counts
+         .keys()
+         .rev()
+         .next()
+         .map(|k| format!("{}", k.0))
+ } else {
+     None
+ };
+
+ let mut items: Vec<(String, u64, u64)> = Vec::with_capacity(grouped_counts.len());
+ for (target_id, (n, dids)) in grouped_counts {
+     let Some(target) = self.target_id_table.get_val_from_id(&self.db, target_id)? else {
+         eprintln!("failed to look up target from target_id {target_id:?}");
+         continue;
+     };
+     items.push((target, n, dids.len() as u64));
+ }
+
+ Ok(PagedOrderedCollection { items, next })
}
fn get_count(&self, target: &str, collection: &str, path: &str) -> Result<u64> {
···
// target ids
- #[derive(Debug, Clone, Serialize, Deserialize)]
+ #[derive(Debug, Clone, Serialize, Deserialize, PartialOrd, Ord, PartialEq, Eq, Hash)]
struct TargetId(u64); // key
- #[derive(Debug, Clone, Serialize, Deserialize)]
+ #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
pub struct Target(pub String); // the actual target/uri
// targets (uris, dids, etc.): the reverse index