Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

timeseries endpoint
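The change below adds a GET /timeseries endpoint to the ufos server: hourly rollup counts for a collection, merged into coarser buckets via a `step` parameter (seconds, minimum 3600, truncated down to a whole hour), with `since`/`until` defaulting to one week ago and now. A rough, hypothetical client sketch of calling it follows; the host, port, and example collection NSID are made up, reqwest/tokio/serde_json are only assumed to be available in a scratch client crate, and none of this is part of the commit:

// Hypothetical client sketch -- not part of this commit. Host, port, and the
// example collection NSID are made up; reqwest, tokio, and serde_json are
// assumed to be available in whatever scratch crate runs this.
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // step is in seconds (min 3600) and gets truncated down to a whole hour;
    // since/until default to one week ago and now when omitted.
    let url = "http://localhost:8080/timeseries?collection=app.bsky.feed.like&step=86400";
    let body: serde_json::Value = reqwest::get(url).await?.json().await?;
    // Expected shape, per CollectionTimeseriesResponse / JustCount below:
    // { "range": ["2025-05-01T00:00:00Z", ...],
    //   "series": { "app.bsky.feed.like": [{ "records": 123, "dids_estimate": 45 }, ...] } }
    println!("{body}");
    Ok(())
}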

+6
ufos/src/lib.rs
···
    dids_estimate: u64,
}
+#[derive(Debug, Serialize, JsonSchema)]
+pub struct JustCount {
+    records: u64,
+    dids_estimate: u64,
+}
+
#[derive(Debug)]
pub enum OrderCollectionsBy {
    Lexi { cursor: Option<Vec<u8>> },
+84 -5
ufos/src/server.rs
···
use crate::index_html::INDEX_HTML;
use crate::storage::StoreReader;
use crate::store_types::{HourTruncatedCursor, WeekTruncatedCursor};
-use crate::{ConsumerInfo, Cursor, Nsid, NsidCount, OrderCollectionsBy, UFOsRecord};
+use crate::{ConsumerInfo, Cursor, JustCount, Nsid, NsidCount, OrderCollectionsBy, UFOsRecord};
use base64::{engine::general_purpose::URL_SAFE_NO_PAD, Engine as _};
use chrono::{DateTime, Utc};
use dropshot::endpoint;
···
    /// Mutually exclusive with `cursor` -- sorted results cannot be paged.
    order: Option<CollectionsQueryOrder>,
}
-#[endpoint {
-    method = GET,
-    path = "/collections"
-}]
+/// Get collection with statistics
///
/// ## To fetch a full list:
···
/// Specify the `order` parameter (must be either `records-created` or `did-estimate`). Note that ordered results cannot be paged.
///
/// All statistics are bucketed hourly, so the most granular effective time boundary for `since` and `until` is one hour.
+#[endpoint {
+    method = GET,
+    path = "/collections"
+}]
async fn get_collections(
    ctx: RequestContext<Context>,
    query: Query<CollectionsQuery>,
···
    })
}
+#[derive(Debug, Deserialize, JsonSchema)]
+struct CollectionTimeseriesQuery {
+    collection: String, // JsonSchema not implemented for Nsid :(
+    /// Limit collections and statistics to those seen after this UTC datetime
+    ///
+    /// default: 1 week ago
+    since: Option<DateTime<Utc>>,
+    /// Limit collections and statistics to those seen before this UTC datetime
+    ///
+    /// default: now
+    until: Option<DateTime<Utc>>,
+    /// time steps between data, in seconds
+    ///
+    /// the step will be rounded down to the nearest hour
+    ///
+    /// default: 86400 (24hrs)
+    #[schemars(range(min = 3600))]
+    step: Option<u64>,
+    // todo: rolling averages
+}
+#[derive(Debug, Serialize, JsonSchema)]
+struct CollectionTimeseriesResponse {
+    range: Vec<DateTime<Utc>>,
+    series: HashMap<String, Vec<JustCount>>,
+}
+/// Get timeseries data
+#[endpoint {
+    method = GET,
+    path = "/timeseries"
+}]
+async fn get_timeseries(
+    ctx: RequestContext<Context>,
+    query: Query<CollectionTimeseriesQuery>,
+) -> OkCorsResponse<CollectionTimeseriesResponse> {
+    let Context { storage, .. } = ctx.context();
+    let q = query.into_inner();
+
+    let since = q.since.map(dt_to_cursor).transpose()?.unwrap_or_else(|| {
+        let week_ago_secs = 7 * 86_400;
+        let week_ago = SystemTime::now() - Duration::from_secs(week_ago_secs);
+        Cursor::at(week_ago).into()
+    });
+
+    let until = q.until.map(dt_to_cursor).transpose()?;
+
+    let step = if let Some(secs) = q.step {
+        if secs < 3600 {
+            let msg = format!("step is too small: {}", secs);
+            return Err(HttpError::for_bad_request(None, msg));
+        }
+        (secs / 3600) * 3600 // truncate to hour
+    } else {
+        86_400
+    };
+
+    let nsid = Nsid::new(q.collection).map_err(|e| {
+        HttpError::for_bad_request(None, format!("collection was not a valid NSID: {:?}", e))
+    })?;
+
+    let (range_cursors, series) = storage
+        .get_timeseries(vec![nsid], since, until, step)
+        .await
+        .map_err(|e| HttpError::for_internal_error(format!("oh shoot: {e:?}")))?;
+
+    let range = range_cursors
+        .into_iter()
+        .map(|c| DateTime::<Utc>::from_timestamp_micros(c.to_raw_u64() as i64).unwrap())
+        .collect();
+
+    let series = series
+        .into_iter()
+        .map(|(k, v)| (k.to_string(), v.iter().map(Into::into).collect()))
+        .collect();
+
+    ok_cors(CollectionTimeseriesResponse { range, series })
+}
+
pub async fn serve(storage: impl StoreReader + 'static) -> Result<(), String> {
    let log = ConfigLogging::StderrTerminal {
        level: ConfigLoggingLevel::Info,
···
    api.register(get_records_by_collections).unwrap();
    api.register(get_records_total_seen).unwrap();
    api.register(get_collections).unwrap();
+    api.register(get_timeseries).unwrap();
    let context = Context {
        spec: Arc::new(
+10 -2
ufos/src/storage.rs
···
-use crate::store_types::{HourTruncatedCursor, SketchSecretPrefix};
+use crate::store_types::{CountsValue, HourTruncatedCursor, SketchSecretPrefix};
use crate::{
    error::StorageError, ConsumerInfo, Cursor, EventBatch, NsidCount, OrderCollectionsBy,
    UFOsRecord,
};
use async_trait::async_trait;
use jetstream::exports::{Did, Nsid};
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
use std::path::Path;
use tokio::sync::mpsc::Receiver;
···
        since: Option<HourTruncatedCursor>,
        until: Option<HourTruncatedCursor>,
    ) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)>;
+
+    async fn get_timeseries(
+        &self,
+        collections: Vec<Nsid>,
+        since: HourTruncatedCursor,
+        until: Option<HourTruncatedCursor>,
+        step: u64,
+    ) -> StorageResult<(Vec<HourTruncatedCursor>, HashMap<Nsid, Vec<CountsValue>>)>;
    async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)>;
+77 -1
ufos/src/storage_fjall.rs
···
    NewRollupCursorKey, NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal,
    RecordLocationKey, RecordLocationMeta, RecordLocationVal, RecordRawValue, SketchSecretKey,
    SketchSecretPrefix, TakeoffKey, TakeoffValue, TrimCollectionCursorKey, WeekTruncatedCursor,
-    WeeklyDidsKey, WeeklyRecordsKey, WeeklyRollupKey, WithCollection, WithRank,
+    WeeklyDidsKey, WeeklyRecordsKey, WeeklyRollupKey, WithCollection, WithRank, HOUR_IN_MICROS,
+    WEEK_IN_MICROS,
};
use crate::{
CommitAction, ConsumerInfo, Did, EventBatch, Nsid, NsidCount, OrderCollectionsBy, UFOsRecord,
···
)))
}
+type CollectionSerieses = HashMap<Nsid, Vec<CountsValue>>;
+
impl FjallReader {
fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
let rollup_cursor =
···
}
}
+    /// - step: output series time step, in seconds
+    fn get_timeseries(
+        &self,
+        collections: Vec<Nsid>,
+        since: HourTruncatedCursor,
+        until: Option<HourTruncatedCursor>,
+        step: u64,
+    ) -> StorageResult<(Vec<HourTruncatedCursor>, CollectionSerieses)> {
+        if step > WEEK_IN_MICROS {
+            panic!("week-stepping is todo");
+        }
+        let until = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
+        let Ok(dt) = Cursor::from(until).duration_since(&Cursor::from(since)) else {
+            return Ok((
+                // empty: until < since
+                vec![],
+                collections.into_iter().map(|c| (c, vec![])).collect(),
+            ));
+        };
+        let n_hours = (dt.as_micros() as u64) / HOUR_IN_MICROS;
+        let mut counts_by_hour = Vec::with_capacity(n_hours as usize);
+        let snapshot = self.rollups.snapshot();
+        for hour in (0..n_hours).map(|i| since.nth_next(i)) {
+            let mut counts = Vec::with_capacity(collections.len());
+            for nsid in &collections {
+                let count = snapshot
+                    .get(&HourlyRollupKey::new(hour, nsid).to_db_bytes()?)?
+                    .as_deref()
+                    .map(db_complete::<CountsValue>)
+                    .transpose()?
+                    .unwrap_or_default();
+                counts.push(count);
+            }
+            counts_by_hour.push((hour, counts));
+        }
+
+        let step_hours = step / (HOUR_IN_MICROS / 1_000_000);
+        let mut output_hours = Vec::with_capacity(step_hours as usize);
+        let mut output_series: CollectionSerieses = collections
+            .iter()
+            .map(|c| (c.clone(), Vec::with_capacity(step_hours as usize)))
+            .collect();
+
+        for chunk in counts_by_hour.chunks(step_hours as usize) {
+            output_hours.push(chunk[0].0); // always guaranteed to have at least one element in a chunks chunk
+            for (i, collection) in collections.iter().enumerate() {
+                let mut c = CountsValue::default();
+                for (_, counts) in chunk {
+                    c.merge(&counts[i]);
+                }
+                output_series
+                    .get_mut(collection)
+                    .expect("output series is initialized with all collections")
+                    .push(c);
+            }
+        }
+
+        Ok((output_hours, output_series))
+    }
+
    fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
        // 0. grab a snapshot in case rollups happen while we're working
        let instant = self.keyspace.instant();
···
        let s = self.clone();
        tokio::task::spawn_blocking(move || {
            FjallReader::get_collections(&s, limit, order, since, until)
+        })
+        .await?
+    }
+    async fn get_timeseries(
+        &self,
+        collections: Vec<Nsid>,
+        since: HourTruncatedCursor,
+        until: Option<HourTruncatedCursor>,
+        step: u64,
+    ) -> StorageResult<(Vec<HourTruncatedCursor>, CollectionSerieses)> {
+        let s = self.clone();
+        tokio::task::spawn_blocking(move || {
+            FjallReader::get_timeseries(&s, collections, since, until, step)
        })
        .await?
    }
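For reference on the step-bucketing above: get_timeseries collects one CountsValue per hour per collection, groups the hourly buckets into step_hours-sized chunks, labels each chunk with its first hour, and merges each chunk into a single output point. A simplified, standalone sketch of that chunking (hypothetical helper, not part of the commit; plain u64 addition stands in for CountsValue::merge, and bare indices stand in for hour cursors):

// Simplified sketch of the chunking used above: hourly buckets are grouped into
// step_hours-sized chunks, each chunk keyed by its first hour and merged into
// one output point (u64 addition standing in for CountsValue::merge).
fn bucket_by_step(hourly: &[(u64, u64)], step_hours: usize) -> Vec<(u64, u64)> {
    hourly
        .chunks(step_hours)
        .map(|chunk| {
            let start_hour = chunk[0].0; // chunks() never yields an empty slice
            let merged: u64 = chunk.iter().map(|&(_, n)| n).sum();
            (start_hour, merged)
        })
        .collect()
}

fn main() {
    // six hourly buckets, stepped into 3-hour output points
    let hourly = [(0, 10), (1, 12), (2, 8), (3, 20), (4, 0), (5, 5)];
    assert_eq!(bucket_by_step(&hourly, 3), vec![(0, 30), (3, 25)]);
}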
+9
ufos/src/storage_mem.rs
···
    ) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> {
        todo!()
    }
+    async fn get_timeseries(
+        &self,
+        _: Vec<Nsid>,
+        _: HourTruncatedCursor,
+        _: Option<HourTruncatedCursor>,
+        _: u64,
+    ) -> StorageResult<(Vec<HourTruncatedCursor>, HashMap<Nsid, Vec<CountsValue>>)> {
+        todo!()
+    }
    async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
        let s = self.clone();
        let collection = collection.clone();
+14 -3
ufos/src/store_types.rs
···
    DbBytes, DbConcat, DbStaticStr, EncodingError, EncodingResult, SerdeBytes, StaticStr,
    UseBincodePlz,
};
-use crate::{Cursor, Did, Nsid, PutAction, RecordKey, UFOsCommit};
+use crate::{Cursor, Did, JustCount, Nsid, PutAction, RecordKey, UFOsCommit};
use bincode::{Decode, Encode};
use cardinality_estimator_safe::Sketch;
use std::ops::{Bound, Range};
···
}
}
}
+impl From<&CountsValue> for JustCount {
+    fn from(cv: &CountsValue) -> Self {
+        Self {
+            records: cv.records(),
+            dids_estimate: cv.dids().estimate() as u64,
+        }
+    }
+}
static_str!("delete_acount", _DeleteAccountStaticStr);
pub type DeleteAccountStaticPrefix = DbStaticStr<_DeleteAccountStaticStr>;
···
    pub fn next(&self) -> Self {
        Self(self.0 + MOD)
    }
+    pub fn nth_next(&self, n: u64) -> Self {
+        Self(self.0 + (n * MOD))
+    }
    pub fn prev(&self) -> Self {
        if self.0 < MOD {
            panic!("underflow: previous truncation start would be less than zero");
···
}
}
-const HOUR_IN_MICROS: u64 = 1_000_000 * 3600;
+pub const HOUR_IN_MICROS: u64 = 1_000_000 * 3600;
pub type HourTruncatedCursor = TruncatedCursor<HOUR_IN_MICROS>;
-const WEEK_IN_MICROS: u64 = HOUR_IN_MICROS * 24 * 7;
+pub const WEEK_IN_MICROS: u64 = HOUR_IN_MICROS * 24 * 7;
pub type WeekTruncatedCursor = TruncatedCursor<WEEK_IN_MICROS>;
#[derive(Debug, PartialEq)]