···
1
-
use crate::db_types::{db_complete, DbBytes, DbStaticStr, StaticStr};
1
+
use crate::db_types::{
2
+
db_complete, DbBytes, DbStaticStr, EncodingResult, StaticStr, SubPrefixBytes,
use crate::error::StorageError;
3
-
use crate::storage::{StorageResult, StorageWhatever, StoreReader, StoreWriter};
5
+
use crate::storage::{StorageResult, StorageWhatever, StoreBackground, StoreReader, StoreWriter};
use crate::store_types::{
5
-
AllTimeRollupKey, CountsValue, DeleteAccountQueueKey, DeleteAccountQueueVal,
6
-
HourTruncatedCursor, HourlyRollupKey, JetstreamCursorKey, JetstreamCursorValue,
7
-
JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey, NewRollupCursorKey,
8
-
NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal, RecordLocationKey,
9
-
RecordLocationMeta, RecordLocationVal, RecordRawValue, TakeoffKey, TakeoffValue,
10
-
WeekTruncatedCursor, WeeklyRollupKey,
7
+
AllTimeDidsKey, AllTimeRecordsKey, AllTimeRollupKey, CommitCounts, CountsValue, CursorBucket,
8
+
DeleteAccountQueueKey, DeleteAccountQueueVal, HourTruncatedCursor, HourlyDidsKey,
9
+
HourlyRecordsKey, HourlyRollupKey, HourlyRollupStaticPrefix, JetstreamCursorKey,
10
+
JetstreamCursorValue, JetstreamEndpointKey, JetstreamEndpointValue, LiveCountsKey,
11
+
NewRollupCursorKey, NewRollupCursorValue, NsidRecordFeedKey, NsidRecordFeedVal,
12
+
RecordLocationKey, RecordLocationMeta, RecordLocationVal, RecordRawValue, SketchSecretKey,
13
+
SketchSecretPrefix, TakeoffKey, TakeoffValue, TrimCollectionCursorKey, WeekTruncatedCursor,
14
+
WeeklyDidsKey, WeeklyRecordsKey, WeeklyRollupKey, WithCollection, WithRank, HOUR_IN_MICROS,
12
-
use crate::{CommitAction, ConsumerInfo, Did, EventBatch, Nsid, TopCollections, UFOsRecord};
18
+
nice_duration, CommitAction, ConsumerInfo, Did, EncodingError, EventBatch, JustCount, Nsid,
19
+
NsidCount, NsidPrefix, OrderCollectionsBy, PrefixChild, PrefixCount, UFOsRecord,
use async_trait::async_trait;
14
-
use fjall::{Batch as FjallBatch, Config, Keyspace, PartitionCreateOptions, PartitionHandle};
23
+
Batch as FjallBatch, Config, Keyspace, PartitionCreateOptions, PartitionHandle, Snapshot,
use jetstream::events::Cursor;
16
-
use std::collections::HashMap;
26
+
use std::collections::{HashMap, HashSet};
27
+
use std::iter::Peekable;
28
+
use std::ops::Bound;
18
-
use std::time::SystemTime;
31
+
atomic::{AtomicBool, Ordering},
34
+
use std::time::{Duration, Instant, SystemTime};
20
-
const MAX_BATCHED_CLEANUP_SIZE: usize = 1024; // try to commit progress for longer feeds
const MAX_BATCHED_ACCOUNT_DELETE_RECORDS: usize = 1024;
const MAX_BATCHED_ROLLUP_COUNTS: usize = 256;
···
/// - key: "takeoff" (literal)
/// - val: u64 (micros timestamp, not from jetstream for now so not precise)
56
+
/// - Cardinality estimator secret
57
+
/// - key: "sketch_secret" (literal)
/// - Rollup cursor (bg work: roll stats into hourlies, delete accounts, old record deletes)
/// - key: "rollup_cursor" (literal)
/// - val: u64 (tracks behind js_cursor)
64
+
/// - Feed trim cursor (bg work: delete oldest excess records)
65
+
/// - key: "trim_cursor" || nullstr (nsid)
66
+
/// - val: u64 (earliest previously-removed feed entry jetstream cursor)
···
/// - key: "live_counts" || u64 || nullstr (js_cursor, nsid)
/// - val: u64 || HLL (count (not cursor), estimator)
/// - Hourly total record counts and dids estimate per collection
/// - key: "hourly_counts" || u64 || nullstr (hour, nsid)
/// - val: u64 || HLL (count (not cursor), estimator)
93
+
/// - Hourly record count ranking
94
+
/// - key: "hourly_rank_records" || u64 || u64 || nullstr (hour, count, nsid)
97
+
/// - Hourly did estimate ranking
98
+
/// - key: "hourly_rank_dids" || u64 || u64 || nullstr (hour, dids estimate, nsid)
/// - Weekly total record counts and dids estimate per collection
71
-
/// - key: "weekly_counts" || u64 || nullstr (hour, nsid)
103
+
/// - key: "weekly_counts" || u64 || nullstr (week, nsid)
/// - val: u64 || HLL (count (not cursor), estimator)
106
+
/// - Weekly record count ranking
107
+
/// - key: "weekly_rank_records" || u64 || u64 || nullstr (week, count, nsid)
110
+
/// - Weekly did estimate ranking
111
+
/// - key: "weekly_rank_dids" || u64 || u64 || nullstr (week, dids estimate, nsid)
/// - All-time total record counts and dids estimate per collection
/// - key: "ever_counts" || nullstr (nsid)
/// - val: u64 || HLL (count (not cursor), estimator)
78
-
/// - TODO: sorted indexes for all-times?
119
+
/// - All-time total record count ranking
120
+
/// - key: "ever_rank_records" || u64 || nullstr (count, nsid)
123
+
/// - All-time did estimate ranking
124
+
/// - key: "ever_rank_dids" || u64 || nullstr (dids estimate, nsid)
···
102
-
impl StorageWhatever<FjallReader, FjallWriter, FjallConfig> for FjallStorage {
149
+
impl StorageWhatever<FjallReader, FjallWriter, FjallBackground, FjallConfig> for FjallStorage {
108
-
) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>)> {
155
+
) -> StorageResult<(FjallReader, FjallWriter, Option<Cursor>, SketchSecretPrefix)> {
let config = Config::new(path);
113
-
let config = config.fsync_ms(Some(4_000));
159
+
// #[cfg(not(test))]
160
+
// let config = config.fsync_ms(Some(4_000));
···
let js_cursor = get_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?;
126
-
if js_cursor.is_some() {
173
+
let sketch_secret = if js_cursor.is_some() {
get_static_neu::<JetstreamEndpointKey, JetstreamEndpointValue>(&global)?;
let JetstreamEndpointValue(stored) = stored_endpoint.ok_or(StorageError::InitError(
"found cursor but missing js_endpoint, refusing to start.".to_string(),
180
+
let Some(stored_secret) =
181
+
get_static_neu::<SketchSecretKey, SketchSecretPrefix>(&global)?
183
+
return Err(StorageError::InitError(
184
+
"found cursor but missing sketch_secret, refusing to start.".to_string(),
···
return Err(StorageError::InitError(format!(
143
-
"stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start.")));
197
+
"stored js_endpoint {stored:?} differs from provided {endpoint:?}, refusing to start without --jetstream-force.")));
147
-
insert_static_neu::<JetstreamEndpointKey>(
202
+
log::info!("initializing a fresh db!");
203
+
init_static_neu::<JetstreamEndpointKey>(
JetstreamEndpointValue(endpoint.to_string()),
151
-
insert_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?;
152
-
insert_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?;
208
+
log::info!("generating new secret for cardinality sketches...");
209
+
let mut sketch_secret: SketchSecretPrefix = [0u8; 16];
210
+
getrandom::fill(&mut sketch_secret).map_err(|e| {
211
+
StorageError::InitError(format!(
212
+
"failed to get a random secret for cardinality sketches: {e:?}"
215
+
init_static_neu::<SketchSecretKey>(&global, sketch_secret)?;
217
+
init_static_neu::<TakeoffKey>(&global, Cursor::at(SystemTime::now()))?;
218
+
init_static_neu::<NewRollupCursorKey>(&global, Cursor::from_start())?;
let reader = FjallReader {
keyspace: keyspace.clone(),
···
rollups: rollups.clone(),
let writer = FjallWriter {
231
+
bg_taken: Arc::new(AtomicBool::new(false)),
···
170
-
Ok((reader, writer, js_cursor))
239
+
Ok((reader, writer, js_cursor, sketch_secret))
···
336
+
type GetCounts = Box<dyn FnOnce() -> StorageResult<CountsValue>>;
337
+
type GetByterCounts = StorageResult<(Nsid, GetCounts)>;
338
+
type NsidCounter = Box<dyn Iterator<Item = GetByterCounts>>;
339
+
fn get_lexi_iter<T: WithCollection + DbBytes + 'static>(
340
+
snapshot: &Snapshot,
341
+
start: Bound<Vec<u8>>,
342
+
end: Bound<Vec<u8>>,
343
+
) -> StorageResult<NsidCounter> {
344
+
Ok(Box::new(snapshot.range((start, end)).map(|kv| {
345
+
let (k_bytes, v_bytes) = kv?;
346
+
let key = db_complete::<T>(&k_bytes)?;
347
+
let nsid = key.collection().clone();
348
+
let get_counts: GetCounts = Box::new(move || Ok(db_complete::<CountsValue>(&v_bytes)?));
349
+
Ok((nsid, get_counts))
352
+
type GetRollupKey = Arc<dyn Fn(&Nsid) -> EncodingResult<Vec<u8>>>;
353
+
fn get_lookup_iter<T: WithCollection + WithRank + DbBytes + 'static>(
354
+
snapshot: lsm_tree::Snapshot,
355
+
start: Bound<Vec<u8>>,
356
+
end: Bound<Vec<u8>>,
357
+
get_rollup_key: GetRollupKey,
358
+
) -> StorageResult<NsidCounter> {
359
+
Ok(Box::new(snapshot.range((start, end)).rev().map(
361
+
let (k_bytes, _) = kv?;
362
+
let key = db_complete::<T>(&k_bytes)?;
363
+
let nsid = key.collection().clone();
364
+
let get_counts: GetCounts = Box::new({
365
+
let nsid = nsid.clone();
366
+
let snapshot = snapshot.clone();
367
+
let get_rollup_key = get_rollup_key.clone();
369
+
let db_count_bytes = snapshot.get(get_rollup_key(&nsid)?)?.expect(
370
+
"integrity: all-time rank rollup must have corresponding all-time count rollup",
372
+
Ok(db_complete::<CountsValue>(&db_count_bytes)?)
375
+
Ok((nsid, get_counts))
380
+
type CollectionSerieses = HashMap<Nsid, Vec<CountsValue>>;
fn get_storage_stats(&self) -> StorageResult<serde_json::Value> {
···
get_snapshot_static_neu::<JetstreamCursorKey, JetstreamCursorValue>(&global)?
.map(|c| c.to_raw_u64());
416
+
let rollup_cursor =
417
+
get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?
418
+
.map(|c| c.to_raw_u64());
Ok(ConsumerInfo::Jetstream {
308
-
fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
309
-
// TODO: limit nsid traversal depth
310
-
// TODO: limit nsid traversal breadth
311
-
// TODO: be serious about anything
428
+
fn get_earliest_hour(&self, rollups: Option<&Snapshot>) -> StorageResult<HourTruncatedCursor> {
429
+
let cursor = rollups
430
+
.unwrap_or(&self.rollups.snapshot())
431
+
.prefix(HourlyRollupStaticPrefix::default().to_db_bytes()?)
434
+
.map(|(key_bytes, _)| db_complete::<HourlyRollupKey>(&key_bytes))
436
+
.map(|key| key.cursor())
437
+
.unwrap_or_else(|| Cursor::from_start().into());
441
+
fn get_lexi_collections(
443
+
snapshot: Snapshot,
445
+
cursor: Option<Vec<u8>>,
446
+
buckets: Vec<CursorBucket>,
447
+
) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> {
448
+
let cursor_nsid = cursor.as_deref().map(db_complete::<Nsid>).transpose()?;
449
+
let mut iters: Vec<Peekable<NsidCounter>> = Vec::with_capacity(buckets.len());
450
+
for bucket in &buckets {
451
+
let it: NsidCounter = match bucket {
452
+
CursorBucket::Hour(t) => {
453
+
let start = cursor_nsid
455
+
.map(|nsid| HourlyRollupKey::after_nsid(*t, nsid))
456
+
.unwrap_or_else(|| HourlyRollupKey::start(*t))?;
457
+
let end = HourlyRollupKey::end(*t)?;
458
+
get_lexi_iter::<HourlyRollupKey>(&snapshot, start, end)?
460
+
CursorBucket::Week(t) => {
461
+
let start = cursor_nsid
463
+
.map(|nsid| WeeklyRollupKey::after_nsid(*t, nsid))
464
+
.unwrap_or_else(|| WeeklyRollupKey::start(*t))?;
465
+
let end = WeeklyRollupKey::end(*t)?;
466
+
get_lexi_iter::<WeeklyRollupKey>(&snapshot, start, end)?
468
+
CursorBucket::AllTime => {
469
+
let start = cursor_nsid
471
+
.map(AllTimeRollupKey::after_nsid)
472
+
.unwrap_or_else(AllTimeRollupKey::start)?;
473
+
let end = AllTimeRollupKey::end()?;
474
+
get_lexi_iter::<AllTimeRollupKey>(&snapshot, start, end)?
477
+
iters.push(it.peekable());
313
-
// TODO: probably use a stack of segments to reduce to ~log-n merges
480
+
let mut out = Vec::new();
481
+
let mut current_nsid = None;
482
+
for _ in 0..limit {
483
+
// double-scan the iters for each element: this could be eliminated but we're starting simple.
484
+
// first scan: find the lowest nsid
485
+
// second scan: take + merge, and advance all iters with lowest nsid
486
+
let mut lowest: Option<Nsid> = None;
487
+
for iter in &mut iters {
488
+
if let Some(bla) = iter.peek_mut() {
489
+
let (nsid, _) = match bla {
491
+
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
493
+
lowest = match lowest {
494
+
Some(ref current) if nsid.as_str() > current.as_str() => lowest,
495
+
_ => Some(nsid.clone()),
499
+
current_nsid = lowest.clone();
500
+
let Some(nsid) = lowest else { break };
317
-
counts: CountsValue,
318
-
children: HashMap<String, Blah>,
502
+
let mut merged = CountsValue::default();
503
+
for iter in &mut iters {
504
+
// unwrap: potential fjall error was already checked & bailed over when peeking in the first loop
505
+
if let Some(Ok((_, get_counts))) = iter.next_if(|v| v.as_ref().unwrap().0 == nsid) {
506
+
let counts = get_counts()?;
507
+
merged.merge(&counts);
510
+
out.push(NsidCount {
511
+
nsid: nsid.to_string(),
512
+
creates: merged.counts().creates,
513
+
dids_estimate: merged.dids().estimate() as u64,
320
-
impl From<&Blah> for TopCollections {
321
-
fn from(bla: &Blah) -> Self {
323
-
total_records: bla.counts.records(),
324
-
dids_estimate: bla.counts.dids().estimate() as u64,
325
-
nsid_child_segments: HashMap::from_iter(
326
-
bla.children.iter().map(|(k, v)| (k.to_string(), v.into())),
517
+
let next_cursor = current_nsid.map(|s| s.to_db_bytes()).transpose()?;
518
+
Ok((out, next_cursor))
521
+
fn get_ordered_collections(
523
+
snapshot: Snapshot,
525
+
order: OrderCollectionsBy,
526
+
buckets: Vec<CursorBucket>,
527
+
) -> StorageResult<Vec<NsidCount>> {
528
+
let mut iters: Vec<NsidCounter> = Vec::with_capacity(buckets.len());
530
+
for bucket in buckets {
531
+
let it: NsidCounter = match (&order, bucket) {
532
+
(OrderCollectionsBy::RecordsCreated, CursorBucket::Hour(t)) => {
533
+
get_lookup_iter::<HourlyRecordsKey>(
535
+
HourlyRecordsKey::start(t)?,
536
+
HourlyRecordsKey::end(t)?,
538
+
move |collection| HourlyRollupKey::new(t, collection).to_db_bytes()
542
+
(OrderCollectionsBy::DidsEstimate, CursorBucket::Hour(t)) => {
543
+
get_lookup_iter::<HourlyDidsKey>(
545
+
HourlyDidsKey::start(t)?,
546
+
HourlyDidsKey::end(t)?,
548
+
move |collection| HourlyRollupKey::new(t, collection).to_db_bytes()
552
+
(OrderCollectionsBy::RecordsCreated, CursorBucket::Week(t)) => {
553
+
get_lookup_iter::<WeeklyRecordsKey>(
555
+
WeeklyRecordsKey::start(t)?,
556
+
WeeklyRecordsKey::end(t)?,
558
+
move |collection| WeeklyRollupKey::new(t, collection).to_db_bytes()
562
+
(OrderCollectionsBy::DidsEstimate, CursorBucket::Week(t)) => {
563
+
get_lookup_iter::<WeeklyDidsKey>(
565
+
WeeklyDidsKey::start(t)?,
566
+
WeeklyDidsKey::end(t)?,
568
+
move |collection| WeeklyRollupKey::new(t, collection).to_db_bytes()
572
+
(OrderCollectionsBy::RecordsCreated, CursorBucket::AllTime) => {
573
+
get_lookup_iter::<AllTimeRecordsKey>(
575
+
AllTimeRecordsKey::start()?,
576
+
AllTimeRecordsKey::end()?,
577
+
Arc::new(|collection| AllTimeRollupKey::new(collection).to_db_bytes()),
580
+
(OrderCollectionsBy::DidsEstimate, CursorBucket::AllTime) => {
581
+
get_lookup_iter::<AllTimeDidsKey>(
583
+
AllTimeDidsKey::start()?,
584
+
AllTimeDidsKey::end()?,
585
+
Arc::new(|collection| AllTimeRollupKey::new(collection).to_db_bytes()),
588
+
(OrderCollectionsBy::Lexi { .. }, _) => unreachable!(),
593
+
// overfetch by taking a bit more than the limit
594
+
// merge by collection
595
+
// sort by requested order, take limit, discard all remaining
597
+
// this isn't guaranteed to be correct, but it will hopefully be close most of the time:
598
+
// - it's possible that some NSIDs might score low during some time-buckets, and miss being merged
599
+
// - overfetching hopefully helps a bit by catching nsids near the threshold more often, but. yeah.
601
+
// this thing is heavy, there's probably a better way
602
+
let mut ranked: HashMap<Nsid, CountsValue> = HashMap::with_capacity(limit * 2);
603
+
for iter in iters {
604
+
for pair in iter.take((limit as f64 * 1.3).ceil() as usize) {
605
+
let (nsid, get_counts) = pair?;
606
+
let counts = get_counts()?;
607
+
ranked.entry(nsid).or_default().merge(&counts);
610
+
let mut ranked: Vec<(Nsid, CountsValue)> = ranked.into_iter().collect();
612
+
OrderCollectionsBy::RecordsCreated => ranked.sort_by_key(|(_, c)| c.counts().creates),
613
+
OrderCollectionsBy::DidsEstimate => ranked.sort_by_key(|(_, c)| c.dids().estimate()),
614
+
OrderCollectionsBy::Lexi { .. } => unreachable!(),
616
+
let counts = ranked
620
+
.map(|(nsid, cv)| NsidCount {
621
+
nsid: nsid.to_string(),
622
+
creates: cv.counts().creates,
623
+
dids_estimate: cv.dids().estimate() as u64,
332
-
let mut b = Blah::default();
333
-
let prefix = AllTimeRollupKey::from_prefix_to_db_bytes(&Default::default())?;
334
-
for kv in self.rollups.prefix(&prefix.to_db_bytes()?) {
335
-
let (key_bytes, val_bytes) = kv?;
336
-
let key = db_complete::<AllTimeRollupKey>(&key_bytes)?;
337
-
let val = db_complete::<CountsValue>(&val_bytes)?;
629
+
fn get_collections(
632
+
order: OrderCollectionsBy,
633
+
since: Option<HourTruncatedCursor>,
634
+
until: Option<HourTruncatedCursor>,
635
+
) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> {
636
+
let snapshot = self.rollups.snapshot();
637
+
let buckets = if let (None, None) = (since, until) {
638
+
vec![CursorBucket::AllTime]
640
+
let mut lower = self.get_earliest_hour(Some(&snapshot))?;
641
+
if let Some(specified) = since {
642
+
if specified > lower {
646
+
let upper = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
647
+
CursorBucket::buckets_spanning(lower, upper)
650
+
OrderCollectionsBy::Lexi { cursor } => {
651
+
self.get_lexi_collections(snapshot, limit, cursor, buckets)
654
+
self.get_ordered_collections(snapshot, limit, order, buckets)?,
339
-
let mut node = &mut b;
340
-
node.counts.merge(&val);
341
-
for segment in key.collection().split('.') {
342
-
node = node.children.entry(segment.to_string()).or_default();
343
-
node.counts.merge(&val);
660
+
fn get_lexi_prefix(
662
+
snapshot: Snapshot,
663
+
prefix: NsidPrefix,
665
+
cursor: Option<Vec<u8>>,
666
+
buckets: Vec<CursorBucket>,
667
+
) -> StorageResult<(JustCount, Vec<PrefixChild>, Option<Vec<u8>>)> {
668
+
// let prefix_sub_with_null = prefix.as_str().to_string().to_db_bytes()?;
669
+
let prefix_sub = String::sub_prefix(&prefix.terminated())?; // with trailing dot to ensure full segment match
670
+
let cursor_child = cursor
672
+
.map(|encoded_bytes| {
673
+
let decoded: String = db_complete(encoded_bytes)?;
674
+
// TODO: write some tests for cursors, there are probably bugs here
675
+
let as_sub_prefix_with_null = decoded.to_db_bytes()?;
676
+
Ok::<_, EncodingError>(as_sub_prefix_with_null)
679
+
let mut iters: Vec<NsidCounter> = Vec::with_capacity(buckets.len());
680
+
for bucket in &buckets {
681
+
let it: NsidCounter = match bucket {
682
+
CursorBucket::Hour(t) => {
683
+
let start = cursor_child
685
+
.map(|child| HourlyRollupKey::after_nsid_prefix(*t, child))
686
+
.unwrap_or_else(|| HourlyRollupKey::after_nsid_prefix(*t, &prefix_sub))?;
687
+
let end = HourlyRollupKey::nsid_prefix_end(*t, &prefix_sub)?;
688
+
get_lexi_iter::<HourlyRollupKey>(&snapshot, start, end)?
690
+
CursorBucket::Week(t) => {
691
+
let start = cursor_child
693
+
.map(|child| WeeklyRollupKey::after_nsid_prefix(*t, child))
694
+
.unwrap_or_else(|| WeeklyRollupKey::after_nsid_prefix(*t, &prefix_sub))?;
695
+
let end = WeeklyRollupKey::nsid_prefix_end(*t, &prefix_sub)?;
696
+
get_lexi_iter::<WeeklyRollupKey>(&snapshot, start, end)?
698
+
CursorBucket::AllTime => {
699
+
let start = cursor_child
701
+
.map(|child| AllTimeRollupKey::after_nsid_prefix(child))
702
+
.unwrap_or_else(|| AllTimeRollupKey::after_nsid_prefix(&prefix_sub))?;
703
+
let end = AllTimeRollupKey::nsid_prefix_end(&prefix_sub)?;
704
+
get_lexi_iter::<AllTimeRollupKey>(&snapshot, start, end)?
711
+
let mut iters: Vec<_> = iters
715
+
bla.map(|(nsid, v)| {
716
+
let Some(child) = Child::from_prefix(&nsid, &prefix) else {
717
+
panic!("failed from_prefix: {nsid:?} {prefix:?} (bad iter bounds?)");
726
+
let mut items = Vec::new();
727
+
let mut prefix_count = CountsValue::default();
728
+
#[derive(Debug, Clone, PartialEq)]
731
+
ChildPrefix(String),
734
+
fn from_prefix(nsid: &Nsid, prefix: &NsidPrefix) -> Option<Self> {
735
+
if prefix.is_group_of(nsid) {
736
+
return Some(Child::FullNsid(nsid.to_string()));
738
+
let suffix = nsid.as_str().strip_prefix(&format!("{}.", prefix.0))?;
739
+
let (segment, _) = suffix.split_once('.').unwrap();
740
+
let child_prefix = format!("{}.{segment}", prefix.0);
741
+
Some(Child::ChildPrefix(child_prefix))
743
+
fn is_before(&self, other: &Child) -> bool {
744
+
match (self, other) {
745
+
(Child::FullNsid(s), Child::ChildPrefix(o)) if s == o => true,
746
+
(Child::ChildPrefix(s), Child::FullNsid(o)) if s == o => false,
747
+
(Child::FullNsid(s), Child::FullNsid(o)) => s < o,
748
+
(Child::ChildPrefix(s), Child::ChildPrefix(o)) => s < o,
749
+
(Child::FullNsid(s), Child::ChildPrefix(o)) => s < o,
750
+
(Child::ChildPrefix(s), Child::FullNsid(o)) => s < o,
753
+
fn into_inner(self) -> String {
755
+
Child::FullNsid(s) => s,
756
+
Child::ChildPrefix(s) => s,
760
+
let mut current_child: Option<Child> = None;
761
+
for _ in 0..limit {
762
+
// double-scan the iters for each element: this could be eliminated but we're starting simple.
763
+
// first scan: find the lowest nsid
764
+
// second scan: take + merge, and advance all iters with lowest nsid
765
+
let mut lowest: Option<Child> = None;
766
+
for iter in &mut iters {
767
+
if let Some(bla) = iter.peek_mut() {
768
+
let (child, _) = match bla {
770
+
Err(e) => Err(std::mem::replace(e, StorageError::Stolen))?,
773
+
lowest = match lowest {
774
+
Some(ref current) if current.is_before(child) => lowest,
775
+
_ => Some(child.clone()),
779
+
current_child = lowest.clone();
780
+
let Some(child) = lowest else { break };
782
+
let mut merged = CountsValue::default();
783
+
for iter in &mut iters {
784
+
// unwrap: potential fjall error was already checked & bailed over when peeking in the first loop
785
+
while let Some(Ok((_, get_counts))) =
786
+
iter.next_if(|v| v.as_ref().unwrap().0 == child)
788
+
let counts = get_counts()?;
789
+
prefix_count.merge(&counts);
790
+
merged.merge(&counts);
793
+
items.push(match child {
794
+
Child::FullNsid(nsid) => PrefixChild::Collection(NsidCount {
796
+
creates: merged.counts().creates,
797
+
dids_estimate: merged.dids().estimate() as u64,
799
+
Child::ChildPrefix(prefix) => PrefixChild::Prefix(PrefixCount {
801
+
creates: merged.counts().creates,
802
+
dids_estimate: merged.dids().estimate() as u64,
807
+
// TODO: could serialize the prefix count (with sketch) into the cursor so that uniqs can actually count up?
808
+
// ....er the sketch is probably too big
809
+
// TODO: this is probably buggy on child-type boundaries bleh
810
+
let next_cursor = current_child
811
+
.map(|s| s.into_inner().to_db_bytes())
814
+
Ok(((&prefix_count).into(), items, next_cursor))
350
-
fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
351
-
// 0. grab a snapshot in case rollups happen while we're working
352
-
let instant = self.keyspace.instant();
353
-
let global = self.global.snapshot_at(instant);
354
-
let rollups = self.rollups.snapshot_at(instant);
819
+
prefix: NsidPrefix,
821
+
order: OrderCollectionsBy,
822
+
since: Option<HourTruncatedCursor>,
823
+
until: Option<HourTruncatedCursor>,
824
+
) -> StorageResult<(JustCount, Vec<PrefixChild>, Option<Vec<u8>>)> {
825
+
let snapshot = self.rollups.snapshot();
826
+
let buckets = if let (None, None) = (since, until) {
827
+
vec![CursorBucket::AllTime]
829
+
let mut lower = self.get_earliest_hour(Some(&snapshot))?;
830
+
if let Some(specified) = since {
831
+
if specified > lower {
835
+
let upper = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
836
+
CursorBucket::buckets_spanning(lower, upper)
839
+
OrderCollectionsBy::Lexi { cursor } => {
840
+
self.get_lexi_prefix(snapshot, prefix, limit, cursor, buckets)
356
-
// 1. all-time counts
357
-
let all_time_key = AllTimeRollupKey::new(collection).to_db_bytes()?;
358
-
let mut total_counts = rollups
359
-
.get(&all_time_key)?
361
-
.map(db_complete::<CountsValue>)
363
-
.unwrap_or_default();
846
+
/// - step: output series time step, in seconds
849
+
collections: Vec<Nsid>,
850
+
since: HourTruncatedCursor,
851
+
until: Option<HourTruncatedCursor>,
853
+
) -> StorageResult<(Vec<HourTruncatedCursor>, CollectionSerieses)> {
854
+
if step > WEEK_IN_MICROS {
855
+
panic!("week-stepping is todo");
857
+
let until = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
858
+
let Ok(dt) = Cursor::from(until).duration_since(&Cursor::from(since)) else {
860
+
// empty: until < since
862
+
collections.into_iter().map(|c| (c, vec![])).collect(),
865
+
let n_hours = (dt.as_micros() as u64) / HOUR_IN_MICROS;
866
+
let mut counts_by_hour = Vec::with_capacity(n_hours as usize);
867
+
let snapshot = self.rollups.snapshot();
868
+
for hour in (0..n_hours).map(|i| since.nth_next(i)) {
869
+
let mut counts = Vec::with_capacity(collections.len());
870
+
for nsid in &collections {
871
+
let count = snapshot
872
+
.get(&HourlyRollupKey::new(hour, nsid).to_db_bytes()?)?
874
+
.map(db_complete::<CountsValue>)
876
+
.unwrap_or_default();
877
+
counts.push(count);
879
+
counts_by_hour.push((hour, counts));
365
-
// 2. live counts that haven't been rolled into all-time yet.
366
-
let rollup_cursor =
367
-
get_snapshot_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&global)?.ok_or(
368
-
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
882
+
let step_hours = step / (HOUR_IN_MICROS / 1_000_000);
883
+
let mut output_hours = Vec::with_capacity(step_hours as usize);
884
+
let mut output_series: CollectionSerieses = collections
886
+
.map(|c| (c.clone(), Vec::with_capacity(step_hours as usize)))
371
-
let full_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
372
-
for kv in rollups.range(full_range) {
373
-
let (key_bytes, val_bytes) = kv?;
374
-
let key = db_complete::<LiveCountsKey>(&key_bytes)?;
375
-
if key.collection() == collection {
376
-
let counts = db_complete::<CountsValue>(&val_bytes)?;
377
-
total_counts.merge(&counts);
889
+
for chunk in counts_by_hour.chunks(step_hours as usize) {
890
+
output_hours.push(chunk[0].0); // always guaranteed to have at least one element in a chunks chunk
891
+
for (i, collection) in collections.iter().enumerate() {
892
+
let mut c = CountsValue::default();
893
+
for (_, counts) in chunk {
894
+
c.merge(&counts[i]);
897
+
.get_mut(collection)
898
+
.expect("output series is initialized with all collections")
381
-
total_counts.records(),
382
-
total_counts.dids().estimate() as u64,
903
+
Ok((output_hours, output_series))
906
+
fn get_collection_counts(
909
+
since: HourTruncatedCursor,
910
+
until: Option<HourTruncatedCursor>,
911
+
) -> StorageResult<JustCount> {
912
+
// grab snapshots in case rollups happen while we're working
913
+
let rollups = self.rollups.snapshot();
915
+
let until = until.unwrap_or_else(|| Cursor::at(SystemTime::now()).into());
916
+
let buckets = CursorBucket::buckets_spanning(since, until);
917
+
let mut total_counts = CountsValue::default();
919
+
for bucket in buckets {
920
+
let key = match bucket {
921
+
CursorBucket::Hour(t) => HourlyRollupKey::new(t, collection).to_db_bytes()?,
922
+
CursorBucket::Week(t) => WeeklyRollupKey::new(t, collection).to_db_bytes()?,
923
+
CursorBucket::AllTime => unreachable!(), // TODO: fall back on this if the time span spans the whole dataset?
925
+
let count = rollups
928
+
.map(db_complete::<CountsValue>)
930
+
.unwrap_or_default();
931
+
total_counts.merge(&count);
934
+
Ok((&total_counts).into())
fn get_records_by_collections(
388
-
collections: &[Nsid],
939
+
collections: HashSet<Nsid>,
expand_each_collection: bool,
) -> StorageResult<Vec<UFOsRecord>> {
···
let mut record_iterators = Vec::new();
for collection in collections {
397
-
let iter = RecordIterator::new(&self.feeds, self.records.clone(), collection, limit)?;
948
+
let iter = RecordIterator::new(&self.feeds, self.records.clone(), &collection, limit)?;
record_iterators.push(iter.peekable());
let mut merged = Vec::new();
···
tokio::task::spawn_blocking(move || FjallReader::get_consumer_info(&s)).await?
449
-
async fn get_top_collections(&self) -> Result<TopCollections, StorageError> {
1000
+
async fn get_collections(
1003
+
order: OrderCollectionsBy,
1004
+
since: Option<HourTruncatedCursor>,
1005
+
until: Option<HourTruncatedCursor>,
1006
+
) -> StorageResult<(Vec<NsidCount>, Option<Vec<u8>>)> {
451
-
tokio::task::spawn_blocking(move || FjallReader::get_top_collections(&s)).await?
1008
+
tokio::task::spawn_blocking(move || {
1009
+
FjallReader::get_collections(&s, limit, order, since, until)
453
-
async fn get_counts_by_collection(&self, collection: &Nsid) -> StorageResult<(u64, u64)> {
1013
+
async fn get_prefix(
1015
+
prefix: NsidPrefix,
1017
+
order: OrderCollectionsBy,
1018
+
since: Option<HourTruncatedCursor>,
1019
+
until: Option<HourTruncatedCursor>,
1020
+
) -> StorageResult<(JustCount, Vec<PrefixChild>, Option<Vec<u8>>)> {
1021
+
let s = self.clone();
1022
+
tokio::task::spawn_blocking(move || {
1023
+
FjallReader::get_prefix(&s, prefix, limit, order, since, until)
1027
+
async fn get_timeseries(
1029
+
collections: Vec<Nsid>,
1030
+
since: HourTruncatedCursor,
1031
+
until: Option<HourTruncatedCursor>,
1033
+
) -> StorageResult<(Vec<HourTruncatedCursor>, CollectionSerieses)> {
1034
+
let s = self.clone();
1035
+
tokio::task::spawn_blocking(move || {
1036
+
FjallReader::get_timeseries(&s, collections, since, until, step)
1040
+
async fn get_collection_counts(
1042
+
collection: &Nsid,
1043
+
since: HourTruncatedCursor,
1044
+
until: Option<HourTruncatedCursor>,
1045
+
) -> StorageResult<JustCount> {
let collection = collection.clone();
456
-
tokio::task::spawn_blocking(move || FjallReader::get_counts_by_collection(&s, &collection))
1048
+
tokio::task::spawn_blocking(move || {
1049
+
FjallReader::get_collection_counts(&s, &collection, since, until)
async fn get_records_by_collections(
461
-
collections: &[Nsid],
1055
+
collections: HashSet<Nsid>,
expand_each_collection: bool,
) -> StorageResult<Vec<UFOsRecord>> {
466
-
let collections = collections.to_vec();
tokio::task::spawn_blocking(move || {
468
-
FjallReader::get_records_by_collections(&s, &collections, limit, expand_each_collection)
1061
+
FjallReader::get_records_by_collections(&s, collections, limit, expand_each_collection)
1069
+
bg_taken: Arc<AtomicBool>,
···
timelies: impl Iterator<Item = Result<(fjall::Slice, fjall::Slice), fjall::Error>>,
cursor_exclusive_limit: Option<Cursor>,
504
-
) -> StorageResult<usize> {
1099
+
) -> StorageResult<(usize, HashSet<Nsid>)> {
// current strategy is to buffer counts in mem before writing the rollups
// we *could* read+write every single batch to rollup.. but their merge is associative so
// ...so save the db some work up front? is this worth it? who knows...
1104
+
let mut dirty_nsids = HashSet::new();
#[derive(Eq, Hash, PartialEq)]
···
1133
+
dirty_nsids.insert(key.collection().clone());
batch.remove(&self.rollups, key_bytes);
let val = db_complete::<CountsValue>(&val_bytes)?;
···
last_cursor = key.cursor();
1160
+
// go through each new rollup thing and merge it with whatever might already be in the db
for ((nsid, rollup), counts) in counts_by_rollup {
562
-
let key_bytes = match rollup {
1162
+
let rollup_key_bytes = match rollup {
Rollup::Hourly(hourly_cursor) => {
564
-
let k = HourlyRollupKey::new(hourly_cursor, &nsid);
1164
+
HourlyRollupKey::new(hourly_cursor, &nsid).to_db_bytes()?
Rollup::Weekly(weekly_cursor) => {
568
-
let k = WeeklyRollupKey::new(weekly_cursor, &nsid);
1167
+
WeeklyRollupKey::new(weekly_cursor, &nsid).to_db_bytes()?
571
-
Rollup::AllTime => {
572
-
let k = AllTimeRollupKey::new(&nsid);
1169
+
Rollup::AllTime => AllTimeRollupKey::new(&nsid).to_db_bytes()?,
let mut rolled: CountsValue = self
1173
+
.get(&rollup_key_bytes)?
.map(db_complete::<CountsValue>)
584
-
// try to round-trip before inserting, for funsies
585
-
let tripppin = counts.to_db_bytes()?;
586
-
let (and_back, n) = CountsValue::from_db_bytes(&tripppin)?;
587
-
assert_eq!(n, tripppin.len());
588
-
assert_eq!(counts.prefix, and_back.prefix);
589
-
assert_eq!(counts.dids().estimate(), and_back.dids().estimate());
590
-
if counts.records() > 200_000_000_000 {
591
-
panic!("COUNTS maybe wtf? {counts:?}")
1179
+
// now that we have values, we can know the existing ranks
1180
+
let before_creates_count = rolled.counts().creates;
1181
+
let before_dids_estimate = rolled.dids().estimate() as u64;
1183
+
// update the rollup
1184
+
rolled.merge(&counts);
1187
+
let new_creates_count = rolled.counts().creates;
1188
+
let new_dids_estimate = rolled.dids().estimate() as u64;
1190
+
// update create-ranked secondary index if rank changed
1191
+
if new_creates_count != before_creates_count {
1192
+
let (old_k, new_k) = match rollup {
1193
+
Rollup::Hourly(cursor) => (
1194
+
HourlyRecordsKey::new(cursor, before_creates_count.into(), &nsid)
1196
+
HourlyRecordsKey::new(cursor, new_creates_count.into(), &nsid)
1199
+
Rollup::Weekly(cursor) => (
1200
+
WeeklyRecordsKey::new(cursor, before_creates_count.into(), &nsid)
1202
+
WeeklyRecordsKey::new(cursor, new_creates_count.into(), &nsid)
1205
+
Rollup::AllTime => (
1206
+
AllTimeRecordsKey::new(before_creates_count.into(), &nsid).to_db_bytes()?,
1207
+
AllTimeRecordsKey::new(new_creates_count.into(), &nsid).to_db_bytes()?,
1210
+
batch.remove(&self.rollups, &old_k); // TODO: when fjall gets weak delete, this will hopefully work way better
1211
+
batch.insert(&self.rollups, &new_k, "");
1214
+
// update dids-ranked secondary index if rank changed
1215
+
if new_dids_estimate != before_dids_estimate {
1216
+
let (old_k, new_k) = match rollup {
1217
+
Rollup::Hourly(cursor) => (
1218
+
HourlyDidsKey::new(cursor, before_dids_estimate.into(), &nsid)
1220
+
HourlyDidsKey::new(cursor, new_dids_estimate.into(), &nsid)
1223
+
Rollup::Weekly(cursor) => (
1224
+
WeeklyDidsKey::new(cursor, before_dids_estimate.into(), &nsid)
1226
+
WeeklyDidsKey::new(cursor, new_dids_estimate.into(), &nsid)
1229
+
Rollup::AllTime => (
1230
+
AllTimeDidsKey::new(before_dids_estimate.into(), &nsid).to_db_bytes()?,
1231
+
AllTimeDidsKey::new(new_dids_estimate.into(), &nsid).to_db_bytes()?,
1234
+
batch.remove(&self.rollups, &old_k); // TODO: when fjall gets weak delete, this will hopefully work way better
1235
+
batch.insert(&self.rollups, &new_k, "");
594
-
rolled.merge(&counts);
595
-
batch.insert(&self.rollups, &key_bytes, &rolled.to_db_bytes()?);
1238
+
// replace the main counts rollup
1239
+
batch.insert(&self.rollups, &rollup_key_bytes, &rolled.to_db_bytes()?);
insert_batch_static_neu::<NewRollupCursorKey>(&mut batch, &self.global, last_cursor)?;
601
-
Ok(cursors_advanced)
1245
+
Ok((cursors_advanced, dirty_nsids))
605
-
impl StoreWriter for FjallWriter {
1249
+
impl StoreWriter<FjallBackground> for FjallWriter {
1250
+
fn background_tasks(&mut self, reroll: bool) -> StorageResult<FjallBackground> {
1251
+
if self.bg_taken.swap(true, Ordering::SeqCst) {
1252
+
Err(StorageError::BackgroundAlreadyStarted)
1255
+
log::info!("reroll: resetting rollup cursor...");
1256
+
insert_static_neu::<NewRollupCursorKey>(&self.global, Cursor::from_start())?;
1257
+
log::info!("reroll: clearing trim cursors...");
1258
+
let mut batch = self.keyspace.batch();
1261
+
.prefix(TrimCollectionCursorKey::from_prefix_to_db_bytes(
1262
+
&Default::default(),
1266
+
batch.remove(&self.global, k);
1268
+
let n = batch.len();
1270
+
log::info!("reroll: cleared {n} trim cursors.");
1272
+
Ok(FjallBackground(self.clone()))
fn insert_batch<const LIMIT: usize>(
event_batch: EventBatch<LIMIT>,
···
let live_counts_key: LiveCountsKey = (latest, &nsid).into();
648
-
let counts_value = CountsValue::new(commits.total_seen as u64, commits.dids_estimate);
1318
+
let counts_value = CountsValue::new(
1320
+
creates: commits.creates as u64,
1321
+
updates: commits.updates as u64,
1322
+
deletes: commits.deletes as u64,
1324
+
commits.dids_estimate,
&live_counts_key.to_db_bytes()?,
···
676
-
fn step_rollup(&mut self) -> StorageResult<usize> {
1353
+
fn step_rollup(&mut self) -> StorageResult<(usize, HashSet<Nsid>)> {
1354
+
let mut dirty_nsids = HashSet::new();
get_static_neu::<NewRollupCursorKey, NewRollupCursorValue>(&self.global)?.ok_or(
StorageError::BadStateError("Could not find current rollup cursor".to_string()),
···
let live_counts_range = LiveCountsKey::range_from_cursor(rollup_cursor)?;
let mut timely_iter = self.rollups.range(live_counts_range).peekable();
686
-
let timely_next_cursor = timely_iter
1365
+
let timely_next = timely_iter
688
-
.map(|kv| -> StorageResult<Cursor> {
1367
+
.map(|kv| -> StorageResult<LiveCountsKey> {
Err(e) => Err(std::mem::replace(e, fjall::Error::Poisoned))?,
let key = db_complete::<LiveCountsKey>(key_bytes)?;
···
714
-
let cursors_stepped = match (timely_next_cursor, next_delete) {
716
-
Some(timely_next_cursor),
717
-
Some((delete_cursor, delete_key_bytes, delete_val_bytes)),
719
-
if timely_next_cursor < delete_cursor {
720
-
self.rollup_live_counts(
1393
+
let cursors_stepped = match (timely_next, next_delete) {
1394
+
(Some(timely), Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => {
1395
+
if timely.cursor() < delete_cursor {
1396
+
let (n, dirty) = self.rollup_live_counts(
MAX_BATCHED_ROLLUP_COUNTS,
1401
+
dirty_nsids.extend(dirty);
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
730
-
self.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)?
1409
+
self.rollup_live_counts(timely_iter, None, MAX_BATCHED_ROLLUP_COUNTS)?;
1410
+
dirty_nsids.extend(dirty);
(None, Some((delete_cursor, delete_key_bytes, delete_val_bytes))) => {
self.rollup_delete_account(delete_cursor, &delete_key_bytes, &delete_val_bytes)?
···
738
-
Ok(cursors_stepped)
1419
+
Ok((cursors_stepped, dirty_nsids))
745
-
// TODO: could add a start cursor limit to avoid iterating deleted stuff at the start (/end)
746
-
) -> StorageResult<()> {
1427
+
) -> StorageResult<(usize, usize, bool)> {
let mut dangling_feed_keys_cleaned = 0;
let mut records_deleted = 0;
750
-
let mut batch = self.keyspace.batch();
1431
+
let live_range = if full_scan {
1432
+
let start = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
1433
+
let end = NsidRecordFeedKey::prefix_range_end(collection)?;
1436
+
let feed_trim_cursor_key =
1437
+
TrimCollectionCursorKey::new(collection.clone()).to_db_bytes()?;
1438
+
let trim_cursor = self
1440
+
.get(&feed_trim_cursor_key)?
1441
+
.map(|value_bytes| db_complete(&value_bytes))
1443
+
.unwrap_or(Cursor::from_start());
1444
+
NsidRecordFeedKey::from_pair(collection.clone(), trim_cursor).range_to_prefix_end()?
752
-
let prefix = NsidRecordFeedKey::from_prefix_to_db_bytes(collection)?;
754
-
for kv in self.feeds.prefix(prefix).rev() {
1447
+
let mut live_records_found = 0;
1448
+
let mut candidate_new_feed_lower_cursor = None;
1449
+
let ended_early = false;
1450
+
let mut current_cursor: Option<Cursor> = None;
1451
+
for (i, kv) in self.feeds.range(live_range).rev().enumerate() {
1452
+
if i > 0 && i % 500_000 == 0 {
1454
+
"trim: at {i} for {:?} (now at {})",
1455
+
collection.to_string(),
1459
+
.map(nice_duration)
1460
+
.unwrap_or("[not past]".into()))
1461
+
.unwrap_or("??".into()),
let (key_bytes, val_bytes) = kv?;
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
let feed_val = db_complete::<NsidRecordFeedVal>(&val_bytes)?;
···
let Some(location_val_bytes) = self.records.get(&location_key_bytes)? else {
// record was deleted (hopefully)
763
-
batch.remove(&self.feeds, &location_key_bytes);
1472
+
self.feeds.remove(&*key_bytes)?;
dangling_feed_keys_cleaned += 1;
let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
1478
+
current_cursor = Some(meta.cursor());
if meta.cursor() != feed_key.cursor() {
// older/different version
772
-
batch.remove(&self.feeds, &location_key_bytes);
1482
+
self.feeds.remove(&*key_bytes)?;
dangling_feed_keys_cleaned += 1;
if meta.rev != feed_val.rev() {
log::warn!("record lookup: cursor match but rev did not...? removing.");
779
-
batch.remove(&self.feeds, &location_key_bytes);
1489
+
self.records.remove(&location_key_bytes)?;
1490
+
self.feeds.remove(&*key_bytes)?;
dangling_feed_keys_cleaned += 1;
784
-
if batch.len() >= MAX_BATCHED_CLEANUP_SIZE {
786
-
batch = self.keyspace.batch();
790
-
if found <= limit {
1495
+
live_records_found += 1;
1496
+
if live_records_found <= limit {
1499
+
if candidate_new_feed_lower_cursor.is_none() {
1500
+
candidate_new_feed_lower_cursor = Some(feed_key.cursor());
794
-
batch.remove(&self.feeds, &location_key_bytes);
795
-
batch.remove(&self.records, &location_key_bytes);
1503
+
self.feeds.remove(&location_key_bytes)?;
1504
+
self.feeds.remove(key_bytes)?;
1509
+
if let Some(new_cursor) = candidate_new_feed_lower_cursor {
1510
+
self.global.insert(
1511
+
&TrimCollectionCursorKey::new(collection.clone()).to_db_bytes()?,
1512
+
&new_cursor.to_db_bytes()?,
801
-
log::info!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records");
1517
+
log::trace!("trim_collection ({collection:?}) removed {dangling_feed_keys_cleaned} dangling feed entries and {records_deleted} records (ended early? {ended_early})");
1518
+
Ok((dangling_feed_keys_cleaned, records_deleted, ended_early))
fn delete_account(&mut self, did: &Did) -> Result<usize, StorageError> {
···
1539
+
pub struct FjallBackground(FjallWriter);
1542
+
impl StoreBackground for FjallBackground {
1543
+
async fn run(mut self, backfill: bool) -> StorageResult<()> {
1544
+
let mut dirty_nsids = HashSet::new();
1546
+
// backfill condition here is iffy -- longer is good when doing the main ingest and then collection trims
1547
+
// shorter once those are done helps things catch up
1548
+
// the best setting for non-backfill is non-obvious.. it can be pretty slow and still be fine
1550
+
tokio::time::interval(Duration::from_micros(if backfill { 100 } else { 32_000 }));
1551
+
rollup.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
1553
+
// backfill condition again iffy. collection trims should probably happen in their own phase.
1554
+
let mut trim = tokio::time::interval(Duration::from_secs(if backfill { 18 } else { 9 }));
1555
+
trim.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
1559
+
_ = rollup.tick() => {
1560
+
let mut db = self.0.clone();
1561
+
let (n, dirty) = tokio::task::spawn_blocking(move || db.step_rollup()).await??;
1563
+
rollup.reset_after(Duration::from_millis(1_200)); // we're caught up, take a break
1565
+
dirty_nsids.extend(dirty);
1566
+
log::trace!("rolled up {n} items ({} collections now dirty)", dirty_nsids.len());
1568
+
_ = trim.tick() => {
1569
+
let n = dirty_nsids.len();
1570
+
log::trace!("trimming {n} nsids: {dirty_nsids:?}");
1571
+
let t0 = Instant::now();
1572
+
let (mut total_danglers, mut total_deleted) = (0, 0);
1573
+
let mut completed = HashSet::new();
1574
+
for collection in &dirty_nsids {
1575
+
let mut db = self.0.clone();
1576
+
let c = collection.clone();
1577
+
let (danglers, deleted, ended_early) = tokio::task::spawn_blocking(move || db.trim_collection(&c, 512, false)).await??;
1578
+
total_danglers += danglers;
1579
+
total_deleted += deleted;
1581
+
completed.insert(collection.clone());
1583
+
if total_deleted > 10_000_000 {
1584
+
log::info!("trim stopped early, more than 10M records already deleted.");
1588
+
for c in completed {
1589
+
dirty_nsids.remove(&c);
1591
+
log::info!("finished trimming {n} nsids in {:?}: {total_danglers} dangling and {total_deleted} total removed.", t0.elapsed());
···
1631
+
/// Set a value to a fixed key, erroring if the value already exists
1633
+
/// Intended for single-threaded init: not safe under concurrency, since there
1634
+
/// is no transaction between checking if the value already exists and writing it.
1635
+
fn init_static_neu<K: StaticStr>(
1636
+
global: &PartitionHandle,
1637
+
value: impl DbBytes,
1638
+
) -> StorageResult<()> {
1639
+
let key_bytes = DbStaticStr::<K>::default().to_db_bytes()?;
1640
+
if global.get(&key_bytes)?.is_some() {
1641
+
return Err(StorageError::InitError(format!(
1642
+
"init failed: value for key {key_bytes:?} already exists"
1645
+
let value_bytes = value.to_db_bytes()?;
1646
+
global.insert(&key_bytes, &value_bytes)?;
/// Set a value to a fixed key
fn insert_batch_static_neu<K: StaticStr>(
···
////////// temp stuff to remove:
878
-
// fn summarize_batch<const LIMIT: usize>(batch: &EventBatch<LIMIT>) -> String {
880
-
// "batch of {: >3} samples from {: >4} records in {: >2} collections from ~{: >4} DIDs, {} acct removes, cursor {: <12?}",
881
-
// batch.total_records(),
882
-
// batch.total_seen(),
883
-
// batch.total_collections(),
884
-
// batch.estimate_dids(),
885
-
// batch.account_removes(),
886
-
// batch.latest_cursor().map(|c| c.elapsed()),
···
use serde_json::value::RawValue;
fn fjall_db() -> (FjallReader, FjallWriter) {
899
-
let (read, write, _) = FjallStorage::init(
1681
+
let (read, write, _, _) = FjallStorage::init(
tempfile::tempdir().unwrap(),
"offline test (no real jetstream endpoint)".to_string(),
···
const TEST_BATCH_LIMIT: usize = 16;
1692
+
fn beginning() -> HourTruncatedCursor {
1693
+
Cursor::from_start().into()
#[derive(Debug, Default)]
···
.entry(collection.clone())
954
-
.truncating_insert(commit)
1739
+
.truncating_insert(commit, &[0u8; 16])
···
.entry(collection.clone())
996
-
.truncating_insert(commit)
1781
+
.truncating_insert(commit, &[0u8; 16])
···
.entry(collection.clone())
1028
-
.truncating_insert(commit)
1813
+
.truncating_insert(commit, &[0u8; 16])
···
fn test_hello() -> anyhow::Result<()> {
let (read, mut write) = fjall_db();
write.insert_batch::<TEST_BATCH_LIMIT>(EventBatch::default())?;
1047
-
let (records, dids) =
1048
-
read.get_counts_by_collection(&Nsid::new("a.b.c".to_string()).unwrap())?;
1049
-
assert_eq!(records, 0);
1050
-
assert_eq!(dids, 0);
1836
+
} = read.get_collection_counts(
1837
+
&Nsid::new("a.b.c".to_string()).unwrap(),
1841
+
assert_eq!(creates, 0);
1842
+
assert_eq!(dids_estimate, 0);
···
write.insert_batch(batch.batch)?;
1861
+
write.step_rollup()?;
1070
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1071
-
assert_eq!(records, 1);
1072
-
assert_eq!(dids, 1);
1073
-
let (records, dids) =
1074
-
read.get_counts_by_collection(&Nsid::new("d.e.f".to_string()).unwrap())?;
1075
-
assert_eq!(records, 0);
1076
-
assert_eq!(dids, 0);
1867
+
} = read.get_collection_counts(&collection, beginning(), None)?;
1868
+
assert_eq!(creates, 1);
1869
+
assert_eq!(dids_estimate, 1);
1874
+
} = read.get_collection_counts(
1875
+
&Nsid::new("d.e.f".to_string()).unwrap(),
1879
+
assert_eq!(creates, 0);
1880
+
assert_eq!(dids_estimate, 0);
1078
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
1882
+
let records = read.get_records_by_collections([collection].into(), 2, false)?;
assert_eq!(records.len(), 1);
assert_eq!(rec.record.get(), "{}");
1085
-
read.get_records_by_collections(&[Nsid::new("d.e.f".to_string()).unwrap()], 2, false)?;
1888
+
let records = read.get_records_by_collections(
1889
+
[Nsid::new("d.e.f".to_string()).unwrap()].into(),
assert_eq!(records.len(), 0);
···
write.insert_batch(batch.batch)?;
let records = read.get_records_by_collections(
Nsid::new("a.a.a".to_string()).unwrap(),
Nsid::new("a.a.b".to_string()).unwrap(),
Nsid::new("a.a.c".to_string()).unwrap(),
···
write.insert_batch(batch.batch)?;
let records = read.get_records_by_collections(
Nsid::new("a.a.a".to_string()).unwrap(),
Nsid::new("a.a.b".to_string()).unwrap(),
Nsid::new("a.a.c".to_string()).unwrap(),
···
write.insert_batch(batch.batch)?;
2044
+
write.step_rollup()?;
1238
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1239
-
assert_eq!(records, 1);
1240
-
assert_eq!(dids, 1);
2050
+
} = read.get_collection_counts(&collection, beginning(), None)?;
2051
+
assert_eq!(creates, 1);
2052
+
assert_eq!(dids_estimate, 1);
1242
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
2054
+
let records = read.get_records_by_collections([collection].into(), 2, false)?;
assert_eq!(records.len(), 1);
assert_eq!(rec.record.get(), r#"{"ch": "ch-ch-ch-changes"}"#);
···
write.insert_batch(batch.batch)?;
2087
+
write.step_rollup()?;
1276
-
let (records, dids) = read.get_counts_by_collection(&collection)?;
1277
-
assert_eq!(records, 1);
1278
-
assert_eq!(dids, 1);
2093
+
} = read.get_collection_counts(&collection, beginning(), None)?;
2094
+
assert_eq!(creates, 1);
2095
+
assert_eq!(dids_estimate, 1);
1280
-
let records = read.get_records_by_collections(&[collection], 2, false)?;
2097
+
let records = read.get_records_by_collections([collection].into(), 2, false)?;
assert_eq!(records.len(), 0);
···
write.insert_batch(batch.batch)?;
let records = read.get_records_by_collections(
1326
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
2143
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
assert_eq!(records.len(), 1);
let records = read.get_records_by_collections(
1332
-
&[Nsid::new("a.a.b".to_string()).unwrap()],
2149
+
HashSet::from([Nsid::new("a.a.b".to_string()).unwrap()]),
assert_eq!(records.len(), 10);
let records = read.get_records_by_collections(
1338
-
&[Nsid::new("a.a.c".to_string()).unwrap()],
2155
+
HashSet::from([Nsid::new("a.a.c".to_string()).unwrap()]),
assert_eq!(records.len(), 1);
let records = read.get_records_by_collections(
1344
-
&[Nsid::new("a.a.d".to_string()).unwrap()],
2161
+
HashSet::from([Nsid::new("a.a.d".to_string()).unwrap()]),
assert_eq!(records.len(), 0);
1350
-
write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6)?;
1351
-
write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6)?;
1352
-
write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6)?;
1353
-
write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6)?;
2167
+
write.trim_collection(&Nsid::new("a.a.a".to_string()).unwrap(), 6, false)?;
2168
+
write.trim_collection(&Nsid::new("a.a.b".to_string()).unwrap(), 6, false)?;
2169
+
write.trim_collection(&Nsid::new("a.a.c".to_string()).unwrap(), 6, false)?;
2170
+
write.trim_collection(&Nsid::new("a.a.d".to_string()).unwrap(), 6, false)?;
let records = read.get_records_by_collections(
1356
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
2173
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
assert_eq!(records.len(), 1);
let records = read.get_records_by_collections(
1362
-
&[Nsid::new("a.a.b".to_string()).unwrap()],
2179
+
HashSet::from([Nsid::new("a.a.b".to_string()).unwrap()]),
assert_eq!(records.len(), 6);
let records = read.get_records_by_collections(
1368
-
&[Nsid::new("a.a.c".to_string()).unwrap()],
2185
+
HashSet::from([Nsid::new("a.a.c".to_string()).unwrap()]),
assert_eq!(records.len(), 1);
let records = read.get_records_by_collections(
1374
-
&[Nsid::new("a.a.d".to_string()).unwrap()],
2191
+
HashSet::from([Nsid::new("a.a.d".to_string()).unwrap()]),
···
write.insert_batch(batch.batch)?;
let records = read.get_records_by_collections(
1411
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
2228
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
···
assert_eq!(records_deleted, 2);
let records = read.get_records_by_collections(
1422
-
&[Nsid::new("a.a.a".to_string()).unwrap()],
2239
+
HashSet::from([Nsid::new("a.a.a".to_string()).unwrap()]),
···
1454
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
2270
+
let records = read.get_records_by_collections(
2271
+
[Nsid::new("a.a.a".to_string()).unwrap()].into(),
assert_eq!(records.len(), 0);
···
write.insert_batch(batch.batch)?;
1476
-
let n = write.step_rollup()?;
2296
+
let (n, _) = write.step_rollup()?;
let mut batch = TestBatch::default();
batch.delete_account("did:plc:person-a", 10_001);
write.insert_batch(batch.batch)?;
1484
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
2303
+
let records = read.get_records_by_collections(
2304
+
[Nsid::new("a.a.a".to_string()).unwrap()].into(),
assert_eq!(records.len(), 1);
1487
-
let n = write.step_rollup()?;
2310
+
let (n, _) = write.step_rollup()?;
1491
-
read.get_records_by_collections(&[Nsid::new("a.a.a".to_string()).unwrap()], 1, false)?;
2313
+
let records = read.get_records_by_collections(
2314
+
[Nsid::new("a.a.a".to_string()).unwrap()].into(),
assert_eq!(records.len(), 0);
let mut batch = TestBatch::default();
batch.delete_account("did:plc:person-a", 9_999);
write.insert_batch(batch.batch)?;
1498
-
let n = write.step_rollup()?;
2324
+
let (n, _) = write.step_rollup()?;
···
write.insert_batch(batch.batch)?;
1532
-
let n = write.step_rollup()?;
2358
+
let (n, _) = write.step_rollup()?;
1535
-
let n = write.step_rollup()?;
2361
+
let (n, _) = write.step_rollup()?;
···
write.insert_batch(batch.batch)?;
1583
-
let (records, dids) =
1584
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1585
-
assert_eq!(records, 3);
1586
-
assert_eq!(dids, 2);
2413
+
} = read.get_collection_counts(
2414
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2418
+
assert_eq!(creates, 0);
2419
+
assert_eq!(dids_estimate, 0);
1589
-
let n = write.step_rollup()?;
2422
+
let (n, _) = write.step_rollup()?;
1592
-
let (records, dids) =
1593
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1594
-
assert_eq!(records, 3);
1595
-
assert_eq!(dids, 2);
2429
+
} = read.get_collection_counts(
2430
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2434
+
assert_eq!(creates, 2);
2435
+
assert_eq!(dids_estimate, 2);
// delete account rolled up
1598
-
let n = write.step_rollup()?;
2438
+
let (n, _) = write.step_rollup()?;
1601
-
let (records, dids) =
1602
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1603
-
assert_eq!(records, 3);
1604
-
assert_eq!(dids, 2);
2445
+
} = read.get_collection_counts(
2446
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2450
+
assert_eq!(creates, 2);
2451
+
assert_eq!(dids_estimate, 2);
// second batch rolled up
1607
-
let n = write.step_rollup()?;
2454
+
let (n, _) = write.step_rollup()?;
1610
-
let (records, dids) =
1611
-
read.get_counts_by_collection(&Nsid::new("a.a.a".to_string()).unwrap())?;
1612
-
assert_eq!(records, 3);
1613
-
assert_eq!(dids, 2);
2461
+
} = read.get_collection_counts(
2462
+
&Nsid::new("a.a.a".to_string()).unwrap(),
2466
+
assert_eq!(creates, 3);
2467
+
assert_eq!(dids_estimate, 2);
1616
-
let n = write.step_rollup()?;
2470
+
let (n, _) = write.step_rollup()?;
1623
-
fn get_top_collections() -> anyhow::Result<()> {
2477
+
fn get_prefix_children_lexi_empty() {
2478
+
let (read, _) = fjall_db();
2489
+
NsidPrefix::new("aaa.aaa").unwrap(),
2491
+
OrderCollectionsBy::Lexi { cursor: None },
2497
+
assert_eq!(creates, 0);
2498
+
assert_eq!(dids_estimate, 0);
2499
+
assert_eq!(children, vec![]);
2500
+
assert_eq!(cursor, None);
2504
+
fn get_prefix_excludes_exact_collection() -> anyhow::Result<()> {
let (read, mut write) = fjall_db();
let mut batch = TestBatch::default();
···
2517
+
write.insert_batch(batch.batch)?;
2518
+
write.step_rollup()?;
2528
+
) = read.get_prefix(
2529
+
NsidPrefix::new("a.a.a").unwrap(),
2531
+
OrderCollectionsBy::Lexi { cursor: None },
2535
+
assert_eq!(creates, 0);
2536
+
assert_eq!(dids_estimate, 0);
2537
+
assert_eq!(children, vec![]);
2538
+
assert_eq!(cursor, None);
2543
+
fn get_prefix_excludes_neighbour_collection() -> anyhow::Result<()> {
2544
+
let (read, mut write) = fjall_db();
2546
+
let mut batch = TestBatch::default();
1637
-
"did:plc:person-b",
2548
+
"did:plc:person-a",
2556
+
write.insert_batch(batch.batch)?;
2557
+
write.step_rollup()?;
2567
+
) = read.get_prefix(
2568
+
NsidPrefix::new("a.a.a").unwrap(),
2570
+
OrderCollectionsBy::Lexi { cursor: None },
2574
+
assert_eq!(creates, 0);
2575
+
assert_eq!(dids_estimate, 0);
2576
+
assert_eq!(children, vec![]);
2577
+
assert_eq!(cursor, None);
2582
+
fn get_prefix_includes_child_collection() -> anyhow::Result<()> {
2583
+
let (read, mut write) = fjall_db();
2585
+
let mut batch = TestBatch::default();
1646
-
"did:plc:person-c",
2587
+
"did:plc:person-a",
2595
+
write.insert_batch(batch.batch)?;
2596
+
write.step_rollup()?;
2606
+
) = read.get_prefix(
2607
+
NsidPrefix::new("a.a").unwrap(),
2609
+
OrderCollectionsBy::Lexi { cursor: None },
2613
+
assert_eq!(creates, 1);
2614
+
assert_eq!(dids_estimate, 1);
2617
+
vec![PrefixChild::Collection(NsidCount {
2618
+
nsid: "a.a.a".to_string(),
2623
+
assert_eq!(cursor, None);
2628
+
fn get_prefix_includes_child_prefix() -> anyhow::Result<()> {
2629
+
let (read, mut write) = fjall_db();
2631
+
let mut batch = TestBatch::default();
1659
-
Some("rev-aaa-2"),
write.insert_batch(batch.batch)?;
2642
+
write.step_rollup()?;
1665
-
let n = write.step_rollup()?;
1666
-
assert_eq!(n, 3); // 3 collections
1668
-
let tops = read.get_top_collections()?;
2652
+
) = read.get_prefix(
2653
+
NsidPrefix::new("a.a").unwrap(),
2655
+
OrderCollectionsBy::Lexi { cursor: None },
2659
+
assert_eq!(creates, 1);
2660
+
assert_eq!(dids_estimate, 1);
1674
-
nsid_child_segments: HashMap::from([(
1679
-
nsid_child_segments: HashMap::from([
1685
-
nsid_child_segments: HashMap::from([
1691
-
nsid_child_segments: HashMap::from([]),
1699
-
nsid_child_segments: HashMap::from([]),
1710
-
nsid_child_segments: HashMap::from([(
1715
-
nsid_child_segments: HashMap::from([]),
2663
+
vec![PrefixChild::Prefix(PrefixCount {
2664
+
prefix: "a.a.a".to_string(),
2669
+
assert_eq!(cursor, None);
1729
-
fn get_top_collections_with_parent_nsid() -> anyhow::Result<()> {
2674
+
fn get_prefix_merges_child_prefixes() -> anyhow::Result<()> {
let (read, mut write) = fjall_db();
let mut batch = TestBatch::default();
1734
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
2679
+
"did:plc:person-a",
1737
-
r#""child nsid""#,
1743
-
"did:plc:inze6wrmsm7pjl7yta3oig77",
1746
-
r#""parent nsid""#,
2688
+
"did:plc:person-a",
write.insert_batch(batch.batch)?;
1753
-
let n = write.step_rollup()?;
1754
-
assert_eq!(n, 2); // 3 collections
2697
+
write.step_rollup()?;
1756
-
let tops = read.get_top_collections()?;
2707
+
) = read.get_prefix(
2708
+
NsidPrefix::new("a.a").unwrap(),
2710
+
OrderCollectionsBy::Lexi { cursor: None },
2714
+
assert_eq!(creates, 2);
2715
+
assert_eq!(dids_estimate, 1);
1762
-
nsid_child_segments: HashMap::from([(
1767
-
nsid_child_segments: HashMap::from([(
1772
-
nsid_child_segments: HashMap::from([(
1777
-
nsid_child_segments: HashMap::from([(
1782
-
nsid_child_segments: HashMap::from([]),
2718
+
vec![PrefixChild::Prefix(PrefixCount {
2719
+
prefix: "a.a.a".to_string(),
2724
+
assert_eq!(cursor, None);
1794
-
// TODO: handle leaf node counts explicitly, since parent NSIDs can be leaves themselves
2729
+
fn get_prefix_exact_and_child_and_prefix() -> anyhow::Result<()> {
2730
+
let (read, mut write) = fjall_db();
2732
+
let mut batch = TestBatch::default();
2735
+
"did:plc:person-a",
2745
+
"did:plc:person-a",
2755
+
"did:plc:person-a",
2759
+
Some("rev-aaaaa"),
2763
+
write.insert_batch(batch.batch)?;
2764
+
write.step_rollup()?;
2774
+
) = read.get_prefix(
2775
+
NsidPrefix::new("a.a.a").unwrap(),
2777
+
OrderCollectionsBy::Lexi { cursor: None },
2781
+
assert_eq!(creates, 2);
2782
+
assert_eq!(dids_estimate, 1);
2786
+
PrefixChild::Collection(NsidCount {
2787
+
nsid: "a.a.a.a".to_string(),
2791
+
PrefixChild::Prefix(PrefixCount {
2792
+
prefix: "a.a.a.a".to_string(),
2798
+
assert_eq!(cursor, None);