Constellation, Spacedust, Slingshot, UFOs: atproto crates and services for microcosm

even more backfill tweaks

it's done. maybe we won't do it again. that would be nice.

Changed files
+46 -38
ufos
+24
ufos/src/lib.rs
···
use serde_json::value::RawValue;
use sha2::Sha256;
use std::collections::HashMap;
+
use std::time::Duration;
/// Builds the `Element<14>` for a DID.
///
/// Calls `Element::from_digest_with_prefix` with `Sha256` as the digest type,
/// `sketch_secret` as the prefix argument, and the DID's raw bytes as input.
///
/// NOTE(review): presumably the secret prefix makes the resulting elements
/// specific to this deployment's sketches -- confirm against the `Element` docs.
fn did_element(sketch_secret: &SketchSecretPrefix, did: &Did) -> Element<14> {
Element::from_digest_with_prefix::<Sha256>(sketch_secret, did.as_bytes())
+
}
+
+
/// Formats a duration as a short human-readable string for log output.
///
/// The unit layout depends on the magnitude of `dt`:
///
/// - under one second: whole milliseconds, e.g. `"250ms"`
/// - under one minute: seconds with two decimals, e.g. `"1.50s"`
/// - under one hour: `"1m15s"`
/// - under one day: `"1h2m5s"`
/// - otherwise: `"1d1h1m1s"`
///
/// The value is rounded to the precision that will be displayed *before* the
/// layout is chosen and before it is split into units. Rounding afterwards
/// lets a component spill past its range (119.7s would print as `"1m60s"`,
/// 0.9996s as `"1000ms"`, 59.999s as `"60.00s"`).
pub fn nice_duration(dt: Duration) -> String {
    // Integer nanoseconds keep every rounding step exact; u128 has ample
    // headroom above `Duration::MAX`, so the additions below cannot overflow.
    let nanos = dt.as_nanos();

    // Nearest millisecond. A value that rounds up to 1000ms belongs to the
    // seconds layout instead.
    let millis = (nanos + 500_000) / 1_000_000;
    if millis < 1_000 {
        return format!("{millis}ms");
    }

    // Nearest hundredth of a second. A value that rounds up to 60.00s belongs
    // to the minutes layout instead.
    let centis = (nanos + 5_000_000) / 10_000_000;
    if centis < 6_000 {
        return format!("{}.{:02}s", centis / 100, centis % 100);
    }

    // From here on only whole seconds are shown, so round once and split.
    let total_secs = (nanos + 500_000_000) / 1_000_000_000;
    let days = total_secs / 86_400;
    let hrs = total_secs / 3_600 % 24;
    let mins = total_secs / 60 % 60;
    let secs = total_secs % 60;

    if days > 0 {
        format!("{days}d{hrs}h{mins}m{secs}s")
    } else if hrs > 0 {
        format!("{hrs}h{mins}m{secs}s")
    } else {
        format!("{mins}m{secs}s")
    }
}
#[derive(Debug, Default, Clone)]
+1 -24
ufos/src/main.rs
···
use ufos::storage_fjall::FjallStorage;
use ufos::storage_mem::MemStorage;
use ufos::store_types::SketchSecretPrefix;
-
use ufos::ConsumerInfo;
+
use ufos::{nice_duration, ConsumerInfo};
#[cfg(not(target_env = "msvc"))]
use tikv_jemallocator::Jemalloc;
···
started_at: SystemTime,
now: SystemTime,
) {
-
let nice_duration = |dt: Duration| {
-
let secs = dt.as_secs_f64();
-
if secs < 1. {
-
return format!("{:.0}ms", secs * 1000.);
-
}
-
if secs < 60. {
-
return format!("{secs:.02}s");
-
}
-
let mins = (secs / 60.).floor();
-
let rsecs = secs - (mins * 60.);
-
if mins < 60. {
-
return format!("{mins:.0}m{rsecs:.0}s");
-
}
-
let hrs = (mins / 60.).floor();
-
let rmins = mins - (hrs * 60.);
-
if hrs < 24. {
-
return format!("{hrs:.0}h{rmins:.0}m{rsecs:.0}s");
-
}
-
let days = (hrs / 24.).floor();
-
let rhrs = hrs - (days * 24.);
-
format!("{days:.0}d{rhrs:.0}h{rmins:.0}m{rsecs:.0}s")
-
};
-
let nice_dt_two_maybes = |earlier: Option<Cursor>, later: Option<Cursor>| match (earlier, later)
{
(Some(earlier), Some(later)) => match later.duration_since(&earlier) {
+21 -14
ufos/src/storage_fjall.rs
···
WEEK_IN_MICROS,
};
use crate::{
-
CommitAction, ConsumerInfo, Did, EventBatch, Nsid, NsidCount, OrderCollectionsBy, UFOsRecord,
+
nice_duration, CommitAction, ConsumerInfo, Did, EventBatch, Nsid, NsidCount,
+
OrderCollectionsBy, UFOsRecord,
};
use async_trait::async_trait;
use fjall::{
···
let mut live_records_found = 0;
let mut candidate_new_feed_lower_cursor = None;
-
let mut ended_early = false;
+
let ended_early = false;
+
let mut current_cursor: Option<Cursor> = None;
for (i, kv) in self.feeds.range(live_range).rev().enumerate() {
if i > 0 && i % 500_000 == 0 {
-
log::info!("trim: at {i} for {:?}", collection.to_string());
-
}
-
if !full_scan && i > 10_000_000 {
log::info!(
-
"stopping trim early for {:?}: already scanned 10M elements",
-
collection.to_string()
+
"trim: at {i} for {:?} (now at {})",
+
collection.to_string(),
+
current_cursor
+
.map(|c| c
+
.elapsed()
+
.map(nice_duration)
+
.unwrap_or("[not past]".into()))
+
.unwrap_or("??".into()),
);
-
ended_early = true;
-
break;
let (key_bytes, val_bytes) = kv?;
let feed_key = db_complete::<NsidRecordFeedKey>(&key_bytes)?;
···
};
let (meta, _) = RecordLocationMeta::from_db_bytes(&location_val_bytes)?;
+
current_cursor = Some(meta.cursor());
if meta.cursor() != feed_key.cursor() {
// older/different version
···
async fn run(mut self, backfill: bool) -> StorageResult<()> {
let mut dirty_nsids = HashSet::new();
+
// backfill condition here is iffy -- longer is good when doing the main ingest and then collection trims
+
// shorter once those are done helps things catch up
+
// the best setting for non-backfill is non-obvious.. it can be pretty slow and still be fine
let mut rollup =
-
tokio::time::interval(Duration::from_micros(if backfill { 1_000 } else { 81_000 }));
-
rollup.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
+
tokio::time::interval(Duration::from_micros(if backfill { 100 } else { 32_000 }));
+
rollup.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
-
let mut trim =
-
tokio::time::interval(Duration::from_millis(if backfill { 500 } else { 6_000 }));
+
// backfill condition again iffy. collection trims should probably happen in their own phase.
+
let mut trim = tokio::time::interval(Duration::from_secs(if backfill { 18 } else { 9 }));
trim.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
loop {
···
completed.insert(collection.clone());
if total_deleted > 10_000_000 {
-
log::info!("trim stopped early, more than 100M records already deleted.");
+
log::info!("trim stopped early, more than 10M records already deleted.");
break;